1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <mach/vm_types.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counter.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map_internal.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110 #include <vm/vm_reclaim_internal.h>
111
112 #include <vm/vm_protos.h>
113 #include <vm/vm_shared_region.h>
114 #include <vm/vm_map_store.h>
115
116 #include <san/kasan.h>
117
118 #include <sys/resource.h>
119 #include <sys/codesign.h>
120 #include <sys/code_signing.h>
121 #include <sys/mman.h>
122 #include <sys/reboot.h>
123 #include <sys/kdebug_triage.h>
124
125 #include <libkern/section_keywords.h>
126
/* Development/debug-only panic knobs (settable, default off). */
#if DEVELOPMENT || DEBUG
extern int proc_selfcsflags(void);
int panic_on_unsigned_execute = 0;
int panic_on_mlock_failure = 0;
#endif /* DEVELOPMENT || DEBUG */

#if MACH_ASSERT
/*
 * "debug4k" tracing controls for mixed-page-size (4K-in-16K) debugging.
 * debug4k_proc_filter defaults to all categories except FAULT.
 */
int debug4k_filter = 0;
char debug4k_proc_name[1024] = "";
int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
int debug4k_panic_on_misaligned_sharing = 0;
/* category names indexed by the __DEBUG4K_* category number */
const char *debug4k_category_name[] = {
	"error",        /* 0 */
	"life",         /* 1 */
	"load",         /* 2 */
	"fault",        /* 3 */
	"copy",         /* 4 */
	"share",        /* 5 */
	"adjust",       /* 6 */
	"pmap",         /* 7 */
	"mementry",     /* 8 */
	"iokit",        /* 9 */
	"upl",          /* 10 */
	"exc",          /* 11 */
	"vfs"           /* 12 */
};
#endif /* MACH_ASSERT */
int debug4k_no_cow_copyin = 0;


#if __arm64__
extern const int fourk_binary_compatibility_unsafe;
extern const int fourk_binary_compatibility_allow_wx;
#endif /* __arm64__ */
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);

#if VM_MAP_DEBUG_APPLE_PROTECT
int vm_map_debug_apple_protect = 0;
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
#if VM_MAP_DEBUG_FOURK
int vm_map_debug_fourk = 0;
#endif /* VM_MAP_DEBUG_FOURK */

/*
 * Executable mappings are immutable by default; on DEBUG/DEVELOPMENT
 * kernels this can be overridden via the boot-arg of the same name.
 */
#if DEBUG || DEVELOPMENT
static TUNABLE(bool, vm_map_executable_immutable,
    "vm_map_executable_immutable", true);
#else
#define vm_map_executable_immutable true
#endif

#if CONFIG_MAP_RANGES
/* boot-arg gate for per-user-tag VM ranges; tag -> range id lookup table */
static TUNABLE(bool, vm_map_user_ranges, "vm_map_user_ranges", true);
static SECURITY_READ_ONLY_LATE(uint8_t) vm_map_range_id_map[VM_MEMORY_COUNT];
#endif

/* reference-count group used for vm_map reference accounting */
os_refgrp_decl(static, map_refgrp, "vm_map", NULL);

extern u_int32_t random(void);  /* from <libkern/libkern.h> */
186 /* Internal prototypes
187 */
188
/*
 * A "zap list" accumulates map entries removed from a map (see
 * vm_map_delete()) -- presumably so they can be torn down after the
 * map lock is dropped; TODO confirm against vm_map_zap consumers.
 * Implemented as a singly-linked list with a tail pointer for O(1) append.
 */
typedef struct vm_map_zap {
	vm_map_entry_t          vmz_head;
	vm_map_entry_t         *vmz_tail;
} *vm_map_zap_t;

/* Declare and initialize an empty zap list: tail points back at the head. */
#define VM_MAP_ZAP_DECLARE(zap) \
	struct vm_map_zap zap = { .vmz_tail = &zap.vmz_head }
196
197 static vm_map_entry_t vm_map_entry_insert(
198 vm_map_t map,
199 vm_map_entry_t insp_entry,
200 vm_map_offset_t start,
201 vm_map_offset_t end,
202 vm_object_t object,
203 vm_object_offset_t offset,
204 vm_map_kernel_flags_t vmk_flags,
205 boolean_t needs_copy,
206 vm_prot_t cur_protection,
207 vm_prot_t max_protection,
208 vm_inherit_t inheritance,
209 boolean_t no_cache,
210 boolean_t permanent,
211 unsigned int superpage_size,
212 boolean_t clear_map_aligned,
213 int alias);
214
215 static void vm_map_simplify_range(
216 vm_map_t map,
217 vm_map_offset_t start,
218 vm_map_offset_t end); /* forward */
219
220 static boolean_t vm_map_range_check(
221 vm_map_t map,
222 vm_map_offset_t start,
223 vm_map_offset_t end,
224 vm_map_entry_t *entry);
225
226 static void vm_map_submap_pmap_clean(
227 vm_map_t map,
228 vm_map_offset_t start,
229 vm_map_offset_t end,
230 vm_map_t sub_map,
231 vm_map_offset_t offset);
232
233 static void vm_map_pmap_enter(
234 vm_map_t map,
235 vm_map_offset_t addr,
236 vm_map_offset_t end_addr,
237 vm_object_t object,
238 vm_object_offset_t offset,
239 vm_prot_t protection);
240
241 static void _vm_map_clip_end(
242 struct vm_map_header *map_header,
243 vm_map_entry_t entry,
244 vm_map_offset_t end);
245
246 static void _vm_map_clip_start(
247 struct vm_map_header *map_header,
248 vm_map_entry_t entry,
249 vm_map_offset_t start);
250
251 static kmem_return_t vm_map_delete(
252 vm_map_t map,
253 vm_map_offset_t start,
254 vm_map_offset_t end,
255 vmr_flags_t flags,
256 kmem_guard_t guard,
257 vm_map_zap_t zap);
258
259 static void vm_map_copy_insert(
260 vm_map_t map,
261 vm_map_entry_t after_where,
262 vm_map_copy_t copy);
263
264 static kern_return_t vm_map_copy_overwrite_unaligned(
265 vm_map_t dst_map,
266 vm_map_entry_t entry,
267 vm_map_copy_t copy,
268 vm_map_address_t start,
269 boolean_t discard_on_success);
270
271 static kern_return_t vm_map_copy_overwrite_aligned(
272 vm_map_t dst_map,
273 vm_map_entry_t tmp_entry,
274 vm_map_copy_t copy,
275 vm_map_offset_t start,
276 pmap_t pmap);
277
278 static kern_return_t vm_map_copyin_kernel_buffer(
279 vm_map_t src_map,
280 vm_map_address_t src_addr,
281 vm_map_size_t len,
282 boolean_t src_destroy,
283 vm_map_copy_t *copy_result); /* OUT */
284
285 static kern_return_t vm_map_copyout_kernel_buffer(
286 vm_map_t map,
287 vm_map_address_t *addr, /* IN/OUT */
288 vm_map_copy_t copy,
289 vm_map_size_t copy_size,
290 boolean_t overwrite,
291 boolean_t consume_on_success);
292
293 static void vm_map_fork_share(
294 vm_map_t old_map,
295 vm_map_entry_t old_entry,
296 vm_map_t new_map);
297
298 static boolean_t vm_map_fork_copy(
299 vm_map_t old_map,
300 vm_map_entry_t *old_entry_p,
301 vm_map_t new_map,
302 int vm_map_copyin_flags);
303
304 static kern_return_t vm_map_wire_nested(
305 vm_map_t map,
306 vm_map_offset_t start,
307 vm_map_offset_t end,
308 vm_prot_t caller_prot,
309 vm_tag_t tag,
310 boolean_t user_wire,
311 pmap_t map_pmap,
312 vm_map_offset_t pmap_addr,
313 ppnum_t *physpage_p);
314
315 static kern_return_t vm_map_unwire_nested(
316 vm_map_t map,
317 vm_map_offset_t start,
318 vm_map_offset_t end,
319 boolean_t user_wire,
320 pmap_t map_pmap,
321 vm_map_offset_t pmap_addr);
322
323 static kern_return_t vm_map_overwrite_submap_recurse(
324 vm_map_t dst_map,
325 vm_map_offset_t dst_addr,
326 vm_map_size_t dst_size);
327
328 static kern_return_t vm_map_copy_overwrite_nested(
329 vm_map_t dst_map,
330 vm_map_offset_t dst_addr,
331 vm_map_copy_t copy,
332 boolean_t interruptible,
333 pmap_t pmap,
334 boolean_t discard_on_success);
335
336 static kern_return_t vm_map_remap_extract(
337 vm_map_t map,
338 vm_map_offset_t addr,
339 vm_map_size_t size,
340 boolean_t copy,
341 struct vm_map_header *map_header,
342 vm_prot_t *cur_protection,
343 vm_prot_t *max_protection,
344 vm_inherit_t inheritance,
345 vm_map_kernel_flags_t vmk_flags);
346
347 static kern_return_t vm_map_remap_range_allocate(
348 vm_map_t map,
349 vm_map_address_t *address,
350 vm_map_size_t size,
351 vm_map_offset_t mask,
352 int flags,
353 vm_map_kernel_flags_t vmk_flags,
354 vm_tag_t tag,
355 vm_map_entry_t *map_entry,
356 vm_map_zap_t zap_list);
357
358 static void vm_map_region_look_for_page(
359 vm_map_t map,
360 vm_map_offset_t va,
361 vm_object_t object,
362 vm_object_offset_t offset,
363 int max_refcnt,
364 unsigned short depth,
365 vm_region_extended_info_t extended,
366 mach_msg_type_number_t count);
367
368 static int vm_map_region_count_obj_refs(
369 vm_map_entry_t entry,
370 vm_object_t object);
371
372
373 static kern_return_t vm_map_willneed(
374 vm_map_t map,
375 vm_map_offset_t start,
376 vm_map_offset_t end);
377
378 static kern_return_t vm_map_reuse_pages(
379 vm_map_t map,
380 vm_map_offset_t start,
381 vm_map_offset_t end);
382
383 static kern_return_t vm_map_reusable_pages(
384 vm_map_t map,
385 vm_map_offset_t start,
386 vm_map_offset_t end);
387
388 static kern_return_t vm_map_can_reuse(
389 vm_map_t map,
390 vm_map_offset_t start,
391 vm_map_offset_t end);
392
393 #if MACH_ASSERT
394 static kern_return_t vm_map_pageout(
395 vm_map_t map,
396 vm_map_offset_t start,
397 vm_map_offset_t end);
398 #endif /* MACH_ASSERT */
399
400 kern_return_t vm_map_corpse_footprint_collect(
401 vm_map_t old_map,
402 vm_map_entry_t old_entry,
403 vm_map_t new_map);
404 void vm_map_corpse_footprint_collect_done(
405 vm_map_t new_map);
406 void vm_map_corpse_footprint_destroy(
407 vm_map_t map);
408 kern_return_t vm_map_corpse_footprint_query_page_info(
409 vm_map_t map,
410 vm_map_offset_t va,
411 int *disposition_p);
412 void vm_map_footprint_query_page_info(
413 vm_map_t map,
414 vm_map_entry_t map_entry,
415 vm_map_offset_t curr_s_offset,
416 int *disposition_p);
417
418 #if CONFIG_MAP_RANGES
419 static void vm_map_range_map_init(void);
420 #endif /* CONFIG_MAP_RANGES */
421
422 pid_t find_largest_process_vm_map_entries(void);
423
424 extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
425 mach_exception_data_type_t subcode);
426
427 /*
428 * Macros to copy a vm_map_entry. We must be careful to correctly
429 * manage the wired page count. vm_map_entry_copy() creates a new
430 * map entry to the same memory - the wired count in the new entry
431 * must be set to zero. vm_map_entry_copy_full() creates a new
432 * entry that is identical to the old entry. This preserves the
433 * wire count; it's used for map splitting and zone changing in
434 * vm_map_copyout.
435 */
436
437 static inline void
vm_map_entry_copy_pmap_cs_assoc(vm_map_t map __unused,vm_map_entry_t new __unused,vm_map_entry_t old __unused)438 vm_map_entry_copy_pmap_cs_assoc(
439 vm_map_t map __unused,
440 vm_map_entry_t new __unused,
441 vm_map_entry_t old __unused)
442 {
443 /* when pmap_cs is not enabled, assert as a sanity check */
444 assert(new->pmap_cs_associated == FALSE);
445 }
446
447 /*
448 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
449 * But for security reasons on some platforms, we don't want the
450 * new mapping to be "used for jit", so we reset the flag here.
451 */
452 static inline void
vm_map_entry_copy_code_signing(vm_map_t map,vm_map_entry_t new,vm_map_entry_t old __unused)453 vm_map_entry_copy_code_signing(
454 vm_map_t map,
455 vm_map_entry_t new,
456 vm_map_entry_t old __unused)
457 {
458 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
459 assert(new->used_for_jit == old->used_for_jit);
460 } else {
461 new->used_for_jit = FALSE;
462 }
463 }
464
/*
 * Copy OLD into NEW verbatim via struct assignment, preserving the
 * wired counts (contrast with vm_map_entry_copy(), which resets them).
 */
static inline void
vm_map_entry_copy_full(
	vm_map_entry_t new,
	vm_map_entry_t old)
{
	/*
	 * The struct assignment below overwrites NEW's backtrace refs with
	 * OLD's, so drop NEW's refs first and take an extra ref on OLD's.
	 */
#if MAP_ENTRY_CREATION_DEBUG
	btref_put(new->vme_creation_bt);
	btref_retain(old->vme_creation_bt);
#endif
#if MAP_ENTRY_INSERTION_DEBUG
	btref_put(new->vme_insertion_bt);
	btref_retain(old->vme_insertion_bt);
#endif
	*new = *old;
}
480
/*
 * Copy OLD into NEW for a *new* mapping of the same memory: start from a
 * full copy, then reset everything that must not carry over to a second
 * mapping (wiring, transient state, security flags).
 */
static inline void
vm_map_entry_copy(
	vm_map_t map,
	vm_map_entry_t new,
	vm_map_entry_t old)
{
	vm_map_entry_copy_full(new, old);

	/* transient / per-mapping state: must start clean on the copy */
	new->is_shared = FALSE;
	new->needs_wakeup = FALSE;
	new->in_transition = FALSE;
	/* the new entry maps the same memory but holds no wirings of its own */
	new->wired_count = 0;
	new->user_wired_count = 0;
	new->vme_permanent = FALSE;
	/* security-sensitive flags get platform-specific treatment */
	vm_map_entry_copy_code_signing(map, new, old);
	vm_map_entry_copy_pmap_cs_assoc(map, new, old);
	if (new->iokit_acct) {
		/* "iokit accounted" does not transfer; revert to pmap accounting */
		assertf(!new->use_pmap, "old %p new %p\n", old, new);
		new->iokit_acct = FALSE;
		new->use_pmap = TRUE;
	}
	new->vme_resilient_codesign = FALSE;
	new->vme_resilient_media = FALSE;
	new->vme_atomic = FALSE;
	new->vme_no_copy_on_read = FALSE;
}
507
508 /*
509 * Normal lock_read_to_write() returns FALSE/0 on failure.
510 * These functions evaluate to zero on success and non-zero value on failure.
511 */
512 __attribute__((always_inline))
513 int
vm_map_lock_read_to_write(vm_map_t map)514 vm_map_lock_read_to_write(vm_map_t map)
515 {
516 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
517 DTRACE_VM(vm_map_lock_upgrade);
518 return 0;
519 }
520 return 1;
521 }
522
523 __attribute__((always_inline))
524 boolean_t
vm_map_try_lock(vm_map_t map)525 vm_map_try_lock(vm_map_t map)
526 {
527 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
528 DTRACE_VM(vm_map_lock_w);
529 return TRUE;
530 }
531 return FALSE;
532 }
533
534 __attribute__((always_inline))
535 boolean_t
vm_map_try_lock_read(vm_map_t map)536 vm_map_try_lock_read(vm_map_t map)
537 {
538 if (lck_rw_try_lock_shared(&(map)->lock)) {
539 DTRACE_VM(vm_map_lock_r);
540 return TRUE;
541 }
542 return FALSE;
543 }
544
/*!
 * @function kdp_vm_map_is_acquired_exclusive
 *
 * @abstract
 * Checks if vm map is acquired exclusive.
 *
 * @discussion
 * NOT SAFE: To be used only by kernel debugger.
 *
 * @param map map to check
 *
 * @returns TRUE if the map is acquired exclusively.
 */
boolean_t
kdp_vm_map_is_acquired_exclusive(vm_map_t map)
{
	/* lock-free peek at the rw-lock state; only valid while in kdp */
	return kdp_lck_rw_lock_is_acquired_exclusive(&map->lock);
}
563
564 /*
565 * Routines to get the page size the caller should
566 * use while inspecting the target address space.
567 * Use the "_safely" variant if the caller is dealing with a user-provided
568 * array whose size depends on the page size, to avoid any overflow or
569 * underflow of a user-allocated buffer.
570 */
571 int
vm_self_region_page_shift_safely(vm_map_t target_map)572 vm_self_region_page_shift_safely(
573 vm_map_t target_map)
574 {
575 int effective_page_shift = 0;
576
577 if (PAGE_SIZE == (4096)) {
578 /* x86_64 and 4k watches: always use 4k */
579 return PAGE_SHIFT;
580 }
581 /* did caller provide an explicit page size for this thread to use? */
582 effective_page_shift = thread_self_region_page_shift();
583 if (effective_page_shift) {
584 /* use the explicitly-provided page size */
585 return effective_page_shift;
586 }
587 /* no explicit page size: use the caller's page size... */
588 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
589 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
590 /* page size match: safe to use */
591 return effective_page_shift;
592 }
593 /* page size mismatch */
594 return -1;
595 }
596 int
vm_self_region_page_shift(vm_map_t target_map)597 vm_self_region_page_shift(
598 vm_map_t target_map)
599 {
600 int effective_page_shift;
601
602 effective_page_shift = vm_self_region_page_shift_safely(target_map);
603 if (effective_page_shift == -1) {
604 /* no safe value but OK to guess for caller */
605 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
606 VM_MAP_PAGE_SHIFT(target_map));
607 }
608 return effective_page_shift;
609 }
610
611
612 /*
613 * Decide if we want to allow processes to execute from their data or stack areas.
614 * override_nx() returns true if we do. Data/stack execution can be enabled independently
615 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
616 * or allow_stack_exec to enable data execution for that type of data area for that particular
617 * ABI (or both by or'ing the flags together). These are initialized in the architecture
618 * specific pmap files since the default behavior varies according to architecture. The
619 * main reason it varies is because of the need to provide binary compatibility with old
620 * applications that were written before these restrictions came into being. In the old
621 * days, an app could execute anything it could read, but this has slowly been tightened
622 * up over time. The default behavior is:
623 *
624 * 32-bit PPC apps may execute from both stack and data areas
 * 32-bit Intel apps may execute from data areas but not stack
626 * 64-bit PPC/Intel apps may not execute from either data or stack
627 *
628 * An application on any architecture may override these defaults by explicitly
629 * adding PROT_EXEC permission to the page in question with the mprotect(2)
630 * system call. This code here just determines what happens when an app tries to
631 * execute from a page that lacks execute permission.
632 *
633 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
634 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
635 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
636 * execution from data areas for a particular binary even if the arch normally permits it. As
637 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
638 * to support some complicated use cases, notably browsers with out-of-process plugins that
639 * are not all NX-safe.
640 */
641
642 extern int allow_data_exec, allow_stack_exec;
643
644 int
override_nx(vm_map_t map,uint32_t user_tag)645 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
646 {
647 int current_abi;
648
649 if (map->pmap == kernel_pmap) {
650 return FALSE;
651 }
652
653 /*
654 * Determine if the app is running in 32 or 64 bit mode.
655 */
656
657 if (vm_map_is_64bit(map)) {
658 current_abi = VM_ABI_64;
659 } else {
660 current_abi = VM_ABI_32;
661 }
662
663 /*
664 * Determine if we should allow the execution based on whether it's a
665 * stack or data area and the current architecture.
666 */
667
668 if (user_tag == VM_MEMORY_STACK) {
669 return allow_stack_exec & current_abi;
670 }
671
672 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
673 }
674
675
676 /*
677 * Virtual memory maps provide for the mapping, protection,
678 * and sharing of virtual memory objects. In addition,
679 * this module provides for an efficient virtual copy of
680 * memory from one map to another.
681 *
682 * Synchronization is required prior to most operations.
683 *
684 * Maps consist of an ordered doubly-linked list of simple
685 * entries; a single hint is used to speed up lookups.
686 *
687 * Sharing maps have been deleted from this version of Mach.
688 * All shared objects are now mapped directly into the respective
689 * maps. This requires a change in the copy on write strategy;
690 * the asymmetric (delayed) strategy is used for shared temporary
691 * objects instead of the symmetric (shadow) strategy. All maps
692 * are now "top level" maps (either task map, kernel map or submap
693 * of the kernel map).
694 *
 * Since portions of maps are specified by start/end addresses,
696 * which may not align with existing map entries, all
697 * routines merely "clip" entries to these start/end values.
698 * [That is, an entry is split into two, bordering at a
699 * start or end value.] Note that these clippings may not
700 * always be necessary (as the two resulting entries are then
701 * not changed); however, the clipping is done for convenience.
702 * No attempt is currently made to "glue back together" two
703 * abutting entries.
704 *
705 * The symmetric (shadow) copy strategy implements virtual copy
706 * by copying VM object references from one map to
707 * another, and then marking both regions as copy-on-write.
708 * It is important to note that only one writeable reference
709 * to a VM object region exists in any map when this strategy
710 * is used -- this means that shadow object creation can be
711 * delayed until a write operation occurs. The symmetric (delayed)
712 * strategy allows multiple maps to have writeable references to
713 * the same region of a vm object, and hence cannot delay creating
714 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
715 * Copying of permanent objects is completely different; see
716 * vm_object_copy_strategically() in vm_object.c.
717 */
718
/* Dedicated zone for vm_map_copy objects (see vm_map_copy_require()). */
ZONE_DECLARE_ID(ZONE_ID_VM_MAP_COPY, struct vm_map_copy);

/* Zone names and creation flags for maps, map entries, and hole entries. */
#define VM_MAP_ZONE_NAME "maps"
#define VM_MAP_ZFLAGS ( \
	ZC_NOENCRYPT | \
	ZC_VM_LP64)

#define VM_MAP_ENTRY_ZONE_NAME "VM map entries"
#define VM_MAP_ENTRY_ZFLAGS ( \
	ZC_NOENCRYPT | \
	ZC_CACHING | \
	ZC_KASAN_NOQUARANTINE | \
	ZC_VM_LP64)

#define VM_MAP_HOLES_ZONE_NAME "VM map holes"
#define VM_MAP_HOLES_ZFLAGS ( \
	ZC_NOENCRYPT | \
	ZC_CACHING | \
	ZC_KASAN_NOQUARANTINE | \
	ZC_VM_LP64)
739
/*
 * Asserts that a vm_map_copy object is coming from the
 * vm_map_copy_zone to ensure that it isn't a fake constructed
 * anywhere else.
 */
void
vm_map_copy_require(struct vm_map_copy *copy)
{
	/* presumably panics on mismatch, like other zone_id_require users */
	zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
}
750
/*
 * vm_map_require:
 *
 * Ensures that the argument is memory allocated from the genuine
 * vm map zone. (See zone_id_require_allow_foreign).
 *
 * NOTE(review): the comment references zone_id_require_allow_foreign but
 * the code calls zone_id_require -- confirm which is intended.
 */
void
vm_map_require(vm_map_t map)
{
	zone_id_require(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
}
762
/*
 * Static storage used to bootstrap the first maps / map entries / hole
 * entries before the zone subsystem is up, plus the list of early maps
 * whose owners must be fixed up later.
 */
#define VM_MAP_EARLY_COUNT_MAX  16
static __startup_data vm_offset_t map_data;
static __startup_data vm_size_t map_data_size;
static __startup_data vm_offset_t kentry_data;
static __startup_data vm_size_t kentry_data_size;
static __startup_data vm_offset_t map_holes_data;
static __startup_data vm_size_t map_holes_data_size;
static __startup_data vm_map_t *early_map_owners[VM_MAP_EARLY_COUNT_MAX];
static __startup_data uint32_t early_map_count;

/* below this size, mappings are not coalesced (macOS only) */
#if XNU_TARGET_OS_OSX
#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
#else /* XNU_TARGET_OS_OSX */
#define NO_COALESCE_LIMIT 0
#endif /* XNU_TARGET_OS_OSX */

/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;

/* statistics: number of successful vm_map_set_cache_attr() calls */
unsigned int vm_map_set_cache_attr_count = 0;
783
784 kern_return_t
vm_map_set_cache_attr(vm_map_t map,vm_map_offset_t va)785 vm_map_set_cache_attr(
786 vm_map_t map,
787 vm_map_offset_t va)
788 {
789 vm_map_entry_t map_entry;
790 vm_object_t object;
791 kern_return_t kr = KERN_SUCCESS;
792
793 vm_map_lock_read(map);
794
795 if (!vm_map_lookup_entry(map, va, &map_entry) ||
796 map_entry->is_sub_map) {
797 /*
798 * that memory is not properly mapped
799 */
800 kr = KERN_INVALID_ARGUMENT;
801 goto done;
802 }
803 object = VME_OBJECT(map_entry);
804
805 if (object == VM_OBJECT_NULL) {
806 /*
807 * there should be a VM object here at this point
808 */
809 kr = KERN_INVALID_ARGUMENT;
810 goto done;
811 }
812 vm_object_lock(object);
813 object->set_cache_attr = TRUE;
814 vm_object_unlock(object);
815
816 vm_map_set_cache_attr_count++;
817 done:
818 vm_map_unlock_read(map);
819
820 return kr;
821 }
822
823
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 *
 * The [start, end) range may span several map entries; each iteration of
 * the loop below re-maps one entry.  The map lock is dropped while the
 * pager is set up, so the loop re-looks-up the entry each time around.
 */
kern_return_t
vm_map_apple_protected(
	vm_map_t                map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_object_offset_t      crypto_backing_offset,
	struct pager_crypt_info *crypt_info,
	uint32_t                cryptid)
{
	boolean_t       map_locked;
	kern_return_t   kr;
	vm_map_entry_t  map_entry;
	struct vm_map_entry tmp_entry;
	memory_object_t unprotected_mem_obj;
	vm_object_t     protected_object;
	vm_map_offset_t map_addr;
	vm_map_offset_t start_aligned, end_aligned;
	vm_object_offset_t      crypto_start, crypto_end;
	int             vm_flags;
	vm_map_kernel_flags_t vmk_flags;
	boolean_t       cache_pager;

	vm_flags = 0;
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

	map_locked = FALSE;
	unprotected_mem_obj = MEMORY_OBJECT_NULL;

	/* align the range to both the system and the map page size */
	start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
	end_aligned = vm_map_round_page(end, PAGE_MASK_64);
	start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
	end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));

#if __arm64__
	/*
	 * "start" and "end" might be 4K-aligned but not 16K-aligned,
	 * so we might have to loop and establish up to 3 mappings:
	 *
	 * + the first 16K-page, which might overlap with the previous
	 *   4K-aligned mapping,
	 * + the center,
	 * + the last 16K-page, which might overlap with the next
	 *   4K-aligned mapping.
	 * Each of these mapping might be backed by a vnode pager (if
	 * properly page-aligned) or a "fourk_pager", itself backed by a
	 * vnode pager (if 4K-aligned but not page-aligned).
	 */
#endif /* __arm64__ */

	map_addr = start_aligned;
	for (map_addr = start_aligned;
	    map_addr < end;
	    map_addr = tmp_entry.vme_end) {
		vm_map_lock(map);
		map_locked = TRUE;

		/* lookup the protected VM object */
		if (!vm_map_lookup_entry(map,
		    map_addr,
		    &map_entry) ||
		    map_entry->is_sub_map ||
		    VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
			/* that memory is not properly mapped */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		/* ensure mapped memory is mapped as executable,
		 * except for the model decryption flow */
		if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
		    !(map_entry->protection & VM_PROT_EXECUTE)) {
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		/* get the protected object to be decrypted */
		protected_object = VME_OBJECT(map_entry);
		if (protected_object == VM_OBJECT_NULL) {
			/* there should be a VM object here at this point */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}
		/* ensure protected object stays alive while map is unlocked */
		vm_object_reference(protected_object);

		/* limit the map entry to the area we want to cover */
		vm_map_clip_start(map, map_entry, start_aligned);
		vm_map_clip_end(map, map_entry, end_aligned);

		tmp_entry = *map_entry;
		map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
		vm_map_unlock(map);
		map_locked = FALSE;

		/*
		 * This map entry might be only partially encrypted
		 * (if not fully "page-aligned").
		 */
		crypto_start = 0;
		crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
		if (tmp_entry.vme_start < start) {
			if (tmp_entry.vme_start != start_aligned) {
				/*
				 * NOTE(review): kr is set but execution
				 * continues, so it may be overwritten by a
				 * later iteration -- confirm this is intended.
				 */
				kr = KERN_INVALID_ADDRESS;
			}
			crypto_start += (start - tmp_entry.vme_start);
		}
		if (tmp_entry.vme_end > end) {
			if (tmp_entry.vme_end != end_aligned) {
				/* NOTE(review): same unconsumed-kr pattern as above */
				kr = KERN_INVALID_ADDRESS;
			}
			crypto_end -= (tmp_entry.vme_end - end);
		}

		/*
		 * This "extra backing offset" is needed to get the decryption
		 * routine to use the right key. It adjusts for the possibly
		 * relative offset of an interposed "4K" pager...
		 */
		if (crypto_backing_offset == (vm_object_offset_t) -1) {
			crypto_backing_offset = VME_OFFSET(&tmp_entry);
		}

		cache_pager = TRUE;
#if XNU_TARGET_OS_OSX
		if (vm_map_is_alien(map)) {
			cache_pager = FALSE;
		}
#endif /* XNU_TARGET_OS_OSX */

		/*
		 * Lookup (and create if necessary) the protected memory object
		 * matching that VM object.
		 * If successful, this also grabs a reference on the memory object,
		 * to guarantee that it doesn't go away before we get a chance to map
		 * it.
		 */
		unprotected_mem_obj = apple_protect_pager_setup(
			protected_object,
			VME_OFFSET(&tmp_entry),
			crypto_backing_offset,
			crypt_info,
			crypto_start,
			crypto_end,
			cache_pager);

		/* release extra ref on protected object */
		vm_object_deallocate(protected_object);

		if (unprotected_mem_obj == NULL) {
			kr = KERN_FAILURE;
			goto done;
		}

		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
		/* can overwrite an immutable mapping */
		vmk_flags.vmkf_overwrite_immutable = TRUE;
#if __arm64__
		if (tmp_entry.used_for_jit &&
		    (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
		    PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
		    fourk_binary_compatibility_unsafe &&
		    fourk_binary_compatibility_allow_wx) {
			printf("** FOURK_COMPAT [%d]: "
			    "allowing write+execute at 0x%llx\n",
			    proc_selfpid(), tmp_entry.vme_start);
			vmk_flags.vmkf_map_jit = TRUE;
		}
#endif /* __arm64__ */

		/* map this memory object in place of the current one */
		map_addr = tmp_entry.vme_start;
		kr = vm_map_enter_mem_object(map,
		    &map_addr,
		    (tmp_entry.vme_end -
		    tmp_entry.vme_start),
		    (mach_vm_offset_t) 0,
		    vm_flags,
		    vmk_flags,
		    VM_KERN_MEMORY_NONE,
		    (ipc_port_t)(uintptr_t) unprotected_mem_obj,
		    0,
		    TRUE,
		    tmp_entry.protection,
		    tmp_entry.max_protection,
		    tmp_entry.inheritance);
		/* VM_FLAGS_FIXED|OVERWRITE at an existing mapping should not fail */
		assertf(kr == KERN_SUCCESS,
		    "kr = 0x%x\n", kr);
		assertf(map_addr == tmp_entry.vme_start,
		    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
		    (uint64_t)map_addr,
		    (uint64_t) tmp_entry.vme_start,
		    &tmp_entry);

#if VM_MAP_DEBUG_APPLE_PROTECT
		if (vm_map_debug_apple_protect) {
			printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
			    " backing:[object:%p,offset:0x%llx,"
			    "crypto_backing_offset:0x%llx,"
			    "crypto_start:0x%llx,crypto_end:0x%llx]\n",
			    map,
			    (uint64_t) map_addr,
			    (uint64_t) (map_addr + (tmp_entry.vme_end -
			    tmp_entry.vme_start)),
			    unprotected_mem_obj,
			    protected_object,
			    VME_OFFSET(&tmp_entry),
			    crypto_backing_offset,
			    crypto_start,
			    crypto_end);
		}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

		/*
		 * Release the reference obtained by
		 * apple_protect_pager_setup().
		 * The mapping (if it succeeded) is now holding a reference on
		 * the memory object.
		 */
		memory_object_deallocate(unprotected_mem_obj);
		unprotected_mem_obj = MEMORY_OBJECT_NULL;

		/* continue with next map entry */
		crypto_backing_offset += (tmp_entry.vme_end -
		    tmp_entry.vme_start);
		crypto_backing_offset -= crypto_start;
	}
	kr = KERN_SUCCESS;

done:
	if (map_locked) {
		vm_map_unlock(map);
	}
	return kr;
}
#endif /* CONFIG_CODE_DECRYPTION */
1068
1069
/* Lock group and attributes shared by all VM map locks. */
LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);

/*
 * Whether malloc-tagged regions are mapped without copy-on-write:
 * off by default on macOS, on by default on other (embedded) targets.
 * Overridable with the "malloc_no_cow" boot-arg (see vm_map_init()).
 */
#if XNU_TARGET_OS_OSX
int malloc_no_cow = 0;
#else /* XNU_TARGET_OS_OSX */
int malloc_no_cow = 1;
#endif /* XNU_TARGET_OS_OSX */
/* Bitmask of VM_MEMORY_* tags excluded from CoW; populated in vm_map_init(). */
uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
#if DEBUG
/* Enabled via the "vm_check_map_sanity" boot-arg (see vm_map_init()). */
int vm_check_map_sanity = 0;
#endif
1083
1084 /*
1085 * vm_map_init:
1086 *
1087 * Initialize the vm_map module. Must be called before
1088 * any other vm_map routines.
1089 *
1090 * Map and entry structures are allocated from zones -- we must
1091 * initialize those zones.
1092 *
1093 * There are three zones of interest:
1094 *
1095 * vm_map_zone: used to allocate maps.
1096 * vm_map_entry_zone: used to allocate map entries.
1097 *
1098 * LP32:
1099 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1100 *
1101 * The kernel allocates map entries from a special zone that is initially
1102 * "crammed" with memory. It would be difficult (perhaps impossible) for
 * the kernel to allocate more memory to an entry zone when it became
1104 * empty since the very act of allocating memory implies the creation
1105 * of a new entry.
1106 */
1107 __startup_func
1108 void
vm_map_init(void)1109 vm_map_init(void)
1110 {
1111
1112 #if MACH_ASSERT
1113 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1114 sizeof(debug4k_filter));
1115 #endif /* MACH_ASSERT */
1116
1117 zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1118 VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
1119
1120 /*
1121 * Don't quarantine because we always need elements available
1122 * Disallow GC on this zone... to aid the GC.
1123 */
1124 zone_create_ext(VM_MAP_ENTRY_ZONE_NAME,
1125 sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1126 ZONE_ID_VM_MAP_ENTRY, ^(zone_t z) {
1127 z->z_elems_rsv = (uint16_t)(32 *
1128 (ml_early_cpu_max_number() + 1));
1129 });
1130
1131 zone_create_ext(VM_MAP_HOLES_ZONE_NAME,
1132 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1133 ZONE_ID_VM_MAP_HOLES, ^(zone_t z) {
1134 z->z_elems_rsv = (uint16_t)(16 * 1024 / zone_elem_size(z));
1135 });
1136
1137 zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1138 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1139
1140 /*
1141 * Add the stolen memory to zones, adjust zone size and stolen counts.
1142 */
1143 zone_cram_early(vm_map_zone, map_data, map_data_size);
1144 zone_cram_early(vm_map_entry_zone, kentry_data, kentry_data_size);
1145 zone_cram_early(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1146 printf("VM boostrap: %d maps, %d entries and %d holes available\n",
1147 vm_map_zone->z_elems_free,
1148 vm_map_entry_zone->z_elems_free,
1149 vm_map_holes_zone->z_elems_free);
1150
1151 /*
1152 * Since these are covered by zones, remove them from stolen page accounting.
1153 */
1154 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1155
1156 #if VM_MAP_DEBUG_APPLE_PROTECT
1157 PE_parse_boot_argn("vm_map_debug_apple_protect",
1158 &vm_map_debug_apple_protect,
1159 sizeof(vm_map_debug_apple_protect));
1160 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1161 #if VM_MAP_DEBUG_APPLE_FOURK
1162 PE_parse_boot_argn("vm_map_debug_fourk",
1163 &vm_map_debug_fourk,
1164 sizeof(vm_map_debug_fourk));
1165 #endif /* VM_MAP_DEBUG_FOURK */
1166
1167 PE_parse_boot_argn("malloc_no_cow",
1168 &malloc_no_cow,
1169 sizeof(malloc_no_cow));
1170 if (malloc_no_cow) {
1171 vm_memory_malloc_no_cow_mask = 0ULL;
1172 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1173 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1174 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1175 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1176 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1177 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1178 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1179 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1180 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1181 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1182 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1183 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1184 &vm_memory_malloc_no_cow_mask,
1185 sizeof(vm_memory_malloc_no_cow_mask));
1186 }
1187
1188 #if CONFIG_MAP_RANGES
1189 vm_map_range_map_init();
1190 #endif /* CONFIG_MAP_RANGES */
1191
1192 #if DEBUG
1193 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1194 if (vm_check_map_sanity) {
1195 kprintf("VM sanity checking enabled\n");
1196 } else {
1197 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1198 }
1199 #endif /* DEBUG */
1200
1201 #if DEVELOPMENT || DEBUG
1202 PE_parse_boot_argn("panic_on_unsigned_execute",
1203 &panic_on_unsigned_execute,
1204 sizeof(panic_on_unsigned_execute));
1205 PE_parse_boot_argn("panic_on_mlock_failure",
1206 &panic_on_mlock_failure,
1207 sizeof(panic_on_mlock_failure));
1208 #endif /* DEVELOPMENT || DEBUG */
1209 }
1210
__startup_func
static void
vm_map_steal_memory(void)
{
	/*
	 * We need to reserve enough memory to support bootstrapping VM maps
	 * and the zone subsystem.
	 *
	 * The VM Maps that need to function before zones can support them
	 * are the ones registered with vm_map_will_allocate_early_map(),
	 * which are:
	 * - the kernel map
	 * - the various submaps used by zones (pgz, meta, ...)
	 *
	 * We also need enough entries and holes to support them
	 * until zone_metadata_init() is called, which is when
	 * the zone allocator becomes capable of expanding dynamically.
	 *
	 * We need:
	 * - VM_MAP_EARLY_COUNT_MAX worth of VM Maps.
	 * - To allow for 3-4 entries per map, but the kernel map
	 *   needs a multiple of VM_MAP_EARLY_COUNT_MAX entries
	 *   to describe the submaps, so double it (and make it 8x too)
	 * - To allow for holes between entries,
	 *   hence needs the same budget as entries
	 */
	map_data_size = zone_get_early_alloc_size(VM_MAP_ZONE_NAME,
	    sizeof(struct _vm_map), VM_MAP_ZFLAGS,
	    VM_MAP_EARLY_COUNT_MAX);

	kentry_data_size = zone_get_early_alloc_size(VM_MAP_ENTRY_ZONE_NAME,
	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
	    8 * VM_MAP_EARLY_COUNT_MAX);

	map_holes_data_size = zone_get_early_alloc_size(VM_MAP_HOLES_ZONE_NAME,
	    sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
	    8 * VM_MAP_EARLY_COUNT_MAX);

	/*
	 * Steal a contiguous range of memory so that a simple range check
	 * can validate early addresses being freed/crammed to these
	 * zones.  The three sub-ranges are laid out back to back:
	 * [ maps | entries | holes ].
	 */
	map_data = zone_early_mem_init(map_data_size + kentry_data_size +
	    map_holes_data_size);
	kentry_data = map_data + map_data_size;
	map_holes_data = kentry_data + kentry_data_size;
}
STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1260
1261 __startup_func
1262 static void
vm_kernel_boostraped(void)1263 vm_kernel_boostraped(void)
1264 {
1265 printf("VM bootstrap done: %d maps, %d entries and %d holes left\n",
1266 vm_map_zone->z_elems_free,
1267 vm_map_entry_zone->z_elems_free,
1268 vm_map_holes_zone->z_elems_free);
1269 }
1270 STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_kernel_boostraped);
1271
/*
 * Tear down a map's hole list (a circular doubly-linked list of
 * vm_map_links headed by map->holes_list) and fall back to the
 * "first_free" hint scheme for free-space lookups.
 */
void
vm_map_disable_hole_optimization(vm_map_t map)
{
	vm_map_entry_t head_entry, hole_entry, next_hole_entry;

	if (map->holelistenabled) {
		head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

		/* walk the circular list, freeing each hole element */
		while (hole_entry != NULL) {
			next_hole_entry = hole_entry->vme_next;

			/* unlink before freeing so no dangling links remain */
			hole_entry->vme_next = NULL;
			hole_entry->vme_prev = NULL;
			zfree_id(ZONE_ID_VM_MAP_HOLES, hole_entry);

			/* stop once we wrap back around to the head */
			if (next_hole_entry == head_entry) {
				hole_entry = NULL;
			} else {
				hole_entry = next_hole_entry;
			}
		}

		map->holes_list = NULL;
		map->holelistenabled = FALSE;

		/* re-seed the legacy free-space hints */
		map->first_free = vm_map_first_entry(map);
		SAVE_HINT_HOLE_WRITE(map, NULL);
	}
}
1301
1302 boolean_t
vm_kernel_map_is_kernel(vm_map_t map)1303 vm_kernel_map_is_kernel(vm_map_t map)
1304 {
1305 return map->pmap == kernel_pmap;
1306 }
1307
1308 /*
1309 * vm_map_create:
1310 *
1311 * Creates and returns a new empty VM map with
1312 * the given physical map structure, and having
1313 * the given lower and upper address bounds.
1314 */
1315
1316 extern vm_map_t vm_map_create_external(
1317 pmap_t pmap,
1318 vm_map_offset_t min_off,
1319 vm_map_offset_t max_off,
1320 boolean_t pageable);
1321
1322 vm_map_t
vm_map_create_external(pmap_t pmap,vm_map_offset_t min,vm_map_offset_t max,boolean_t pageable)1323 vm_map_create_external(
1324 pmap_t pmap,
1325 vm_map_offset_t min,
1326 vm_map_offset_t max,
1327 boolean_t pageable)
1328 {
1329 vm_map_create_options_t options = VM_MAP_CREATE_DEFAULT;
1330
1331 if (pageable) {
1332 options |= VM_MAP_CREATE_PAGEABLE;
1333 }
1334 return vm_map_create_options(pmap, min, max, options);
1335 }
1336
1337 __startup_func
1338 void
vm_map_will_allocate_early_map(vm_map_t * owner)1339 vm_map_will_allocate_early_map(vm_map_t *owner)
1340 {
1341 if (early_map_count >= VM_MAP_EARLY_COUNT_MAX) {
1342 panic("VM_MAP_EARLY_COUNT_MAX is too low");
1343 }
1344
1345 early_map_owners[early_map_count++] = owner;
1346 }
1347
1348 __startup_func
1349 void
vm_map_relocate_early_maps(vm_offset_t delta)1350 vm_map_relocate_early_maps(vm_offset_t delta)
1351 {
1352 for (uint32_t i = 0; i < early_map_count; i++) {
1353 vm_address_t addr = (vm_address_t)*early_map_owners[i];
1354
1355 *early_map_owners[i] = (vm_map_t)(addr + delta);
1356 }
1357
1358 early_map_count = ~0u;
1359 }
1360
1361 /*
1362 * Routine: vm_map_relocate_early_elem
1363 *
1364 * Purpose:
1365 * Early zone elements are allocated in a temporary part
1366 * of the address space.
1367 *
1368 * Once the zones live in their final place, the early
1369 * VM maps, map entries and map holes need to be relocated.
1370 *
1371 * It involves rewriting any vm_map_t, vm_map_entry_t or
1372 * pointers to vm_map_links. Other pointers to other types
1373 * are fine.
1374 *
1375 * Fortunately, pointers to those types are self-contained
1376 * in those zones, _except_ for pointers to VM maps,
1377 * which are tracked during early boot and fixed with
1378 * vm_map_relocate_early_maps().
1379 */
__startup_func
void
vm_map_relocate_early_elem(
	uint32_t zone_id,
	vm_offset_t new_addr,
	vm_offset_t delta)
{
	/*
	 * Slide the pointer stored in `field` of the element now living at
	 * `new_addr` by `delta`, leaving NULL pointers untouched.
	 */
#define relocate(type_t, field)  ({ \
	typeof(((type_t)NULL)->field) *__field = &((type_t)new_addr)->field; \
	if (*__field) { \
	        *__field = (typeof(*__field))((vm_offset_t)*__field + delta); \
	} \
})

	/* only the three early-boot zones can hold relocatable elements */
	switch (zone_id) {
	case ZONE_ID_VM_MAP:
	case ZONE_ID_VM_MAP_ENTRY:
	case ZONE_ID_VM_MAP_HOLES:
		break;

	default:
		panic("Unexpected zone ID %d", zone_id);
	}

	if (zone_id == ZONE_ID_VM_MAP) {
		relocate(vm_map_t, hdr.links.prev);
		relocate(vm_map_t, hdr.links.next);
		/* all early maps are kernel maps (see vm_map_create_options) */
		((vm_map_t)new_addr)->pmap = kernel_pmap;
#ifdef VM_MAP_STORE_USE_RB
		relocate(vm_map_t, hdr.rb_head_store.rbh_root);
#endif /* VM_MAP_STORE_USE_RB */
		relocate(vm_map_t, hint);
		relocate(vm_map_t, hole_hint);
		relocate(vm_map_t, first_free);
		return;
	}

	/* entries and holes both start with a vm_map_links: fix prev/next */
	relocate(struct vm_map_links *, prev);
	relocate(struct vm_map_links *, next);

	if (zone_id == ZONE_ID_VM_MAP_ENTRY) {
#ifdef VM_MAP_STORE_USE_RB
		relocate(vm_map_entry_t, store.entry.rbe_left);
		relocate(vm_map_entry_t, store.entry.rbe_right);
		relocate(vm_map_entry_t, store.entry.rbe_parent);
#endif /* VM_MAP_STORE_USE_RB */
		if (((vm_map_entry_t)new_addr)->is_sub_map) {
			/* no object to relocate because we haven't made any */
			/* vme_submap is stored shifted, so shift the delta too */
			((vm_map_entry_t)new_addr)->vme_submap +=
			    delta >> VME_SUBMAP_SHIFT;
		}
#if MAP_ENTRY_CREATION_DEBUG
		relocate(vm_map_entry_t, vme_creation_maphdr);
#endif /* MAP_ENTRY_CREATION_DEBUG */
	}

#undef relocate
}
1438
/*
 * Allocate and initialize a new VM map covering [min, max) on top of
 * the given pmap.  Returns a map with a single reference.  Cannot fail
 * (Z_NOFAIL allocation).
 */
vm_map_t
vm_map_create_options(
	pmap_t pmap,
	vm_map_offset_t min,
	vm_map_offset_t max,
	vm_map_create_options_t options)
{
	vm_map_t result;

#if DEBUG || DEVELOPMENT
	if (__improbable(startup_phase < STARTUP_SUB_ZALLOC)) {
		/* early maps must have been registered via
		 * vm_map_will_allocate_early_map() and be kernel maps */
		if (early_map_count != ~0u && early_map_count !=
		    zone_count_allocated(vm_map_zone) + 1) {
			panic("allocating %dth early map, owner not known",
			    zone_count_allocated(vm_map_zone) + 1);
		}
		if (early_map_count != ~0u && pmap && pmap != kernel_pmap) {
			panic("allocating %dth early map for non kernel pmap",
			    early_map_count);
		}
	}
#endif /* DEBUG || DEVELOPMENT */

	result = zalloc_id(ZONE_ID_VM_MAP, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	/* empty map: first/last entry both point at the map header */
	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result) = vm_map_to_entry(result);

	vm_map_store_init(&result->hdr);
	result->hdr.entries_pageable = (bool)(options & VM_MAP_CREATE_PAGEABLE);
	vm_map_set_page_shift(result, PAGE_SHIFT);

	result->size_limit = RLIM_INFINITY;             /* default unlimited */
	result->data_limit = RLIM_INFINITY;             /* default unlimited */
	result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
	os_ref_init_count_raw(&result->map_refcnt, &map_refgrp, 1);
	result->pmap = pmap;
	result->min_offset = min;
	result->max_offset = max;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);

	if (options & VM_MAP_CREATE_NEVER_FAULTS) {
		assert(pmap == kernel_pmap);
		result->never_faults = true;
	}

	/* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
	if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
		result->has_corpse_footprint = true;
	} else if (!(options & VM_MAP_CREATE_DISABLE_HOLELIST)) {
		struct vm_map_links *hole_entry;

		/* seed the hole list with one hole spanning the whole map */
		hole_entry = zalloc_id(ZONE_ID_VM_MAP_HOLES, Z_WAITOK | Z_NOFAIL);
		hole_entry->start = min;
#if defined(__arm64__)
		hole_entry->end = result->max_offset;
#else
		hole_entry->end = MAX(max, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
#endif
		result->holes_list = result->hole_hint = hole_entry;
		/* circular list of one element */
		hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
		result->holelistenabled = true;
	}

	vm_map_lock_init(result);

	return result;
}
1508
1509 /*
1510 * Adjusts a submap that was made by kmem_suballoc()
1511 * before it knew where it would be mapped,
1512 * so that it has the right min/max offsets.
1513 *
1514 * We do not need to hold any locks:
1515 * only the caller knows about this map,
1516 * and it is not published on any entry yet.
1517 */
1518 static void
vm_map_adjust_offsets(vm_map_t map,vm_map_offset_t min_off,vm_map_offset_t max_off)1519 vm_map_adjust_offsets(
1520 vm_map_t map,
1521 vm_map_offset_t min_off,
1522 vm_map_offset_t max_off)
1523 {
1524 assert(map->min_offset == 0);
1525 assert(map->max_offset == max_off - min_off);
1526 assert(map->hdr.nentries == 0);
1527 assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
1528
1529 map->min_offset = min_off;
1530 map->max_offset = max_off;
1531
1532 if (map->holelistenabled) {
1533 struct vm_map_links *hole = map->holes_list;
1534
1535 hole->start = min_off;
1536 #if defined(__arm64__)
1537 hole->end = max_off;
1538 #else
1539 hole->end = MAX(max_off, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
1540 #endif
1541 }
1542 }
1543
1544
1545 vm_map_size_t
vm_map_adjusted_size(vm_map_t map)1546 vm_map_adjusted_size(vm_map_t map)
1547 {
1548 struct vm_reserved_region *regions = NULL;
1549 size_t num_regions = 0;
1550 mach_vm_size_t reserved_size = 0, map_size = 0;
1551
1552 if (map == NULL || (map->size == 0)) {
1553 return 0;
1554 }
1555
1556 map_size = map->size;
1557
1558 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1559 /*
1560 * No special reserved regions or not an exotic map or the task
1561 * is terminating and these special regions might have already
1562 * been deallocated.
1563 */
1564 return map_size;
1565 }
1566
1567 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), ®ions);
1568 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1569
1570 while (num_regions) {
1571 reserved_size += regions[--num_regions].vmrr_size;
1572 }
1573
1574 /*
1575 * There are a few places where the map is being switched out due to
1576 * 'termination' without that bit being set (e.g. exec and corpse purging).
1577 * In those cases, we could have the map's regions being deallocated on
1578 * a core while some accounting process is trying to get the map's size.
1579 * So this assert can't be enabled till all those places are uniform in
1580 * their use of the 'map->terminated' bit.
1581 *
1582 * assert(map_size >= reserved_size);
1583 */
1584
1585 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1586 }
1587
1588 /*
1589 * vm_map_entry_create: [ internal use only ]
1590 *
1591 * Allocates a VM map entry for insertion in the
1592 * given map (or map copy). No fields are filled.
1593 *
1594 * The VM entry will be zero initialized, except for:
1595 * - behavior set to VM_BEHAVIOR_DEFAULT
1596 * - inheritance set to VM_INHERIT_DEFAULT
1597 */
1598 #define vm_map_entry_create(map) _vm_map_entry_create(&(map)->hdr)
1599
1600 #define vm_map_copy_entry_create(copy) _vm_map_entry_create(&(copy)->cpy_hdr)
1601
1602 static vm_map_entry_t
_vm_map_entry_create(struct vm_map_header * map_header __unused)1603 _vm_map_entry_create(
1604 struct vm_map_header *map_header __unused)
1605 {
1606 vm_map_entry_t entry = NULL;
1607
1608 entry = zalloc_id(ZONE_ID_VM_MAP_ENTRY, Z_WAITOK | Z_ZERO);
1609
1610 /*
1611 * Help the compiler with what we know to be true,
1612 * so that the further bitfields inits have good codegen.
1613 *
1614 * See rdar://87041299
1615 */
1616 __builtin_assume(entry->vme_object_value == 0);
1617 __builtin_assume(*(uint64_t *)(&entry->vme_object_value + 1) == 0);
1618 __builtin_assume(*(uint64_t *)(&entry->vme_object_value + 2) == 0);
1619
1620 static_assert(VM_MAX_TAG_VALUE <= VME_ALIAS_MASK,
1621 "VME_ALIAS_MASK covers tags");
1622
1623 static_assert(VM_BEHAVIOR_DEFAULT == 0,
1624 "can skip zeroing of the behavior field");
1625 entry->inheritance = VM_INHERIT_DEFAULT;
1626
1627 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1628
1629 #if MAP_ENTRY_CREATION_DEBUG
1630 entry->vme_creation_maphdr = map_header;
1631 entry->vme_creation_bt = btref_get(__builtin_frame_address(0),
1632 BTREF_GET_NOWAIT);
1633 #endif
1634 return entry;
1635 }
1636
1637 /*
1638 * vm_map_entry_dispose: [ internal use only ]
1639 *
1640 * Inverse of vm_map_entry_create.
1641 *
1642 * write map lock held so no need to
1643 * do anything special to insure correctness
1644 * of the stores
1645 */
1646 static void
vm_map_entry_dispose(vm_map_entry_t entry)1647 vm_map_entry_dispose(
1648 vm_map_entry_t entry)
1649 {
1650 #if MAP_ENTRY_CREATION_DEBUG
1651 btref_put(entry->vme_creation_bt);
1652 #endif
1653 #if MAP_ENTRY_INSERTION_DEBUG
1654 btref_put(entry->vme_insertion_bt);
1655 #endif
1656 zfree(vm_map_entry_zone, entry);
1657 }
1658
1659 #define vm_map_copy_entry_dispose(copy_entry) \
1660 vm_map_entry_dispose(copy_entry)
1661
1662 static vm_map_entry_t
vm_map_zap_first_entry(vm_map_zap_t list)1663 vm_map_zap_first_entry(
1664 vm_map_zap_t list)
1665 {
1666 return list->vmz_head;
1667 }
1668
1669 static vm_map_entry_t
vm_map_zap_last_entry(vm_map_zap_t list)1670 vm_map_zap_last_entry(
1671 vm_map_zap_t list)
1672 {
1673 assert(vm_map_zap_first_entry(list));
1674 return __container_of(list->vmz_tail, struct vm_map_entry, vme_next);
1675 }
1676
/*
 * Append an entry to a zap list (singly linked via vme_next,
 * vmz_tail pointing at the last entry's vme_next slot).
 * The store order below is significant: terminate, link, advance.
 */
static void
vm_map_zap_append(
	vm_map_zap_t list,
	vm_map_entry_t entry)
{
	entry->vme_next = VM_MAP_ENTRY_NULL;
	*list->vmz_tail = entry;
	list->vmz_tail = &entry->vme_next;
}
1686
1687 static vm_map_entry_t
vm_map_zap_pop(vm_map_zap_t list)1688 vm_map_zap_pop(
1689 vm_map_zap_t list)
1690 {
1691 vm_map_entry_t head = list->vmz_head;
1692
1693 if (head != VM_MAP_ENTRY_NULL &&
1694 (list->vmz_head = head->vme_next) == VM_MAP_ENTRY_NULL) {
1695 list->vmz_tail = &list->vmz_head;
1696 }
1697
1698 return head;
1699 }
1700
1701 static void
vm_map_zap_dispose(vm_map_zap_t list)1702 vm_map_zap_dispose(
1703 vm_map_zap_t list)
1704 {
1705 vm_map_entry_t entry;
1706
1707 while ((entry = vm_map_zap_pop(list))) {
1708 if (entry->is_sub_map) {
1709 vm_map_deallocate(VME_SUBMAP(entry));
1710 } else {
1711 vm_object_deallocate(VME_OBJECT(entry));
1712 }
1713
1714 vm_map_entry_dispose(entry);
1715 }
1716 }
1717
#if MACH_ASSERT
/* checking is opt-in; everything is considered valid until enabled */
static boolean_t first_free_check = FALSE;
boolean_t
first_free_is_valid(
	vm_map_t map)
{
	return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif /* MACH_ASSERT */
1731
1732
1733 #define vm_map_copy_entry_link(copy, after_where, entry) \
1734 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1735
1736 #define vm_map_copy_entry_unlink(copy, entry) \
1737 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry), false)
1738
1739 /*
1740 * vm_map_destroy:
1741 *
1742 * Actually destroy a map.
1743 */
1744 void
vm_map_destroy(vm_map_t map)1745 vm_map_destroy(
1746 vm_map_t map)
1747 {
1748 /* final cleanup: this is not allowed to fail */
1749 vmr_flags_t flags = VM_MAP_REMOVE_NO_FLAGS;
1750
1751 VM_MAP_ZAP_DECLARE(zap);
1752
1753 vm_map_lock(map);
1754
1755 map->terminated = true;
1756 /* clean up regular map entries */
1757 (void)vm_map_delete(map, map->min_offset, map->max_offset, flags,
1758 KMEM_GUARD_NONE, &zap);
1759 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1760 (void)vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL, flags,
1761 KMEM_GUARD_NONE, &zap);
1762
1763 vm_map_disable_hole_optimization(map);
1764 vm_map_corpse_footprint_destroy(map);
1765
1766 vm_map_unlock(map);
1767
1768 vm_map_zap_dispose(&zap);
1769
1770 assert(map->hdr.nentries == 0);
1771
1772 if (map->pmap) {
1773 pmap_destroy(map->pmap);
1774 }
1775
1776 lck_rw_destroy(&map->lock, &vm_map_lck_grp);
1777
1778 zfree_id(ZONE_ID_VM_MAP, map);
1779 }
1780
1781 /*
1782 * Returns pid of the task with the largest number of VM map entries.
1783 * Used in the zone-map-exhaustion jetsam path.
1784 */
1785 pid_t
find_largest_process_vm_map_entries(void)1786 find_largest_process_vm_map_entries(void)
1787 {
1788 pid_t victim_pid = -1;
1789 int max_vm_map_entries = 0;
1790 task_t task = TASK_NULL;
1791 queue_head_t *task_list = &tasks;
1792
1793 lck_mtx_lock(&tasks_threads_lock);
1794 queue_iterate(task_list, task, task_t, tasks) {
1795 if (task == kernel_task || !task->active) {
1796 continue;
1797 }
1798
1799 vm_map_t task_map = task->map;
1800 if (task_map != VM_MAP_NULL) {
1801 int task_vm_map_entries = task_map->hdr.nentries;
1802 if (task_vm_map_entries > max_vm_map_entries) {
1803 max_vm_map_entries = task_vm_map_entries;
1804 victim_pid = pid_from_task(task);
1805 }
1806 }
1807 }
1808 lck_mtx_unlock(&tasks_threads_lock);
1809
1810 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1811 return victim_pid;
1812 }
1813
1814
1815 /*
1816 * vm_map_lookup_entry: [ internal use only ]
1817 *
1818 * Calls into the vm map store layer to find the map
1819 * entry containing (or immediately preceding) the
1820 * specified address in the given map; the entry is returned
1821 * in the "entry" parameter. The boolean
1822 * result indicates whether the address is
1823 * actually contained in the map.
1824 */
1825 boolean_t
vm_map_lookup_entry(vm_map_t map,vm_map_offset_t address,vm_map_entry_t * entry)1826 vm_map_lookup_entry(
1827 vm_map_t map,
1828 vm_map_offset_t address,
1829 vm_map_entry_t *entry) /* OUT */
1830 {
1831 #if CONFIG_KERNEL_TBI
1832 if (VM_KERNEL_ADDRESS(address)) {
1833 address = VM_KERNEL_STRIP_UPTR(address);
1834 }
1835 #endif /* CONFIG_KERNEL_TBI */
1836 #if CONFIG_PROB_GZALLOC
1837 if (map->pmap == kernel_pmap) {
1838 assertf(!pgz_owned(address),
1839 "it is the responsibility of callers to unguard PGZ addresses");
1840 }
1841 #endif /* CONFIG_PROB_GZALLOC */
1842 return vm_map_store_lookup_entry( map, address, entry );
1843 }
1844
1845 boolean_t
vm_map_lookup_entry_or_next(vm_map_t map,vm_map_offset_t address,vm_map_entry_t * entry)1846 vm_map_lookup_entry_or_next(
1847 vm_map_t map,
1848 vm_map_offset_t address,
1849 vm_map_entry_t *entry) /* OUT */
1850 {
1851 if (vm_map_lookup_entry(map, address, entry)) {
1852 return true;
1853 }
1854
1855 *entry = (*entry)->vme_next;
1856 return false;
1857 }
1858
#if CONFIG_PROB_GZALLOC
/*
 * Variant of vm_map_lookup_entry() that skips the "no PGZ-guarded
 * address" assertion, for callers that legitimately pass guarded
 * kernel addresses.
 */
boolean_t
vm_map_lookup_entry_allow_pgz(
	vm_map_t map,
	vm_map_offset_t address,
	vm_map_entry_t *entry) /* OUT */
{
#if CONFIG_KERNEL_TBI
	/* strip TBI tag bits so the store layer sees a canonical address */
	if (VM_KERNEL_ADDRESS(address)) {
		address = VM_KERNEL_STRIP_UPTR(address);
	}
#endif /* CONFIG_KERNEL_TBI */
	return vm_map_store_lookup_entry( map, address, entry );
}
#endif /* CONFIG_PROB_GZALLOC */
1874
1875 #if !ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
1876 /*
1877 * Routine: vm_map_adjust_direction
1878 * Purpose:
1879 * Overrides direction to reduce fragmentation. Allocate small
1880 * allocations from the end and large allocations from the right.
1881 */
1882 static void
vm_map_adjust_direction(vm_map_kernel_flags_t * vmk_flags,vm_map_size_t size)1883 vm_map_adjust_direction(
1884 vm_map_kernel_flags_t *vmk_flags,
1885 vm_map_size_t size)
1886 {
1887 if (size < KMEM_SMALLMAP_THRESHOLD) {
1888 vmk_flags->vmkf_last_free = true;
1889 } else {
1890 vmk_flags->vmkf_last_free = false;
1891 }
1892 }
1893 #endif /* !ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
1894
1895 /*
1896 * Routine: vm_map_range_invalid_panic
1897 * Purpose:
1898 * Panic on detection of an invalid range id.
1899 */
1900 __abortlike
1901 static void
vm_map_range_invalid_panic(vm_map_t map,vm_map_range_id_t range_id)1902 vm_map_range_invalid_panic(
1903 vm_map_t map,
1904 vm_map_range_id_t range_id)
1905 {
1906 panic("invalid range ID (%u) for map %p", range_id, map);
1907 }
1908
1909 /*
1910 * Routine: vm_map_get_range
1911 * Purpose:
1912 * Adjust bounds based on security policy.
1913 */
1914 static struct mach_vm_range
vm_map_get_range(vm_map_t map,vm_map_address_t * address,vm_map_kernel_flags_t * vmk_flags,vm_map_size_t size)1915 vm_map_get_range(
1916 vm_map_t map,
1917 vm_map_address_t *address,
1918 vm_map_kernel_flags_t *vmk_flags,
1919 vm_map_size_t size)
1920 {
1921 struct mach_vm_range effective_range = {};
1922 vm_map_range_id_t range_id = vmk_flags->vmkf_range_id;
1923
1924 if (map == kernel_map) {
1925 effective_range = kmem_ranges[range_id];
1926
1927 if (startup_phase >= STARTUP_SUB_KMEM) {
1928 /*
1929 * Hint provided by caller is zeroed as the range is restricted to a
1930 * subset of the entire kernel_map VA, which could put the hint outside
1931 * the range, causing vm_map_store_find_space to fail.
1932 */
1933 *address = 0ull;
1934 /*
1935 * Ensure that range_id passed in by the caller is within meaningful
1936 * bounds. Range id of KMEM_RANGE_ID_NONE will cause vm_map_locate_space
1937 * to fail as the corresponding range is invalid. Range id larger than
1938 * KMEM_RANGE_ID_MAX will lead to an OOB access.
1939 */
1940 if ((range_id == KMEM_RANGE_ID_NONE) ||
1941 (range_id > KMEM_RANGE_ID_MAX)) {
1942 vm_map_range_invalid_panic(map, range_id);
1943 }
1944 #if ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
1945 /*
1946 * Each allocation front looks like [ S | L | S ]
1947 * Adjust range for allocations larger than KMEM_SMALLMAP_THRESHOLD.
1948 * Allocations smaller than KMEM_SMALLMAP_THRESHOLD are allowed to
1949 * use the entire range. Two small allocations from different fronts
1950 * (left and right) can only meet when memory in the that range is
1951 * entirely exhausted.
1952 */
1953 if (size >= KMEM_SMALLMAP_THRESHOLD) {
1954 effective_range = kmem_large_ranges[range_id];
1955 }
1956 #else /* ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
1957 vm_map_adjust_direction(vmk_flags, size);
1958 #endif /* ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
1959 }
1960 #if CONFIG_MAP_RANGES
1961 } else if (map->uses_user_ranges) {
1962 if (range_id > UMEM_RANGE_ID_MAX) {
1963 vm_map_range_invalid_panic(map, range_id);
1964 }
1965
1966 effective_range = map->user_range[range_id];
1967 #endif /* CONFIG_MAP_RANGES */
1968 } else {
1969 /*
1970 * If minimum is 0, bump it up by PAGE_SIZE. We want to limit
1971 * allocations of PAGEZERO to explicit requests since its
1972 * normal use is to catch dereferences of NULL and many
1973 * applications also treat pointers with a value of 0 as
1974 * special and suddenly having address 0 contain useable
1975 * memory would tend to confuse those applications.
1976 */
1977 effective_range.min_address = MAX(map->min_offset, VM_MAP_PAGE_SIZE(map));
1978 effective_range.max_address = map->max_offset;
1979 }
1980
1981 return effective_range;
1982 }
1983
/*
 * Routine:	vm_map_locate_space
 * Purpose:
 *	Finds a range in the specified virtual address map,
 *	returning the start of that range,
 *	as well as the entry right before it.
 *
 *	The map must be locked by the caller (the wait_for_space
 *	path below unlocks and re-locks it around the block).
 *	On KERN_SUCCESS, *start_inout holds the chosen start address
 *	and *entry_out (if non-NULL) the entry preceding the hole.
 */
kern_return_t
vm_map_locate_space(
	vm_map_t                map,
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	vm_map_kernel_flags_t   vmk_flags,
	vm_map_offset_t        *start_inout,
	vm_map_entry_t         *entry_out)
{
	struct mach_vm_range effective_range = {};
	vm_map_size_t   guard_offset;
	vm_map_offset_t hint, limit;
	vm_map_entry_t  entry;

	/*
	 * Only supported by vm_map_enter() with a fixed address.
	 */
	assert(!vmk_flags.vmkf_beyond_max);

	if (__improbable(map->wait_for_space)) {
		/*
		 * support for "wait_for_space" is minimal,
		 * its only consumer is the ipc_kernel_copy_map.
		 */
		assert(!map->holelistenabled &&
		    !vmk_flags.vmkf_last_free &&
		    !vmk_flags.vmkf_keep_map_locked &&
		    !vmk_flags.vmkf_map_jit &&
		    !vmk_flags.vmkf_random_address &&
		    *start_inout <= map->min_offset);
	} else if (vmk_flags.vmkf_last_free) {
		/* top-down placement excludes JIT and randomized placement */
		assert(!vmk_flags.vmkf_map_jit &&
		    !vmk_flags.vmkf_random_address);
	}

	if (vmk_flags.vmkf_guard_before) {
		/* one leading guard page is carved out of the requested size */
		guard_offset = VM_MAP_PAGE_SIZE(map);
		assert(size > guard_offset);
		size -= guard_offset;
	} else {
		assert(size != 0);
		guard_offset = 0;
	}

	/* constrain the search to the map's (or range-id's) effective range */
	effective_range = vm_map_get_range(map, start_inout, &vmk_flags, size);
#if XNU_TARGET_OS_OSX
	if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
		/* caller wants a 32-bit addressable mapping in a user map */
		assert(map != kernel_map);
		effective_range.max_address = MIN(map->max_offset, 0x00000000FFFFF000ULL);
	}
#endif /* XNU_TARGET_OS_OSX */

again:
	if (vmk_flags.vmkf_last_free) {
		/* top-down: scan downward from the hint toward min_address */
		hint = *start_inout;

		if (hint == 0 || hint > effective_range.max_address) {
			hint = effective_range.max_address;
		}
		if (hint <= effective_range.min_address) {
			return KERN_NO_SPACE;
		}
		limit = effective_range.min_address;
	} else {
		/* bottom-up: scan upward from the hint toward max_address */
		hint = *start_inout;

		if (vmk_flags.vmkf_map_jit) {
			if (map->jit_entry_exists &&
			    !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
				/* only one JIT region allowed by policy */
				return KERN_INVALID_ARGUMENT;
			}
			if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
				vmk_flags.vmkf_random_address = true;
			}
		}

		if (vmk_flags.vmkf_random_address) {
			kern_return_t kr;

			kr = vm_map_random_address_for_size(map, &hint, size, vmk_flags);
			if (kr != KERN_SUCCESS) {
				return kr;
			}
		}
#if XNU_TARGET_OS_OSX
		else if ((hint == 0 || hint == vm_map_min(map)) &&
		    !map->disable_vmentry_reuse &&
		    map->vmmap_high_start != 0) {
			/* honor the map's preferred high starting address */
			hint = map->vmmap_high_start;
		}
#endif /* XNU_TARGET_OS_OSX */

		/* clamp the hint into the effective range */
		if (hint < effective_range.min_address) {
			hint = effective_range.min_address;
		}
		if (effective_range.max_address <= hint) {
			return KERN_NO_SPACE;
		}

		limit = effective_range.max_address;
	}
	entry = vm_map_store_find_space(map,
	    hint, limit, vmk_flags.vmkf_last_free,
	    guard_offset, size, mask,
	    start_inout);

	if (__improbable(entry == NULL)) {
		/*
		 * No hole found: if this map supports waiting for space
		 * and the request could ever fit in the effective range,
		 * block until someone frees space, then retry the search.
		 */
		if (map->wait_for_space &&
		    guard_offset + size <=
		    effective_range.max_address - effective_range.min_address) {
			assert_wait((event_t)map, THREAD_ABORTSAFE);
			vm_map_unlock(map);
			thread_block(THREAD_CONTINUE_NULL);
			vm_map_lock(map);
			goto again;
		}
		return KERN_NO_SPACE;
	}

	if (entry_out) {
		*entry_out = entry;
	}
	return KERN_SUCCESS;
}
2115
2116
/*
 * Routine:	vm_map_find_space
 * Purpose:
 *	Allocate a range in the specified virtual address map,
 *	returning the entry allocated for that range.
 *	Used by kmem_alloc, etc.
 *
 *	The map must NOT be locked. It will be returned locked
 *	on KERN_SUCCESS, unlocked on failure.
 *
 *	If an entry is allocated, the object/offset fields
 *	are initialized to zero.
 */
kern_return_t
vm_map_find_space(
	vm_map_t                map,
	vm_map_offset_t         hint_address,
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	vm_map_kernel_flags_t   vmk_flags,
	vm_map_entry_t         *o_entry) /* OUT */
{
	vm_map_entry_t  new_entry, entry;
	kern_return_t   kr;

	if (size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/* create and pre-initialize the entry before taking the map lock */
	new_entry = vm_map_entry_create(map);
	new_entry->use_pmap = true;
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_ALL;

	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		/* map page size differs from the kernel's native page size */
		new_entry->map_aligned = true;
	}
	if (vmk_flags.vmkf_permanent) {
		new_entry->vme_permanent = true;
	}

	vm_map_lock(map);

	/* find a hole; on success "entry" precedes it, hint_address is its start */
	kr = vm_map_locate_space(map, size, mask, vmk_flags,
	    &hint_address, &entry);
	if (kr != KERN_SUCCESS) {
		/* failure: unlock per contract and discard the unused entry */
		vm_map_unlock(map);
		vm_map_entry_dispose(new_entry);
		return kr;
	}
	new_entry->vme_start = hint_address;
	new_entry->vme_end = hint_address + size;

	/*
	 * At this point,
	 *
	 * - new_entry's "vme_start" and "vme_end" should define
	 *   the endpoints of the available new range,
	 *
	 * - and "entry" should refer to the region before
	 *   the new range,
	 *
	 * - and the map should still be locked.
	 */

	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, VM_MAP_PAGE_MASK(map)));

	/*
	 * Insert the new entry into the list
	 */

	vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
	map->size += size;

	/*
	 * Update the lookup hint
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);

	/* success: return with the map still locked, as documented above */
	*o_entry = new_entry;
	return KERN_SUCCESS;
}
2202
/*
 * Debug knobs for vm_map_pmap_enter(): when _print is set, each
 * pre-entered mapping is logged via printf. (_enable is not read
 * within this chunk; presumably consulted by callers — verify.)
 */
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;
2205
/*
 * Routine:	vm_map_pmap_enter [internal only]
 *
 * Description:
 *	Force pages from the specified object to be entered into
 *	the pmap at the specified address if they are present.
 *	As soon as a page not found in the object the scan ends.
 *
 * Returns:
 *	Nothing.
 *
 * In/out conditions:
 *	The source map should not be locked on entry.
 */
__unused static void
vm_map_pmap_enter(
	vm_map_t                map,
	vm_map_offset_t         addr,
	vm_map_offset_t         end_addr,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_prot_t               protection)
{
	int                     type_of_fault;
	kern_return_t           kr;
	struct vm_object_fault_info fault_info = {};

	/* nothing to do if the map has no physical map */
	if (map->pmap == 0) {
		return;
	}

	assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);

	/* walk the range one page at a time, pre-faulting resident pages */
	while (addr < end_addr) {
		vm_page_t       m;


		/*
		 * TODO:
		 * From vm_map_enter(), we come into this function without the map
		 * lock held or the object lock held.
		 * We haven't taken a reference on the object either.
		 * We should do a proper lookup on the map to make sure
		 * that things are sane before we go locking objects that
		 * could have been deallocated from under us.
		 */

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);

		/*
		 * Stop the whole scan at the first page that is absent,
		 * busy, fictitious, or otherwise unsafe to enter.
		 */
		if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
		    (m->vmp_unusual && (VMP_ERROR_GET(m) || m->vmp_restart || m->vmp_absent))) {
			vm_object_unlock(object);
			return;
		}

		if (vm_map_pmap_enter_print) {
			printf("vm_map_pmap_enter:");
			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			    map, (unsigned long long)addr, object, (unsigned long long)offset);
		}
		type_of_fault = DBG_CACHE_HIT_FAULT;
		/* NOTE(review): kr is assigned but never checked; failures
		 * to enter a page are silently ignored — confirm intended. */
		kr = vm_fault_enter(m, map->pmap,
		    addr,
		    PAGE_SIZE, 0,
		    protection, protection,
		    VM_PAGE_WIRED(m),
		    FALSE, /* change_wiring */
		    VM_KERN_MEMORY_NONE, /* tag - not wiring */
		    &fault_info,
		    NULL, /* need_retry */
		    &type_of_fault);

		vm_object_unlock(object);

		/* advance to the next page in both the object and the map */
		offset += PAGE_SIZE_64;
		addr += PAGE_SIZE;
	}
}
2286
#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
/*
 * Routine:	vm_map_random_address_for_size
 * Purpose:
 *	Pick a page-aligned random address inside the map's effective
 *	range whose enclosing hole is large enough for an allocation
 *	of "size" bytes. Up to MAX_TRIES_TO_GET_RANDOM_ADDRESS random
 *	candidates are probed before giving up with KERN_NO_SPACE.
 *	On success the chosen address is returned in *address.
 */
kern_return_t
vm_map_random_address_for_size(
	vm_map_t                map,
	vm_map_offset_t        *address,
	vm_map_size_t           size,
	vm_map_kernel_flags_t   vmk_flags)
{
	kern_return_t   kr = KERN_SUCCESS;
	int             tries = 0;
	vm_map_offset_t random_addr = 0;
	vm_map_offset_t hole_end;

	vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
	vm_map_size_t   vm_hole_size = 0;
	vm_map_size_t   addr_space_size;
	struct mach_vm_range effective_range = vm_map_get_range(map, address, &vmk_flags, size);

	/* usable span of the effective range; fail fast if it can never fit */
	addr_space_size = effective_range.max_address - effective_range.min_address;
	if (size >= addr_space_size) {
		return KERN_NO_SPACE;
	}
	/* shrink so that any candidate base leaves room for "size" bytes */
	addr_space_size -= size;

	assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));

	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		/* early_random() is the only entropy source before zalloc is up */
		if (startup_phase < STARTUP_SUB_ZALLOC) {
			random_addr = (vm_map_offset_t)early_random();
		} else {
			random_addr = (vm_map_offset_t)random();
		}
		/* scale to page granularity, then fold into the usable span */
		random_addr <<= VM_MAP_PAGE_SHIFT(map);
		random_addr = vm_map_trunc_page(
			effective_range.min_address + (random_addr % addr_space_size),
			VM_MAP_PAGE_MASK(map));

#if CONFIG_PROB_GZALLOC
		/*
		 * Skip addresses owned by the probabilistic guard allocator.
		 * NOTE(review): this path does not increment "tries".
		 */
		if (map->pmap == kernel_pmap && pgz_owned(random_addr)) {
			continue;
		}
#endif /* CONFIG_PROB_GZALLOC */

		/* candidate qualifies only if it falls in an unmapped hole... */
		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
			if (prev_entry == vm_map_to_entry(map)) {
				next_entry = vm_map_first_entry(map);
			} else {
				next_entry = prev_entry->vme_next;
			}
			/* ...whose end leaves at least "size" bytes available */
			if (next_entry == vm_map_to_entry(map)) {
				hole_end = vm_map_max(map);
			} else {
				hole_end = next_entry->vme_start;
			}
			vm_hole_size = hole_end - random_addr;
			if (vm_hole_size >= size) {
				*address = random_addr;
				break;
			}
		}
		tries++;
	}

	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		kr = KERN_NO_SPACE;
	}
	return kr;
}
2356
2357 static boolean_t
vm_memory_malloc_no_cow(int alias)2358 vm_memory_malloc_no_cow(
2359 int alias)
2360 {
2361 uint64_t alias_mask;
2362
2363 if (alias > 63) {
2364 return FALSE;
2365 }
2366
2367 alias_mask = 1ULL << alias;
2368 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2369 return TRUE;
2370 }
2371 return FALSE;
2372 }
2373
/*
 * Counters named after the RLIMIT_AS / RLIMIT_DATA resource limits;
 * presumably bumped when vm_map_enter() rejects a mapping for
 * exceeding them — the incrementing sites are outside this chunk.
 */
uint64_t vm_map_enter_RLIMIT_AS_count = 0;
uint64_t vm_map_enter_RLIMIT_DATA_count = 0;
/*
 * Routine:	vm_map_enter
 *
 * Description:
 *	Allocate a range in the specified virtual address map.
 *	The resulting range will refer to memory defined by
 *	the given memory object and offset into that object.
 *
 *	Arguments are as defined in the vm_map call.
 */
/*
 * Counters for restoring previously-zapped mappings after a failed
 * overwrite-style vm_map_enter() (see the zap_old_list handling).
 */
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
2388 kern_return_t
vm_map_enter(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t alias,vm_object_t object,vm_object_offset_t offset,boolean_t needs_copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)2389 vm_map_enter(
2390 vm_map_t map,
2391 vm_map_offset_t *address, /* IN/OUT */
2392 vm_map_size_t size,
2393 vm_map_offset_t mask,
2394 int flags,
2395 vm_map_kernel_flags_t vmk_flags,
2396 vm_tag_t alias,
2397 vm_object_t object,
2398 vm_object_offset_t offset,
2399 boolean_t needs_copy,
2400 vm_prot_t cur_protection,
2401 vm_prot_t max_protection,
2402 vm_inherit_t inheritance)
2403 {
2404 vm_map_entry_t entry, new_entry;
2405 vm_map_offset_t start, tmp_start, tmp_offset;
2406 vm_map_offset_t end, tmp_end;
2407 vm_map_offset_t tmp2_start, tmp2_end;
2408 vm_map_offset_t step;
2409 kern_return_t result = KERN_SUCCESS;
2410 boolean_t map_locked = FALSE;
2411 boolean_t pmap_empty = TRUE;
2412 boolean_t new_mapping_established = FALSE;
2413 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2414 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2415 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2416 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2417 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2418 const boolean_t is_submap = vmk_flags.vmkf_submap;
2419 boolean_t permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
2420 const boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2421 const boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2422 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2423 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2424 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2425 boolean_t entry_for_tpro = ((flags & VM_FLAGS_TPRO) != 0);
2426 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2427 vm_tag_t user_alias;
2428 kern_return_t kr;
2429 boolean_t clear_map_aligned = FALSE;
2430 vm_map_size_t chunk_size = 0;
2431 vm_object_t caller_object;
2432 VM_MAP_ZAP_DECLARE(zap_old_list);
2433 VM_MAP_ZAP_DECLARE(zap_new_list);
2434
2435 caller_object = object;
2436
2437 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2438
2439 if (flags & VM_FLAGS_4GB_CHUNK) {
2440 #if defined(__LP64__)
2441 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2442 #else /* __LP64__ */
2443 chunk_size = ANON_CHUNK_SIZE;
2444 #endif /* __LP64__ */
2445 } else {
2446 chunk_size = ANON_CHUNK_SIZE;
2447 }
2448
2449 if (superpage_size) {
2450 switch (superpage_size) {
2451 /*
2452 * Note that the current implementation only supports
2453 * a single size for superpages, SUPERPAGE_SIZE, per
2454 * architecture. As soon as more sizes are supposed
2455 * to be supported, SUPERPAGE_SIZE has to be replaced
2456 * with a lookup of the size depending on superpage_size.
2457 */
2458 #ifdef __x86_64__
2459 case SUPERPAGE_SIZE_ANY:
2460 /* handle it like 2 MB and round up to page size */
2461 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2462 OS_FALLTHROUGH;
2463 case SUPERPAGE_SIZE_2MB:
2464 break;
2465 #endif
2466 default:
2467 return KERN_INVALID_ARGUMENT;
2468 }
2469 mask = SUPERPAGE_SIZE - 1;
2470 if (size & (SUPERPAGE_SIZE - 1)) {
2471 return KERN_INVALID_ARGUMENT;
2472 }
2473 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2474 }
2475
2476
2477 if ((cur_protection & VM_PROT_WRITE) &&
2478 (cur_protection & VM_PROT_EXECUTE) &&
2479 #if XNU_TARGET_OS_OSX
2480 map->pmap != kernel_pmap &&
2481 (cs_process_global_enforcement() ||
2482 (vmk_flags.vmkf_cs_enforcement_override
2483 ? vmk_flags.vmkf_cs_enforcement
2484 : (vm_map_cs_enforcement(map)
2485 #if __arm64__
2486 || !VM_MAP_IS_EXOTIC(map)
2487 #endif /* __arm64__ */
2488 ))) &&
2489 #endif /* XNU_TARGET_OS_OSX */
2490 (VM_MAP_POLICY_WX_FAIL(map) ||
2491 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2492 !entry_for_jit) {
2493 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2494
2495 DTRACE_VM3(cs_wx,
2496 uint64_t, 0,
2497 uint64_t, 0,
2498 vm_prot_t, cur_protection);
2499 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2500 proc_selfpid(),
2501 (get_bsdtask_info(current_task())
2502 ? proc_name_address(get_bsdtask_info(current_task()))
2503 : "?"),
2504 __FUNCTION__,
2505 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2506 cur_protection &= ~VM_PROT_EXECUTE;
2507 if (vm_protect_wx_fail) {
2508 return KERN_PROTECTION_FAILURE;
2509 }
2510 }
2511
2512 /*
2513 * If the task has requested executable lockdown,
2514 * deny any new executable mapping.
2515 */
2516 if (map->map_disallow_new_exec == TRUE) {
2517 if (cur_protection & VM_PROT_EXECUTE) {
2518 return KERN_PROTECTION_FAILURE;
2519 }
2520 }
2521
2522 if (resilient_codesign) {
2523 assert(!is_submap);
2524 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
2525 if ((cur_protection | max_protection) & reject_prot) {
2526 return KERN_PROTECTION_FAILURE;
2527 }
2528 }
2529
2530 if (resilient_media) {
2531 assert(!is_submap);
2532 // assert(!needs_copy);
2533 if (object != VM_OBJECT_NULL &&
2534 !object->internal) {
2535 /*
2536 * This mapping is directly backed by an external
2537 * memory manager (e.g. a vnode pager for a file):
2538 * we would not have any safe place to inject
2539 * a zero-filled page if an actual page is not
2540 * available, without possibly impacting the actual
2541 * contents of the mapped object (e.g. the file),
2542 * so we can't provide any media resiliency here.
2543 */
2544 return KERN_INVALID_ARGUMENT;
2545 }
2546 }
2547
2548 if (is_submap) {
2549 vm_map_t submap;
2550 if (purgable) {
2551 /* submaps can not be purgeable */
2552 return KERN_INVALID_ARGUMENT;
2553 }
2554 if (object == VM_OBJECT_NULL) {
2555 /* submaps can not be created lazily */
2556 return KERN_INVALID_ARGUMENT;
2557 }
2558 submap = (vm_map_t) object;
2559 if (VM_MAP_PAGE_SHIFT(submap) != VM_MAP_PAGE_SHIFT(map)) {
2560 /* page size mismatch */
2561 return KERN_INVALID_ARGUMENT;
2562 }
2563 }
2564 if (vmk_flags.vmkf_already) {
2565 /*
2566 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2567 * is already present. For it to be meaningul, the requested
2568 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2569 * we shouldn't try and remove what was mapped there first
2570 * (!VM_FLAGS_OVERWRITE).
2571 */
2572 if ((flags & VM_FLAGS_ANYWHERE) ||
2573 (flags & VM_FLAGS_OVERWRITE)) {
2574 return KERN_INVALID_ARGUMENT;
2575 }
2576 }
2577
2578 if (size == 0 ||
2579 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2580 *address = 0;
2581 return KERN_INVALID_ARGUMENT;
2582 }
2583
2584 if (map->pmap == kernel_pmap) {
2585 user_alias = VM_KERN_MEMORY_NONE;
2586 } else {
2587 user_alias = alias;
2588 }
2589
2590 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2591 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2592 }
2593
2594 #define RETURN(value) { result = value; goto BailOut; }
2595
2596 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2597 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2598 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2599 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2600 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2601 }
2602
2603 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2604 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2605 /*
2606 * In most cases, the caller rounds the size up to the
2607 * map's page size.
2608 * If we get a size that is explicitly not map-aligned here,
2609 * we'll have to respect the caller's wish and mark the
2610 * mapping as "not map-aligned" to avoid tripping the
2611 * map alignment checks later.
2612 */
2613 clear_map_aligned = TRUE;
2614 }
2615 if (!anywhere &&
2616 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2617 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2618 /*
2619 * We've been asked to map at a fixed address and that
2620 * address is not aligned to the map's specific alignment.
2621 * The caller should know what it's doing (i.e. most likely
2622 * mapping some fragmented copy map, transferring memory from
2623 * a VM map with a different alignment), so clear map_aligned
2624 * for this new VM map entry and proceed.
2625 */
2626 clear_map_aligned = TRUE;
2627 }
2628
2629 /*
2630 * Only zero-fill objects are allowed to be purgable.
2631 * LP64todo - limit purgable objects to 32-bits for now
2632 */
2633 if (purgable &&
2634 (offset != 0 ||
2635 (object != VM_OBJECT_NULL &&
2636 (object->vo_size != size ||
2637 object->purgable == VM_PURGABLE_DENY))
2638 #if __LP64__
2639 || size > ANON_MAX_SIZE
2640 #endif
2641 )) {
2642 return KERN_INVALID_ARGUMENT;
2643 }
2644
2645 start = *address;
2646
2647 if (anywhere) {
2648 vm_map_lock(map);
2649 map_locked = TRUE;
2650
2651 if (flags & VM_FLAGS_RANDOM_ADDR) {
2652 vmk_flags.vmkf_random_address = true;
2653 }
2654
2655 result = vm_map_locate_space(map, size, mask, vmk_flags,
2656 &start, &entry);
2657 if (result != KERN_SUCCESS) {
2658 goto BailOut;
2659 }
2660
2661 *address = start;
2662 end = start + size;
2663 assert(VM_MAP_PAGE_ALIGNED(*address,
2664 VM_MAP_PAGE_MASK(map)));
2665 } else {
2666 vm_map_offset_t effective_min_offset, effective_max_offset;
2667
2668 effective_min_offset = map->min_offset;
2669 effective_max_offset = map->max_offset;
2670
2671 if (vmk_flags.vmkf_beyond_max) {
2672 /*
2673 * Allow an insertion beyond the map's max offset.
2674 */
2675 effective_max_offset = 0x00000000FFFFF000ULL;
2676 if (vm_map_is_64bit(map)) {
2677 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2678 }
2679 #if XNU_TARGET_OS_OSX
2680 } else if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2681 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2682 #endif /* XNU_TARGET_OS_OSX */
2683 }
2684
2685 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2686 !overwrite &&
2687 user_alias == VM_MEMORY_REALLOC) {
2688 /*
2689 * Force realloc() to switch to a new allocation,
2690 * to prevent 4k-fragmented virtual ranges.
2691 */
2692 // DEBUG4K_ERROR("no realloc in place");
2693 return KERN_NO_SPACE;
2694 }
2695
2696 /*
2697 * Verify that:
2698 * the address doesn't itself violate
2699 * the mask requirement.
2700 */
2701
2702 vm_map_lock(map);
2703 map_locked = TRUE;
2704 if ((start & mask) != 0) {
2705 RETURN(KERN_NO_SPACE);
2706 }
2707
2708 /*
2709 * ... the address is within bounds
2710 */
2711
2712 end = start + size;
2713
2714 if ((start < effective_min_offset) ||
2715 (end > effective_max_offset) ||
2716 (start >= end)) {
2717 RETURN(KERN_INVALID_ADDRESS);
2718 }
2719
2720 if (overwrite) {
2721 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_MAP_ALIGN;
2722 kern_return_t remove_kr;
2723
2724 /*
2725 * Fixed mapping and "overwrite" flag: attempt to
2726 * remove all existing mappings in the specified
2727 * address range, saving them in our "zap_old_list".
2728 *
2729 * This avoids releasing the VM map lock in
2730 * vm_map_entry_delete() and allows atomicity
2731 * when we want to replace some mappings with a new one.
2732 * It also allows us to restore the old VM mappings if the
2733 * new mapping fails.
2734 */
2735 remove_flags |= VM_MAP_REMOVE_NO_YIELD;
2736
2737 if (vmk_flags.vmkf_overwrite_immutable) {
2738 /* we can overwrite immutable mappings */
2739 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2740 }
2741 if (vmk_flags.vmkf_remap_prot_copy) {
2742 remove_flags |= VM_MAP_REMOVE_IMMUTABLE_CODE;
2743 }
2744 remove_kr = vm_map_delete(map, start, end, remove_flags,
2745 KMEM_GUARD_NONE, &zap_old_list).kmr_return;
2746 if (remove_kr) {
2747 /* XXX FBDP restore zap_old_list? */
2748 RETURN(remove_kr);
2749 }
2750 }
2751
2752 /*
2753 * ... the starting address isn't allocated
2754 */
2755
2756 if (vm_map_lookup_entry(map, start, &entry)) {
2757 if (!(vmk_flags.vmkf_already)) {
2758 RETURN(KERN_NO_SPACE);
2759 }
2760 /*
2761 * Check if what's already there is what we want.
2762 */
2763 tmp_start = start;
2764 tmp_offset = offset;
2765 if (entry->vme_start < start) {
2766 tmp_start -= start - entry->vme_start;
2767 tmp_offset -= start - entry->vme_start;
2768 }
2769 for (; entry->vme_start < end;
2770 entry = entry->vme_next) {
2771 /*
2772 * Check if the mapping's attributes
2773 * match the existing map entry.
2774 */
2775 if (entry == vm_map_to_entry(map) ||
2776 entry->vme_start != tmp_start ||
2777 entry->is_sub_map != is_submap ||
2778 VME_OFFSET(entry) != tmp_offset ||
2779 entry->needs_copy != needs_copy ||
2780 entry->protection != cur_protection ||
2781 entry->max_protection != max_protection ||
2782 entry->inheritance != inheritance ||
2783 entry->iokit_acct != iokit_acct ||
2784 VME_ALIAS(entry) != alias) {
2785 /* not the same mapping ! */
2786 RETURN(KERN_NO_SPACE);
2787 }
2788 /*
2789 * Check if the same object is being mapped.
2790 */
2791 if (is_submap) {
2792 if (VME_SUBMAP(entry) !=
2793 (vm_map_t) object) {
2794 /* not the same submap */
2795 RETURN(KERN_NO_SPACE);
2796 }
2797 } else {
2798 if (VME_OBJECT(entry) != object) {
2799 /* not the same VM object... */
2800 vm_object_t obj2;
2801
2802 obj2 = VME_OBJECT(entry);
2803 if ((obj2 == VM_OBJECT_NULL ||
2804 obj2->internal) &&
2805 (object == VM_OBJECT_NULL ||
2806 object->internal)) {
2807 /*
2808 * ... but both are
2809 * anonymous memory,
2810 * so equivalent.
2811 */
2812 } else {
2813 RETURN(KERN_NO_SPACE);
2814 }
2815 }
2816 }
2817
2818 tmp_offset += entry->vme_end - entry->vme_start;
2819 tmp_start += entry->vme_end - entry->vme_start;
2820 if (entry->vme_end >= end) {
2821 /* reached the end of our mapping */
2822 break;
2823 }
2824 }
2825 /* it all matches: let's use what's already there ! */
2826 RETURN(KERN_MEMORY_PRESENT);
2827 }
2828
2829 /*
2830 * ... the next region doesn't overlap the
2831 * end point.
2832 */
2833
2834 if ((entry->vme_next != vm_map_to_entry(map)) &&
2835 (entry->vme_next->vme_start < end)) {
2836 RETURN(KERN_NO_SPACE);
2837 }
2838 }
2839
2840 /*
2841 * At this point,
2842 * "start" and "end" should define the endpoints of the
2843 * available new range, and
2844 * "entry" should refer to the region before the new
2845 * range, and
2846 *
2847 * the map should be locked.
2848 */
2849
2850 /*
2851 * See whether we can avoid creating a new entry (and object) by
2852 * extending one of our neighbors. [So far, we only attempt to
2853 * extend from below.] Note that we can never extend/join
2854 * purgable objects because they need to remain distinct
2855 * entities in order to implement their "volatile object"
2856 * semantics.
2857 */
2858
2859 if (purgable ||
2860 entry_for_jit ||
2861 entry_for_tpro ||
2862 vm_memory_malloc_no_cow(user_alias)) {
2863 if (object == VM_OBJECT_NULL) {
2864 object = vm_object_allocate(size);
2865 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2866 object->true_share = FALSE;
2867 if (purgable) {
2868 task_t owner;
2869 object->purgable = VM_PURGABLE_NONVOLATILE;
2870 if (map->pmap == kernel_pmap) {
2871 /*
2872 * Purgeable mappings made in a kernel
2873 * map are "owned" by the kernel itself
2874 * rather than the current user task
2875 * because they're likely to be used by
2876 * more than this user task (see
2877 * execargs_purgeable_allocate(), for
2878 * example).
2879 */
2880 owner = kernel_task;
2881 } else {
2882 owner = current_task();
2883 }
2884 assert(object->vo_owner == NULL);
2885 assert(object->resident_page_count == 0);
2886 assert(object->wired_page_count == 0);
2887 vm_object_lock(object);
2888 vm_purgeable_nonvolatile_enqueue(object, owner);
2889 vm_object_unlock(object);
2890 }
2891 offset = (vm_object_offset_t)0;
2892 }
2893 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
2894 /* no coalescing if address space uses sub-pages */
2895 } else if ((is_submap == FALSE) &&
2896 (object == VM_OBJECT_NULL) &&
2897 (entry != vm_map_to_entry(map)) &&
2898 (entry->vme_end == start) &&
2899 (!entry->is_shared) &&
2900 (!entry->is_sub_map) &&
2901 (!entry->in_transition) &&
2902 (!entry->needs_wakeup) &&
2903 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2904 (entry->protection == cur_protection) &&
2905 (entry->max_protection == max_protection) &&
2906 (entry->inheritance == inheritance) &&
2907 ((user_alias == VM_MEMORY_REALLOC) ||
2908 (VME_ALIAS(entry) == alias)) &&
2909 (entry->no_cache == no_cache) &&
2910 (entry->vme_permanent == permanent) &&
2911 /* no coalescing for immutable executable mappings */
2912 !((entry->protection & VM_PROT_EXECUTE) &&
2913 entry->vme_permanent) &&
2914 (!entry->superpage_size && !superpage_size) &&
2915 /*
2916 * No coalescing if not map-aligned, to avoid propagating
2917 * that condition any further than needed:
2918 */
2919 (!entry->map_aligned || !clear_map_aligned) &&
2920 (!entry->zero_wired_pages) &&
2921 (!entry->used_for_jit && !entry_for_jit) &&
2922 (!entry->pmap_cs_associated) &&
2923 (entry->iokit_acct == iokit_acct) &&
2924 (!entry->vme_resilient_codesign) &&
2925 (!entry->vme_resilient_media) &&
2926 (!entry->vme_atomic) &&
2927 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2928
2929 ((entry->vme_end - entry->vme_start) + size <=
2930 (user_alias == VM_MEMORY_REALLOC ?
2931 ANON_CHUNK_SIZE :
2932 NO_COALESCE_LIMIT)) &&
2933
2934 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2935 if (vm_object_coalesce(VME_OBJECT(entry),
2936 VM_OBJECT_NULL,
2937 VME_OFFSET(entry),
2938 (vm_object_offset_t) 0,
2939 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2940 (vm_map_size_t)(end - entry->vme_end))) {
2941 /*
2942 * Coalesced the two objects - can extend
2943 * the previous map entry to include the
2944 * new range.
2945 */
2946 map->size += (end - entry->vme_end);
2947 assert(entry->vme_start < end);
2948 assert(VM_MAP_PAGE_ALIGNED(end,
2949 VM_MAP_PAGE_MASK(map)));
2950 if (__improbable(vm_debug_events)) {
2951 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2952 }
2953 entry->vme_end = end;
2954 if (map->holelistenabled) {
2955 vm_map_store_update_first_free(map, entry, TRUE);
2956 } else {
2957 vm_map_store_update_first_free(map, map->first_free, TRUE);
2958 }
2959 new_mapping_established = TRUE;
2960 RETURN(KERN_SUCCESS);
2961 }
2962 }
2963
2964 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2965 new_entry = NULL;
2966
2967 if (vmk_flags.vmkf_submap_adjust) {
2968 vm_map_adjust_offsets((vm_map_t)caller_object, start, end);
2969 offset = start;
2970 }
2971
2972 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2973 tmp2_end = tmp2_start + step;
2974 /*
2975 * Create a new entry
2976 *
2977 * XXX FBDP
2978 * The reserved "page zero" in each process's address space can
2979 * be arbitrarily large. Splitting it into separate objects and
2980 * therefore different VM map entries serves no purpose and just
2981 * slows down operations on the VM map, so let's not split the
2982 * allocation into chunks if the max protection is NONE. That
2983 * memory should never be accessible, so it will never get to the
2984 * default pager.
2985 */
2986 tmp_start = tmp2_start;
2987 if (!is_submap &&
2988 object == VM_OBJECT_NULL &&
2989 size > chunk_size &&
2990 max_protection != VM_PROT_NONE &&
2991 superpage_size == 0) {
2992 tmp_end = tmp_start + chunk_size;
2993 } else {
2994 tmp_end = tmp2_end;
2995 }
2996 do {
2997 if (!is_submap &&
2998 object != VM_OBJECT_NULL &&
2999 object->internal &&
3000 offset + (tmp_end - tmp_start) > object->vo_size) {
3001 // printf("FBDP object %p size 0x%llx overmapping offset 0x%llx size 0x%llx\n", object, object->vo_size, offset, (uint64_t)(tmp_end - tmp_start));
3002 DTRACE_VM5(vm_map_enter_overmap,
3003 vm_map_t, map,
3004 vm_map_address_t, tmp_start,
3005 vm_map_address_t, tmp_end,
3006 vm_object_offset_t, offset,
3007 vm_object_size_t, object->vo_size);
3008 }
3009 new_entry = vm_map_entry_insert(map,
3010 entry, tmp_start, tmp_end,
3011 object, offset, vmk_flags,
3012 needs_copy,
3013 cur_protection, max_protection,
3014 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3015 VM_INHERIT_NONE : inheritance),
3016 no_cache,
3017 permanent,
3018 superpage_size,
3019 clear_map_aligned,
3020 alias);
3021
3022 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3023
3024 if (resilient_codesign) {
3025 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
3026 if (!((cur_protection | max_protection) & reject_prot)) {
3027 new_entry->vme_resilient_codesign = TRUE;
3028 }
3029 }
3030
3031 if (resilient_media &&
3032 (object == VM_OBJECT_NULL ||
3033 object->internal)) {
3034 new_entry->vme_resilient_media = TRUE;
3035 }
3036
3037 assert(!new_entry->iokit_acct);
3038 if (!is_submap &&
3039 object != VM_OBJECT_NULL &&
3040 (object->purgable != VM_PURGABLE_DENY ||
3041 object->vo_ledger_tag)) {
3042 assert(new_entry->use_pmap);
3043 assert(!new_entry->iokit_acct);
3044 /*
3045 * Turn off pmap accounting since
3046 * purgeable (or tagged) objects have their
3047 * own ledgers.
3048 */
3049 new_entry->use_pmap = FALSE;
3050 } else if (!is_submap &&
3051 iokit_acct &&
3052 object != VM_OBJECT_NULL &&
3053 object->internal) {
3054 /* alternate accounting */
3055 assert(!new_entry->iokit_acct);
3056 assert(new_entry->use_pmap);
3057 new_entry->iokit_acct = TRUE;
3058 new_entry->use_pmap = FALSE;
3059 DTRACE_VM4(
3060 vm_map_iokit_mapped_region,
3061 vm_map_t, map,
3062 vm_map_offset_t, new_entry->vme_start,
3063 vm_map_offset_t, new_entry->vme_end,
3064 int, VME_ALIAS(new_entry));
3065 vm_map_iokit_mapped_region(
3066 map,
3067 (new_entry->vme_end -
3068 new_entry->vme_start));
3069 } else if (!is_submap) {
3070 assert(!new_entry->iokit_acct);
3071 assert(new_entry->use_pmap);
3072 }
3073
3074 if (is_submap) {
3075 vm_map_t submap;
3076 boolean_t submap_is_64bit;
3077 boolean_t use_pmap;
3078
3079 assert(new_entry->is_sub_map);
3080 assert(!new_entry->use_pmap);
3081 assert(!new_entry->iokit_acct);
3082 submap = (vm_map_t) object;
3083 submap_is_64bit = vm_map_is_64bit(submap);
3084 use_pmap = vmk_flags.vmkf_nested_pmap;
3085 #ifndef NO_NESTED_PMAP
3086 if (use_pmap && submap->pmap == NULL) {
3087 ledger_t ledger = map->pmap->ledger;
3088 /* we need a sub pmap to nest... */
3089 submap->pmap = pmap_create_options(ledger, 0,
3090 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3091 if (submap->pmap == NULL) {
3092 /* let's proceed without nesting... */
3093 }
3094 #if defined(__arm64__)
3095 else {
3096 pmap_set_nested(submap->pmap);
3097 }
3098 #endif
3099 }
3100 if (use_pmap && submap->pmap != NULL) {
3101 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3102 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3103 kr = KERN_FAILURE;
3104 } else {
3105 kr = pmap_nest(map->pmap,
3106 submap->pmap,
3107 tmp_start,
3108 tmp_end - tmp_start);
3109 }
3110 if (kr != KERN_SUCCESS) {
3111 printf("vm_map_enter: "
3112 "pmap_nest(0x%llx,0x%llx) "
3113 "error 0x%x\n",
3114 (long long)tmp_start,
3115 (long long)tmp_end,
3116 kr);
3117 } else {
3118 /* we're now nested ! */
3119 new_entry->use_pmap = TRUE;
3120 pmap_empty = FALSE;
3121 }
3122 }
3123 #endif /* NO_NESTED_PMAP */
3124 }
3125 entry = new_entry;
3126
3127 if (superpage_size) {
3128 vm_page_t pages, m;
3129 vm_object_t sp_object;
3130 vm_object_offset_t sp_offset;
3131
3132 VME_OFFSET_SET(entry, 0);
3133
3134 /* allocate one superpage */
3135 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3136 if (kr != KERN_SUCCESS) {
3137 /* deallocate whole range... */
3138 new_mapping_established = TRUE;
3139 /* ... but only up to "tmp_end" */
3140 size -= end - tmp_end;
3141 RETURN(kr);
3142 }
3143
3144 /* create one vm_object per superpage */
3145 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3146 sp_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3147 sp_object->phys_contiguous = TRUE;
3148 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3149 VME_OBJECT_SET(entry, sp_object, false, 0);
3150 assert(entry->use_pmap);
3151
3152 /* enter the base pages into the object */
3153 vm_object_lock(sp_object);
3154 for (sp_offset = 0;
3155 sp_offset < SUPERPAGE_SIZE;
3156 sp_offset += PAGE_SIZE) {
3157 m = pages;
3158 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3159 pages = NEXT_PAGE(m);
3160 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3161 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3162 }
3163 vm_object_unlock(sp_object);
3164 }
3165 } while (tmp_end != tmp2_end &&
3166 (tmp_start = tmp_end) &&
3167 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3168 tmp_end + chunk_size : tmp2_end));
3169 }
3170
3171 new_mapping_established = TRUE;
3172
3173 BailOut:
3174 assert(map_locked == TRUE);
3175
3176 /*
3177 * Address space limit enforcement (RLIMIT_AS and RLIMIT_DATA):
3178 * If we have identified and possibly established the new mapping(s),
3179 * make sure we did not go beyond the address space limit.
3180 */
3181 if (result == KERN_SUCCESS) {
3182 if (map->size_limit != RLIM_INFINITY &&
3183 map->size > map->size_limit) {
3184 /*
3185 * Establishing the requested mappings would exceed
3186 * the process's RLIMIT_AS limit: fail with
3187 * KERN_NO_SPACE.
3188 */
3189 result = KERN_NO_SPACE;
3190 printf("%d[%s] %s: map size 0x%llx over RLIMIT_AS 0x%llx\n",
3191 proc_selfpid(),
3192 (get_bsdtask_info(current_task())
3193 ? proc_name_address(get_bsdtask_info(current_task()))
3194 : "?"),
3195 __FUNCTION__,
3196 (uint64_t) map->size,
3197 (uint64_t) map->size_limit);
3198 DTRACE_VM2(vm_map_enter_RLIMIT_AS,
3199 vm_map_size_t, map->size,
3200 uint64_t, map->size_limit);
3201 vm_map_enter_RLIMIT_AS_count++;
3202 } else if (map->data_limit != RLIM_INFINITY &&
3203 map->size > map->data_limit) {
3204 /*
3205 * Establishing the requested mappings would exceed
3206 * the process's RLIMIT_DATA limit: fail with
3207 * KERN_NO_SPACE.
3208 */
3209 result = KERN_NO_SPACE;
3210 printf("%d[%s] %s: map size 0x%llx over RLIMIT_DATA 0x%llx\n",
3211 proc_selfpid(),
3212 (get_bsdtask_info(current_task())
3213 ? proc_name_address(get_bsdtask_info(current_task()))
3214 : "?"),
3215 __FUNCTION__,
3216 (uint64_t) map->size,
3217 (uint64_t) map->data_limit);
3218 DTRACE_VM2(vm_map_enter_RLIMIT_DATA,
3219 vm_map_size_t, map->size,
3220 uint64_t, map->data_limit);
3221 vm_map_enter_RLIMIT_DATA_count++;
3222 }
3223 }
3224
3225 if (result == KERN_SUCCESS) {
3226 vm_prot_t pager_prot;
3227 memory_object_t pager;
3228
3229 #if DEBUG
3230 if (pmap_empty &&
3231 !(vmk_flags.vmkf_no_pmap_check)) {
3232 assert(pmap_is_empty(map->pmap,
3233 *address,
3234 *address + size));
3235 }
3236 #endif /* DEBUG */
3237
3238 /*
3239 * For "named" VM objects, let the pager know that the
3240 * memory object is being mapped. Some pagers need to keep
3241 * track of this, to know when they can reclaim the memory
3242 * object, for example.
3243 * VM calls memory_object_map() for each mapping (specifying
3244 * the protection of each mapping) and calls
3245 * memory_object_last_unmap() when all the mappings are gone.
3246 */
3247 pager_prot = max_protection;
3248 if (needs_copy) {
3249 /*
3250 * Copy-On-Write mapping: won't modify
3251 * the memory object.
3252 */
3253 pager_prot &= ~VM_PROT_WRITE;
3254 }
3255 if (!is_submap &&
3256 object != VM_OBJECT_NULL &&
3257 object->named &&
3258 object->pager != MEMORY_OBJECT_NULL) {
3259 vm_object_lock(object);
3260 pager = object->pager;
3261 if (object->named &&
3262 pager != MEMORY_OBJECT_NULL) {
3263 assert(object->pager_ready);
3264 vm_object_mapping_wait(object, THREAD_UNINT);
3265 vm_object_mapping_begin(object);
3266 vm_object_unlock(object);
3267
3268 kr = memory_object_map(pager, pager_prot);
3269 assert(kr == KERN_SUCCESS);
3270
3271 vm_object_lock(object);
3272 vm_object_mapping_end(object);
3273 }
3274 vm_object_unlock(object);
3275 }
3276 }
3277
3278 assert(map_locked == TRUE);
3279
3280 if (new_mapping_established) {
3281 /*
3282 * If we release the map lock for any reason below,
3283 * another thread could deallocate our new mapping,
3284 * releasing the caller's reference on "caller_object",
3285 * which was transferred to the mapping.
3286 * If this was the only reference, the object could be
3287 * destroyed.
3288 *
3289 * We need to take an extra reference on "caller_object"
3290 * to keep it alive if we need to return the caller's
3291 * reference to the caller in case of failure.
3292 */
3293 if (is_submap) {
3294 vm_map_reference((vm_map_t)caller_object);
3295 } else {
3296 vm_object_reference(caller_object);
3297 }
3298 }
3299
3300 if (!keep_map_locked) {
3301 vm_map_unlock(map);
3302 map_locked = FALSE;
3303 entry = VM_MAP_ENTRY_NULL;
3304 new_entry = VM_MAP_ENTRY_NULL;
3305 }
3306
3307 /*
3308 * We can't hold the map lock if we enter this block.
3309 */
3310
3311 if (result == KERN_SUCCESS) {
3312 /* Wire down the new entry if the user
3313 * requested all new map entries be wired.
3314 */
3315 if ((map->wiring_required) || (superpage_size)) {
3316 assert(!keep_map_locked);
3317 pmap_empty = FALSE; /* pmap won't be empty */
3318 kr = vm_map_wire_kernel(map, start, end,
3319 cur_protection, VM_KERN_MEMORY_MLOCK,
3320 TRUE);
3321 result = kr;
3322 }
3323
3324 }
3325
3326 if (result != KERN_SUCCESS) {
3327 if (new_mapping_established) {
3328 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_FLAGS;
3329
3330 /*
3331 * We have to get rid of the new mappings since we
3332 * won't make them available to the user.
3333 * Try and do that atomically, to minimize the risk
3334 * that someone else create new mappings that range.
3335 */
3336 if (!map_locked) {
3337 vm_map_lock(map);
3338 map_locked = TRUE;
3339 }
3340 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
3341 remove_flags |= VM_MAP_REMOVE_NO_YIELD;
3342 if (permanent) {
3343 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
3344 }
3345 (void) vm_map_delete(map,
3346 *address, *address + size,
3347 remove_flags,
3348 KMEM_GUARD_NONE, &zap_new_list);
3349 }
3350
3351 if (vm_map_zap_first_entry(&zap_old_list)) {
3352 vm_map_entry_t entry1, entry2;
3353
3354 /*
3355 * The new mapping failed. Attempt to restore
3356 * the old mappings, saved in the "zap_old_map".
3357 */
3358 if (!map_locked) {
3359 vm_map_lock(map);
3360 map_locked = TRUE;
3361 }
3362
3363 /* first check if the coast is still clear */
3364 start = vm_map_zap_first_entry(&zap_old_list)->vme_start;
3365 end = vm_map_zap_last_entry(&zap_old_list)->vme_end;
3366
3367 if (vm_map_lookup_entry(map, start, &entry1) ||
3368 vm_map_lookup_entry(map, end, &entry2) ||
3369 entry1 != entry2) {
3370 /*
3371 * Part of that range has already been
3372 * re-mapped: we can't restore the old
3373 * mappings...
3374 */
3375 vm_map_enter_restore_failures++;
3376 } else {
3377 /*
3378 * Transfer the saved map entries from
3379 * "zap_old_map" to the original "map",
3380 * inserting them all after "entry1".
3381 */
3382 while ((entry2 = vm_map_zap_pop(&zap_old_list))) {
3383 vm_map_size_t entry_size;
3384
3385 entry_size = (entry2->vme_end -
3386 entry2->vme_start);
3387 vm_map_store_entry_link(map, entry1, entry2,
3388 VM_MAP_KERNEL_FLAGS_NONE);
3389 map->size += entry_size;
3390 entry1 = entry2;
3391 }
3392 if (map->wiring_required) {
3393 /*
3394 * XXX TODO: we should rewire the
3395 * old pages here...
3396 */
3397 }
3398 vm_map_enter_restore_successes++;
3399 }
3400 }
3401 }
3402
3403 /*
3404 * The caller is responsible for releasing the lock if it requested to
3405 * keep the map locked.
3406 */
3407 if (map_locked && !keep_map_locked) {
3408 vm_map_unlock(map);
3409 }
3410
3411 vm_map_zap_dispose(&zap_old_list);
3412 vm_map_zap_dispose(&zap_new_list);
3413
3414 if (new_mapping_established) {
3415 /*
3416 * The caller had a reference on "caller_object" and we
3417 * transferred that reference to the mapping.
3418 * We also took an extra reference on "caller_object" to keep
3419 * it alive while the map was unlocked.
3420 */
3421 if (result == KERN_SUCCESS) {
3422 /*
3423 * On success, the caller's reference on the object gets
3424 * tranferred to the mapping.
3425 * Release our extra reference.
3426 */
3427 if (is_submap) {
3428 vm_map_deallocate((vm_map_t)caller_object);
3429 } else {
3430 vm_object_deallocate(caller_object);
3431 }
3432 } else {
3433 /*
3434 * On error, the caller expects to still have a
3435 * reference on the object it gave us.
3436 * Let's use our extra reference for that.
3437 */
3438 }
3439 }
3440
3441 return result;
3442
3443 #undef RETURN
3444 }
3445
3446 #if __arm64__
3447 extern const struct memory_object_pager_ops fourk_pager_ops;
3448 kern_return_t
vm_map_enter_fourk(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t alias,vm_object_t object,vm_object_offset_t offset,boolean_t needs_copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)3449 vm_map_enter_fourk(
3450 vm_map_t map,
3451 vm_map_offset_t *address, /* IN/OUT */
3452 vm_map_size_t size,
3453 vm_map_offset_t mask,
3454 int flags,
3455 vm_map_kernel_flags_t vmk_flags,
3456 vm_tag_t alias,
3457 vm_object_t object,
3458 vm_object_offset_t offset,
3459 boolean_t needs_copy,
3460 vm_prot_t cur_protection,
3461 vm_prot_t max_protection,
3462 vm_inherit_t inheritance)
3463 {
3464 vm_map_entry_t entry, new_entry;
3465 vm_map_offset_t start, fourk_start;
3466 vm_map_offset_t end, fourk_end;
3467 vm_map_size_t fourk_size;
3468 kern_return_t result = KERN_SUCCESS;
3469 boolean_t map_locked = FALSE;
3470 boolean_t pmap_empty = TRUE;
3471 boolean_t new_mapping_established = FALSE;
3472 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3473 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3474 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3475 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3476 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3477 const boolean_t is_submap = vmk_flags.vmkf_submap;
3478 boolean_t permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
3479 const boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3480 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3481 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3482 vm_map_offset_t effective_min_offset, effective_max_offset;
3483 kern_return_t kr;
3484 boolean_t clear_map_aligned = FALSE;
3485 memory_object_t fourk_mem_obj;
3486 vm_object_t fourk_object;
3487 vm_map_offset_t fourk_pager_offset;
3488 int fourk_pager_index_start, fourk_pager_index_num;
3489 int cur_idx;
3490 boolean_t fourk_copy;
3491 vm_object_t copy_object;
3492 vm_object_offset_t copy_offset;
3493 VM_MAP_ZAP_DECLARE(zap_list);
3494
3495 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3496 panic("%s:%d", __FUNCTION__, __LINE__);
3497 }
3498 fourk_mem_obj = MEMORY_OBJECT_NULL;
3499 fourk_object = VM_OBJECT_NULL;
3500
3501 if (superpage_size) {
3502 return KERN_NOT_SUPPORTED;
3503 }
3504
3505 if ((cur_protection & VM_PROT_WRITE) &&
3506 (cur_protection & VM_PROT_EXECUTE) &&
3507 #if XNU_TARGET_OS_OSX
3508 map->pmap != kernel_pmap &&
3509 (vm_map_cs_enforcement(map)
3510 #if __arm64__
3511 || !VM_MAP_IS_EXOTIC(map)
3512 #endif /* __arm64__ */
3513 ) &&
3514 #endif /* XNU_TARGET_OS_OSX */
3515 !entry_for_jit) {
3516 DTRACE_VM3(cs_wx,
3517 uint64_t, 0,
3518 uint64_t, 0,
3519 vm_prot_t, cur_protection);
3520 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3521 "turning off execute\n",
3522 proc_selfpid(),
3523 (get_bsdtask_info(current_task())
3524 ? proc_name_address(get_bsdtask_info(current_task()))
3525 : "?"),
3526 __FUNCTION__);
3527 cur_protection &= ~VM_PROT_EXECUTE;
3528 }
3529
3530 /*
3531 * If the task has requested executable lockdown,
3532 * deny any new executable mapping.
3533 */
3534 if (map->map_disallow_new_exec == TRUE) {
3535 if (cur_protection & VM_PROT_EXECUTE) {
3536 return KERN_PROTECTION_FAILURE;
3537 }
3538 }
3539
3540 if (is_submap) {
3541 return KERN_NOT_SUPPORTED;
3542 }
3543 if (vmk_flags.vmkf_already) {
3544 return KERN_NOT_SUPPORTED;
3545 }
3546 if (purgable || entry_for_jit) {
3547 return KERN_NOT_SUPPORTED;
3548 }
3549
3550 effective_min_offset = map->min_offset;
3551
3552 if (vmk_flags.vmkf_beyond_max) {
3553 return KERN_NOT_SUPPORTED;
3554 } else {
3555 effective_max_offset = map->max_offset;
3556 }
3557
3558 if (size == 0 ||
3559 (offset & FOURK_PAGE_MASK) != 0) {
3560 *address = 0;
3561 return KERN_INVALID_ARGUMENT;
3562 }
3563
3564 #define RETURN(value) { result = value; goto BailOut; }
3565
3566 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3567 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3568
3569 if (!anywhere && overwrite) {
3570 return KERN_NOT_SUPPORTED;
3571 }
3572
3573 fourk_start = *address;
3574 fourk_size = size;
3575 fourk_end = fourk_start + fourk_size;
3576
3577 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3578 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3579 size = end - start;
3580
3581 if (anywhere) {
3582 return KERN_NOT_SUPPORTED;
3583 } else {
3584 /*
3585 * Verify that:
3586 * the address doesn't itself violate
3587 * the mask requirement.
3588 */
3589
3590 vm_map_lock(map);
3591 map_locked = TRUE;
3592 if ((start & mask) != 0) {
3593 RETURN(KERN_NO_SPACE);
3594 }
3595
3596 /*
3597 * ... the address is within bounds
3598 */
3599
3600 end = start + size;
3601
3602 if ((start < effective_min_offset) ||
3603 (end > effective_max_offset) ||
3604 (start >= end)) {
3605 RETURN(KERN_INVALID_ADDRESS);
3606 }
3607
3608 /*
3609 * ... the starting address isn't allocated
3610 */
3611 if (vm_map_lookup_entry(map, start, &entry)) {
3612 vm_object_t cur_object, shadow_object;
3613
3614 /*
3615 * We might already some 4K mappings
3616 * in a 16K page here.
3617 */
3618
3619 if (entry->vme_end - entry->vme_start
3620 != SIXTEENK_PAGE_SIZE) {
3621 RETURN(KERN_NO_SPACE);
3622 }
3623 if (entry->is_sub_map) {
3624 RETURN(KERN_NO_SPACE);
3625 }
3626 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3627 RETURN(KERN_NO_SPACE);
3628 }
3629
3630 /* go all the way down the shadow chain */
3631 cur_object = VME_OBJECT(entry);
3632 vm_object_lock(cur_object);
3633 while (cur_object->shadow != VM_OBJECT_NULL) {
3634 shadow_object = cur_object->shadow;
3635 vm_object_lock(shadow_object);
3636 vm_object_unlock(cur_object);
3637 cur_object = shadow_object;
3638 shadow_object = VM_OBJECT_NULL;
3639 }
3640 if (cur_object->internal ||
3641 cur_object->pager == NULL) {
3642 vm_object_unlock(cur_object);
3643 RETURN(KERN_NO_SPACE);
3644 }
3645 if (cur_object->pager->mo_pager_ops
3646 != &fourk_pager_ops) {
3647 vm_object_unlock(cur_object);
3648 RETURN(KERN_NO_SPACE);
3649 }
3650 fourk_object = cur_object;
3651 fourk_mem_obj = fourk_object->pager;
3652
3653 /* keep the "4K" object alive */
3654 vm_object_reference_locked(fourk_object);
3655 memory_object_reference(fourk_mem_obj);
3656 vm_object_unlock(fourk_object);
3657
3658 /* merge permissions */
3659 entry->protection |= cur_protection;
3660 entry->max_protection |= max_protection;
3661
3662 if ((entry->protection & VM_PROT_WRITE) &&
3663 (entry->protection & VM_PROT_ALLEXEC) &&
3664 fourk_binary_compatibility_unsafe &&
3665 fourk_binary_compatibility_allow_wx) {
3666 /* write+execute: need to be "jit" */
3667 entry->used_for_jit = TRUE;
3668 }
3669 goto map_in_fourk_pager;
3670 }
3671
3672 /*
3673 * ... the next region doesn't overlap the
3674 * end point.
3675 */
3676
3677 if ((entry->vme_next != vm_map_to_entry(map)) &&
3678 (entry->vme_next->vme_start < end)) {
3679 RETURN(KERN_NO_SPACE);
3680 }
3681 }
3682
3683 /*
3684 * At this point,
3685 * "start" and "end" should define the endpoints of the
3686 * available new range, and
3687 * "entry" should refer to the region before the new
3688 * range, and
3689 *
3690 * the map should be locked.
3691 */
3692
3693 /* create a new "4K" pager */
3694 fourk_mem_obj = fourk_pager_create();
3695 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3696 assert(fourk_object);
3697
3698 /* keep the "4" object alive */
3699 vm_object_reference(fourk_object);
3700
3701 /* create a "copy" object, to map the "4K" object copy-on-write */
3702 fourk_copy = TRUE;
3703 result = vm_object_copy_strategically(fourk_object,
3704 0,
3705 end - start,
3706 ©_object,
3707 ©_offset,
3708 &fourk_copy);
3709 assert(result == KERN_SUCCESS);
3710 assert(copy_object != VM_OBJECT_NULL);
3711 assert(copy_offset == 0);
3712
3713 /* map the "4K" pager's copy object */
3714 new_entry = vm_map_entry_insert(map,
3715 entry,
3716 vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)),
3717 vm_map_round_page(end, VM_MAP_PAGE_MASK(map)),
3718 copy_object,
3719 0, /* offset */
3720 vmk_flags,
3721 FALSE, /* needs_copy */
3722 cur_protection, max_protection,
3723 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3724 VM_INHERIT_NONE : inheritance),
3725 no_cache,
3726 permanent,
3727 superpage_size,
3728 clear_map_aligned,
3729 alias);
3730 entry = new_entry;
3731
3732 #if VM_MAP_DEBUG_FOURK
3733 if (vm_map_debug_fourk) {
3734 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3735 map,
3736 (uint64_t) entry->vme_start,
3737 (uint64_t) entry->vme_end,
3738 fourk_mem_obj);
3739 }
3740 #endif /* VM_MAP_DEBUG_FOURK */
3741
3742 new_mapping_established = TRUE;
3743
3744 map_in_fourk_pager:
3745 /* "map" the original "object" where it belongs in the "4K" pager */
3746 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3747 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3748 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3749 fourk_pager_index_num = 4;
3750 } else {
3751 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3752 }
3753 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3754 fourk_pager_index_num = 4 - fourk_pager_index_start;
3755 }
3756 for (cur_idx = 0;
3757 cur_idx < fourk_pager_index_num;
3758 cur_idx++) {
3759 vm_object_t old_object;
3760 vm_object_offset_t old_offset;
3761
3762 kr = fourk_pager_populate(fourk_mem_obj,
3763 TRUE, /* overwrite */
3764 fourk_pager_index_start + cur_idx,
3765 object,
3766 (object
3767 ? (offset +
3768 (cur_idx * FOURK_PAGE_SIZE))
3769 : 0),
3770 &old_object,
3771 &old_offset);
3772 #if VM_MAP_DEBUG_FOURK
3773 if (vm_map_debug_fourk) {
3774 if (old_object == (vm_object_t) -1 &&
3775 old_offset == (vm_object_offset_t) -1) {
3776 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3777 "pager [%p:0x%llx] "
3778 "populate[%d] "
3779 "[object:%p,offset:0x%llx]\n",
3780 map,
3781 (uint64_t) entry->vme_start,
3782 (uint64_t) entry->vme_end,
3783 fourk_mem_obj,
3784 VME_OFFSET(entry),
3785 fourk_pager_index_start + cur_idx,
3786 object,
3787 (object
3788 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3789 : 0));
3790 } else {
3791 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3792 "pager [%p:0x%llx] "
3793 "populate[%d] [object:%p,offset:0x%llx] "
3794 "old [%p:0x%llx]\n",
3795 map,
3796 (uint64_t) entry->vme_start,
3797 (uint64_t) entry->vme_end,
3798 fourk_mem_obj,
3799 VME_OFFSET(entry),
3800 fourk_pager_index_start + cur_idx,
3801 object,
3802 (object
3803 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3804 : 0),
3805 old_object,
3806 old_offset);
3807 }
3808 }
3809 #endif /* VM_MAP_DEBUG_FOURK */
3810
3811 assert(kr == KERN_SUCCESS);
3812 if (object != old_object &&
3813 object != VM_OBJECT_NULL &&
3814 object != (vm_object_t) -1) {
3815 vm_object_reference(object);
3816 }
3817 if (object != old_object &&
3818 old_object != VM_OBJECT_NULL &&
3819 old_object != (vm_object_t) -1) {
3820 vm_object_deallocate(old_object);
3821 }
3822 }
3823
3824 BailOut:
3825 assert(map_locked == TRUE);
3826
3827 if (result == KERN_SUCCESS) {
3828 vm_prot_t pager_prot;
3829 memory_object_t pager;
3830
3831 #if DEBUG
3832 if (pmap_empty &&
3833 !(vmk_flags.vmkf_no_pmap_check)) {
3834 assert(pmap_is_empty(map->pmap,
3835 *address,
3836 *address + size));
3837 }
3838 #endif /* DEBUG */
3839
3840 /*
3841 * For "named" VM objects, let the pager know that the
3842 * memory object is being mapped. Some pagers need to keep
3843 * track of this, to know when they can reclaim the memory
3844 * object, for example.
3845 * VM calls memory_object_map() for each mapping (specifying
3846 * the protection of each mapping) and calls
3847 * memory_object_last_unmap() when all the mappings are gone.
3848 */
3849 pager_prot = max_protection;
3850 if (needs_copy) {
3851 /*
3852 * Copy-On-Write mapping: won't modify
3853 * the memory object.
3854 */
3855 pager_prot &= ~VM_PROT_WRITE;
3856 }
3857 if (!is_submap &&
3858 object != VM_OBJECT_NULL &&
3859 object->named &&
3860 object->pager != MEMORY_OBJECT_NULL) {
3861 vm_object_lock(object);
3862 pager = object->pager;
3863 if (object->named &&
3864 pager != MEMORY_OBJECT_NULL) {
3865 assert(object->pager_ready);
3866 vm_object_mapping_wait(object, THREAD_UNINT);
3867 vm_object_mapping_begin(object);
3868 vm_object_unlock(object);
3869
3870 kr = memory_object_map(pager, pager_prot);
3871 assert(kr == KERN_SUCCESS);
3872
3873 vm_object_lock(object);
3874 vm_object_mapping_end(object);
3875 }
3876 vm_object_unlock(object);
3877 }
3878 if (!is_submap &&
3879 fourk_object != VM_OBJECT_NULL &&
3880 fourk_object->named &&
3881 fourk_object->pager != MEMORY_OBJECT_NULL) {
3882 vm_object_lock(fourk_object);
3883 pager = fourk_object->pager;
3884 if (fourk_object->named &&
3885 pager != MEMORY_OBJECT_NULL) {
3886 assert(fourk_object->pager_ready);
3887 vm_object_mapping_wait(fourk_object,
3888 THREAD_UNINT);
3889 vm_object_mapping_begin(fourk_object);
3890 vm_object_unlock(fourk_object);
3891
3892 kr = memory_object_map(pager, VM_PROT_READ);
3893 assert(kr == KERN_SUCCESS);
3894
3895 vm_object_lock(fourk_object);
3896 vm_object_mapping_end(fourk_object);
3897 }
3898 vm_object_unlock(fourk_object);
3899 }
3900 }
3901
3902 if (fourk_object != VM_OBJECT_NULL) {
3903 vm_object_deallocate(fourk_object);
3904 fourk_object = VM_OBJECT_NULL;
3905 memory_object_deallocate(fourk_mem_obj);
3906 fourk_mem_obj = MEMORY_OBJECT_NULL;
3907 }
3908
3909 assert(map_locked == TRUE);
3910
3911 if (!keep_map_locked) {
3912 vm_map_unlock(map);
3913 map_locked = FALSE;
3914 }
3915
3916 /*
3917 * We can't hold the map lock if we enter this block.
3918 */
3919
3920 if (result == KERN_SUCCESS) {
3921 /* Wire down the new entry if the user
3922 * requested all new map entries be wired.
3923 */
3924 if ((map->wiring_required) || (superpage_size)) {
3925 assert(!keep_map_locked);
3926 pmap_empty = FALSE; /* pmap won't be empty */
3927 kr = vm_map_wire_kernel(map, start, end,
3928 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3929 TRUE);
3930 result = kr;
3931 }
3932
3933 }
3934
3935 if (result != KERN_SUCCESS) {
3936 if (new_mapping_established) {
3937 /*
3938 * We have to get rid of the new mappings since we
3939 * won't make them available to the user.
3940 * Try and do that atomically, to minimize the risk
3941 * that someone else create new mappings that range.
3942 */
3943
3944 if (!map_locked) {
3945 vm_map_lock(map);
3946 map_locked = TRUE;
3947 }
3948 (void)vm_map_delete(map, *address, *address + size,
3949 VM_MAP_REMOVE_NO_MAP_ALIGN | VM_MAP_REMOVE_NO_YIELD,
3950 KMEM_GUARD_NONE, &zap_list);
3951 }
3952 }
3953
3954 /*
3955 * The caller is responsible for releasing the lock if it requested to
3956 * keep the map locked.
3957 */
3958 if (map_locked && !keep_map_locked) {
3959 vm_map_unlock(map);
3960 }
3961
3962 vm_map_zap_dispose(&zap_list);
3963
3964 return result;
3965
3966 #undef RETURN
3967 }
3968 #endif /* __arm64__ */
3969
3970 /*
3971 * Counters for the prefault optimization.
3972 */
3973 int64_t vm_prefault_nb_pages = 0;
3974 int64_t vm_prefault_nb_bailout = 0;
3975
3976 static kern_return_t
vm_map_enter_mem_object_helper(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance,upl_page_list_ptr_t page_list,unsigned int page_list_count)3977 vm_map_enter_mem_object_helper(
3978 vm_map_t target_map,
3979 vm_map_offset_t *address,
3980 vm_map_size_t initial_size,
3981 vm_map_offset_t mask,
3982 int flags,
3983 vm_map_kernel_flags_t vmk_flags,
3984 vm_tag_t tag,
3985 ipc_port_t port,
3986 vm_object_offset_t offset,
3987 boolean_t copy,
3988 vm_prot_t cur_protection,
3989 vm_prot_t max_protection,
3990 vm_inherit_t inheritance,
3991 upl_page_list_ptr_t page_list,
3992 unsigned int page_list_count)
3993 {
3994 vm_map_address_t map_addr;
3995 vm_map_size_t map_size;
3996 vm_object_t object;
3997 vm_object_size_t size;
3998 kern_return_t result;
3999 boolean_t mask_cur_protection, mask_max_protection;
4000 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4001 vm_map_offset_t offset_in_mapping = 0;
4002 #if __arm64__
4003 boolean_t fourk = vmk_flags.vmkf_fourk;
4004 #endif /* __arm64__ */
4005
4006 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4007 /* XXX TODO4K prefaulting depends on page size... */
4008 try_prefault = FALSE;
4009 }
4010
4011 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4012
4013 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4014 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4015 cur_protection &= ~VM_PROT_IS_MASK;
4016 max_protection &= ~VM_PROT_IS_MASK;
4017
4018 /*
4019 * Check arguments for validity
4020 */
4021 if ((target_map == VM_MAP_NULL) ||
4022 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4023 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4024 (inheritance > VM_INHERIT_LAST_VALID) ||
4025 (try_prefault && (copy || !page_list)) ||
4026 initial_size == 0) {
4027 return KERN_INVALID_ARGUMENT;
4028 }
4029
4030 #if __arm64__
4031 if (cur_protection & VM_PROT_EXECUTE) {
4032 cur_protection |= VM_PROT_READ;
4033 }
4034
4035 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4036 /* no "fourk" if map is using a sub-page page size */
4037 fourk = FALSE;
4038 }
4039 if (fourk) {
4040 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4041 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4042 } else
4043 #endif /* __arm64__ */
4044 {
4045 map_addr = vm_map_trunc_page(*address,
4046 VM_MAP_PAGE_MASK(target_map));
4047 map_size = vm_map_round_page(initial_size,
4048 VM_MAP_PAGE_MASK(target_map));
4049 }
4050 size = vm_object_round_page(initial_size);
4051
4052 /*
4053 * Find the vm object (if any) corresponding to this port.
4054 */
4055 if (!IP_VALID(port)) {
4056 object = VM_OBJECT_NULL;
4057 offset = 0;
4058 copy = FALSE;
4059 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4060 vm_named_entry_t named_entry;
4061 vm_object_offset_t data_offset;
4062
4063 named_entry = mach_memory_entry_from_port(port);
4064
4065 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4066 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4067 data_offset = named_entry->data_offset;
4068 offset += named_entry->data_offset;
4069 } else {
4070 data_offset = 0;
4071 }
4072
4073 /* a few checks to make sure user is obeying rules */
4074 if (size == 0) {
4075 if (offset >= named_entry->size) {
4076 return KERN_INVALID_RIGHT;
4077 }
4078 size = named_entry->size - offset;
4079 }
4080 if (mask_max_protection) {
4081 max_protection &= named_entry->protection;
4082 }
4083 if (mask_cur_protection) {
4084 cur_protection &= named_entry->protection;
4085 }
4086 if ((named_entry->protection & max_protection) !=
4087 max_protection) {
4088 return KERN_INVALID_RIGHT;
4089 }
4090 if ((named_entry->protection & cur_protection) !=
4091 cur_protection) {
4092 return KERN_INVALID_RIGHT;
4093 }
4094 if (offset + size < offset) {
4095 /* overflow */
4096 return KERN_INVALID_ARGUMENT;
4097 }
4098 if (named_entry->size < (offset + initial_size)) {
4099 return KERN_INVALID_ARGUMENT;
4100 }
4101
4102 if (named_entry->is_copy) {
4103 /* for a vm_map_copy, we can only map it whole */
4104 if ((size != named_entry->size) &&
4105 (vm_map_round_page(size,
4106 VM_MAP_PAGE_MASK(target_map)) ==
4107 named_entry->size)) {
4108 /* XXX FBDP use the rounded size... */
4109 size = vm_map_round_page(
4110 size,
4111 VM_MAP_PAGE_MASK(target_map));
4112 }
4113 }
4114
4115 /* the callers parameter offset is defined to be the */
4116 /* offset from beginning of named entry offset in object */
4117 offset = offset + named_entry->offset;
4118
4119 if (!VM_MAP_PAGE_ALIGNED(size,
4120 VM_MAP_PAGE_MASK(target_map))) {
4121 /*
4122 * Let's not map more than requested;
4123 * vm_map_enter() will handle this "not map-aligned"
4124 * case.
4125 */
4126 map_size = size;
4127 }
4128
4129 named_entry_lock(named_entry);
4130 if (named_entry->is_sub_map) {
4131 vm_map_t submap;
4132
4133 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4134 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4135 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4136 }
4137
4138 submap = named_entry->backing.map;
4139 vm_map_reference(submap);
4140 named_entry_unlock(named_entry);
4141
4142 vmk_flags.vmkf_submap = TRUE;
4143
4144 result = vm_map_enter(target_map,
4145 &map_addr,
4146 map_size,
4147 mask,
4148 flags,
4149 vmk_flags,
4150 tag,
4151 (vm_object_t)(uintptr_t) submap,
4152 offset,
4153 copy,
4154 cur_protection,
4155 max_protection,
4156 inheritance);
4157 if (result != KERN_SUCCESS) {
4158 vm_map_deallocate(submap);
4159 } else {
4160 /*
4161 * No need to lock "submap" just to check its
4162 * "mapped" flag: that flag is never reset
4163 * once it's been set and if we race, we'll
4164 * just end up setting it twice, which is OK.
4165 */
4166 if (submap->mapped_in_other_pmaps == FALSE &&
4167 vm_map_pmap(submap) != PMAP_NULL &&
4168 vm_map_pmap(submap) !=
4169 vm_map_pmap(target_map)) {
4170 /*
4171 * This submap is being mapped in a map
4172 * that uses a different pmap.
4173 * Set its "mapped_in_other_pmaps" flag
4174 * to indicate that we now need to
4175 * remove mappings from all pmaps rather
4176 * than just the submap's pmap.
4177 */
4178 vm_map_lock(submap);
4179 submap->mapped_in_other_pmaps = TRUE;
4180 vm_map_unlock(submap);
4181 }
4182 *address = map_addr;
4183 }
4184 return result;
4185 } else if (named_entry->is_copy) {
4186 kern_return_t kr;
4187 vm_map_copy_t copy_map;
4188 vm_map_entry_t copy_entry;
4189 vm_map_offset_t copy_addr;
4190 vm_map_copy_t target_copy_map;
4191 vm_map_offset_t overmap_start, overmap_end;
4192 vm_map_offset_t trimmed_start;
4193 vm_map_size_t target_size;
4194
4195 if (flags & ~(VM_FLAGS_FIXED |
4196 VM_FLAGS_ANYWHERE |
4197 VM_FLAGS_OVERWRITE |
4198 VM_FLAGS_RETURN_4K_DATA_ADDR |
4199 VM_FLAGS_RETURN_DATA_ADDR |
4200 VM_FLAGS_ALIAS_MASK)) {
4201 named_entry_unlock(named_entry);
4202 return KERN_INVALID_ARGUMENT;
4203 }
4204
4205 copy_map = named_entry->backing.copy;
4206 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4207 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4208 /* unsupported type; should not happen */
4209 printf("vm_map_enter_mem_object: "
4210 "memory_entry->backing.copy "
4211 "unsupported type 0x%x\n",
4212 copy_map->type);
4213 named_entry_unlock(named_entry);
4214 return KERN_INVALID_ARGUMENT;
4215 }
4216
4217 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4218 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4219 }
4220
4221 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4222 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4223 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4224 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4225 offset_in_mapping &= ~((signed)(0xFFF));
4226 }
4227 }
4228
4229 target_copy_map = VM_MAP_COPY_NULL;
4230 target_size = copy_map->size;
4231 overmap_start = 0;
4232 overmap_end = 0;
4233 trimmed_start = 0;
4234 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4235 DEBUG4K_ADJUST("adjusting...\n");
4236 kr = vm_map_copy_adjust_to_target(
4237 copy_map,
4238 offset /* includes data_offset */,
4239 initial_size,
4240 target_map,
4241 copy,
4242 &target_copy_map,
4243 &overmap_start,
4244 &overmap_end,
4245 &trimmed_start);
4246 if (kr != KERN_SUCCESS) {
4247 named_entry_unlock(named_entry);
4248 return kr;
4249 }
4250 target_size = target_copy_map->size;
4251 if (trimmed_start >= data_offset) {
4252 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4253 } else {
4254 data_offset -= trimmed_start;
4255 }
4256 } else {
4257 /*
4258 * Assert that the vm_map_copy is coming from the right
4259 * zone and hasn't been forged
4260 */
4261 vm_map_copy_require(copy_map);
4262 target_copy_map = copy_map;
4263 }
4264
4265 /* reserve a contiguous range */
4266 kr = vm_map_enter(target_map,
4267 &map_addr,
4268 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4269 mask,
4270 flags & (VM_FLAGS_ANYWHERE |
4271 VM_FLAGS_OVERWRITE |
4272 VM_FLAGS_RETURN_4K_DATA_ADDR |
4273 VM_FLAGS_RETURN_DATA_ADDR),
4274 vmk_flags,
4275 tag,
4276 VM_OBJECT_NULL,
4277 0,
4278 FALSE, /* copy */
4279 cur_protection,
4280 max_protection,
4281 inheritance);
4282 if (kr != KERN_SUCCESS) {
4283 DEBUG4K_ERROR("kr 0x%x\n", kr);
4284 if (target_copy_map != copy_map) {
4285 vm_map_copy_discard(target_copy_map);
4286 target_copy_map = VM_MAP_COPY_NULL;
4287 }
4288 named_entry_unlock(named_entry);
4289 return kr;
4290 }
4291
4292 copy_addr = map_addr;
4293
4294 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4295 copy_entry != vm_map_copy_to_entry(target_copy_map);
4296 copy_entry = copy_entry->vme_next) {
4297 int remap_flags;
4298 vm_map_kernel_flags_t vmk_remap_flags;
4299 vm_map_t copy_submap = VM_MAP_NULL;
4300 vm_object_t copy_object = VM_OBJECT_NULL;
4301 vm_map_size_t copy_size;
4302 vm_object_offset_t copy_offset;
4303 int copy_vm_alias;
4304 boolean_t do_copy;
4305
4306 do_copy = FALSE;
4307 remap_flags = 0;
4308 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4309
4310 if (copy_entry->is_sub_map) {
4311 copy_submap = VME_SUBMAP(copy_entry);
4312 copy_object = (vm_object_t)copy_submap;
4313 } else {
4314 copy_object = VME_OBJECT(copy_entry);
4315 }
4316 copy_offset = VME_OFFSET(copy_entry);
4317 copy_size = (copy_entry->vme_end -
4318 copy_entry->vme_start);
4319 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4320 if (copy_vm_alias == 0) {
4321 /*
4322 * Caller does not want a specific
4323 * alias for this new mapping: use
4324 * the alias of the original mapping.
4325 */
4326 copy_vm_alias = VME_ALIAS(copy_entry);
4327 }
4328
4329 /* sanity check */
4330 if ((copy_addr + copy_size) >
4331 (map_addr +
4332 overmap_start + overmap_end +
4333 named_entry->size /* XXX full size */)) {
4334 /* over-mapping too much !? */
4335 kr = KERN_INVALID_ARGUMENT;
4336 DEBUG4K_ERROR("kr 0x%x\n", kr);
4337 /* abort */
4338 break;
4339 }
4340
4341 /* take a reference on the object */
4342 if (copy_entry->is_sub_map) {
4343 vmk_remap_flags.vmkf_submap = TRUE;
4344 vm_map_reference(copy_submap);
4345 } else {
4346 if (!copy &&
4347 copy_object != VM_OBJECT_NULL &&
4348 copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4349 /*
4350 * We need to resolve our side of this
4351 * "symmetric" copy-on-write now; we
4352 * need a new object to map and share,
4353 * instead of the current one which
4354 * might still be shared with the
4355 * original mapping.
4356 *
4357 * Note: A "vm_map_copy_t" does not
4358 * have a lock but we're protected by
4359 * the named entry's lock here.
4360 */
4361 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4362 VME_OBJECT_SHADOW(copy_entry, copy_size, TRUE);
4363 assert(copy_object != VME_OBJECT(copy_entry));
4364 if (!copy_entry->needs_copy &&
4365 copy_entry->protection & VM_PROT_WRITE) {
4366 vm_prot_t prot;
4367
4368 prot = copy_entry->protection & ~VM_PROT_WRITE;
4369 vm_object_pmap_protect(copy_object,
4370 copy_offset,
4371 copy_size,
4372 PMAP_NULL,
4373 PAGE_SIZE,
4374 0,
4375 prot);
4376 }
4377 copy_entry->needs_copy = FALSE;
4378 copy_entry->is_shared = TRUE;
4379 copy_object = VME_OBJECT(copy_entry);
4380 copy_offset = VME_OFFSET(copy_entry);
4381 vm_object_lock(copy_object);
4382 /* we're about to make a shared mapping of this object */
4383 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4384 copy_object->true_share = TRUE;
4385 vm_object_unlock(copy_object);
4386 }
4387
4388 if (copy_object != VM_OBJECT_NULL &&
4389 copy_object->named &&
4390 copy_object->pager != MEMORY_OBJECT_NULL &&
4391 copy_object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4392 memory_object_t pager;
4393 vm_prot_t pager_prot;
4394
4395 /*
4396 * For "named" VM objects, let the pager know that the
4397 * memory object is being mapped. Some pagers need to keep
4398 * track of this, to know when they can reclaim the memory
4399 * object, for example.
4400 * VM calls memory_object_map() for each mapping (specifying
4401 * the protection of each mapping) and calls
4402 * memory_object_last_unmap() when all the mappings are gone.
4403 */
4404 pager_prot = max_protection;
4405 if (copy) {
4406 /*
4407 * Copy-On-Write mapping: won't modify the
4408 * memory object.
4409 */
4410 pager_prot &= ~VM_PROT_WRITE;
4411 }
4412 vm_object_lock(copy_object);
4413 pager = copy_object->pager;
4414 if (copy_object->named &&
4415 pager != MEMORY_OBJECT_NULL &&
4416 copy_object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4417 assert(copy_object->pager_ready);
4418 vm_object_mapping_wait(copy_object, THREAD_UNINT);
4419 vm_object_mapping_begin(copy_object);
4420 vm_object_unlock(copy_object);
4421
4422 kr = memory_object_map(pager, pager_prot);
4423 assert(kr == KERN_SUCCESS);
4424
4425 vm_object_lock(copy_object);
4426 vm_object_mapping_end(copy_object);
4427 }
4428 vm_object_unlock(copy_object);
4429 }
4430
4431 /*
4432 * Perform the copy if requested
4433 */
4434
4435 if (copy && copy_object != VM_OBJECT_NULL) {
4436 vm_object_t new_object;
4437 vm_object_offset_t new_offset;
4438
4439 result = vm_object_copy_strategically(copy_object, copy_offset,
4440 copy_size,
4441 &new_object, &new_offset,
4442 &do_copy);
4443
4444
4445 if (result == KERN_MEMORY_RESTART_COPY) {
4446 boolean_t success;
4447 boolean_t src_needs_copy;
4448
4449 /*
4450 * XXX
4451 * We currently ignore src_needs_copy.
4452 * This really is the issue of how to make
4453 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4454 * non-kernel users to use. Solution forthcoming.
4455 * In the meantime, since we don't allow non-kernel
4456 * memory managers to specify symmetric copy,
4457 * we won't run into problems here.
4458 */
4459 new_object = copy_object;
4460 new_offset = copy_offset;
4461 success = vm_object_copy_quickly(new_object,
4462 new_offset,
4463 copy_size,
4464 &src_needs_copy,
4465 &do_copy);
4466 assert(success);
4467 result = KERN_SUCCESS;
4468 }
4469 if (result != KERN_SUCCESS) {
4470 kr = result;
4471 break;
4472 }
4473
4474 copy_object = new_object;
4475 copy_offset = new_offset;
4476 /*
4477 * No extra object reference for the mapping:
4478 * the mapping should be the only thing keeping
4479 * this new object alive.
4480 */
4481 } else {
4482 /*
4483 * We already have the right object
4484 * to map.
4485 */
4486 copy_object = VME_OBJECT(copy_entry);
4487 /* take an extra ref for the mapping below */
4488 vm_object_reference(copy_object);
4489 }
4490 }
4491
4492 /* over-map the object into destination */
4493 remap_flags |= flags;
4494 remap_flags |= VM_FLAGS_FIXED;
4495 remap_flags |= VM_FLAGS_OVERWRITE;
4496 remap_flags &= ~VM_FLAGS_ANYWHERE;
4497 if (!copy && !copy_entry->is_sub_map) {
4498 /*
4499 * copy-on-write should have been
4500 * resolved at this point, or we would
4501 * end up sharing instead of copying.
4502 */
4503 assert(!copy_entry->needs_copy);
4504 }
4505 #if XNU_TARGET_OS_OSX
4506 if (copy_entry->used_for_jit) {
4507 vmk_remap_flags.vmkf_map_jit = TRUE;
4508 }
4509 #endif /* XNU_TARGET_OS_OSX */
4510
4511 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4512 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4513 kr = vm_map_enter(target_map,
4514 ©_addr,
4515 copy_size,
4516 (vm_map_offset_t) 0,
4517 remap_flags,
4518 vmk_remap_flags,
4519 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4520 copy_object,
4521 copy_offset,
4522 ((copy_object == NULL)
4523 ? FALSE
4524 : (copy || copy_entry->needs_copy)),
4525 cur_protection,
4526 max_protection,
4527 inheritance);
4528 if (kr != KERN_SUCCESS) {
4529 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4530 if (copy_entry->is_sub_map) {
4531 vm_map_deallocate(copy_submap);
4532 } else {
4533 vm_object_deallocate(copy_object);
4534 }
4535 /* abort */
4536 break;
4537 }
4538
4539 /* next mapping */
4540 copy_addr += copy_size;
4541 }
4542
4543 if (kr == KERN_SUCCESS) {
4544 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4545 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4546 *address = map_addr + offset_in_mapping;
4547 } else {
4548 *address = map_addr;
4549 }
4550 if (overmap_start) {
4551 *address += overmap_start;
4552 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4553 }
4554 }
4555 named_entry_unlock(named_entry);
4556 if (target_copy_map != copy_map) {
4557 vm_map_copy_discard(target_copy_map);
4558 target_copy_map = VM_MAP_COPY_NULL;
4559 }
4560
4561 if (kr != KERN_SUCCESS) {
4562 if (!(flags & VM_FLAGS_OVERWRITE)) {
4563 /* deallocate the contiguous range */
4564 (void) vm_deallocate(target_map,
4565 map_addr,
4566 map_size);
4567 }
4568 }
4569
4570 return kr;
4571 }
4572
4573 if (named_entry->is_object) {
4574 unsigned int access;
4575 vm_prot_t protections;
4576 unsigned int wimg_mode;
4577
4578 /* we are mapping a VM object */
4579
4580 protections = named_entry->protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
4581 access = GET_MAP_MEM(named_entry->protection);
4582
4583 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4584 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4585 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4586 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4587 offset_in_mapping &= ~((signed)(0xFFF));
4588 }
4589 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4590 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4591 }
4592
4593 object = vm_named_entry_to_vm_object(named_entry);
4594 assert(object != VM_OBJECT_NULL);
4595 vm_object_lock(object);
4596 named_entry_unlock(named_entry);
4597
4598 vm_object_reference_locked(object);
4599
4600 wimg_mode = object->wimg_bits;
4601 vm_prot_to_wimg(access, &wimg_mode);
4602 if (object->wimg_bits != wimg_mode) {
4603 vm_object_change_wimg_mode(object, wimg_mode);
4604 }
4605
4606 vm_object_unlock(object);
4607 } else {
4608 panic("invalid VM named entry %p", named_entry);
4609 }
4610 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4611 /*
4612 * JMM - This is temporary until we unify named entries
4613 * and raw memory objects.
4614 *
4615 * Detected fake ip_kotype for a memory object. In
4616 * this case, the port isn't really a port at all, but
4617 * instead is just a raw memory object.
4618 */
4619 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4620 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4621 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4622 }
4623
4624 object = memory_object_to_vm_object((memory_object_t)port);
4625 if (object == VM_OBJECT_NULL) {
4626 return KERN_INVALID_OBJECT;
4627 }
4628 vm_object_reference(object);
4629
4630 /* wait for object (if any) to be ready */
4631 if (object != VM_OBJECT_NULL) {
4632 if (object == kernel_object) {
4633 printf("Warning: Attempt to map kernel object"
4634 " by a non-private kernel entity\n");
4635 return KERN_INVALID_OBJECT;
4636 }
4637 if (!object->pager_ready) {
4638 vm_object_lock(object);
4639
4640 while (!object->pager_ready) {
4641 vm_object_wait(object,
4642 VM_OBJECT_EVENT_PAGER_READY,
4643 THREAD_UNINT);
4644 vm_object_lock(object);
4645 }
4646 vm_object_unlock(object);
4647 }
4648 }
4649 } else {
4650 return KERN_INVALID_OBJECT;
4651 }
4652
4653 if (object != VM_OBJECT_NULL &&
4654 object->named &&
4655 object->pager != MEMORY_OBJECT_NULL &&
4656 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4657 memory_object_t pager;
4658 vm_prot_t pager_prot;
4659 kern_return_t kr;
4660
4661 /*
4662 * For "named" VM objects, let the pager know that the
4663 * memory object is being mapped. Some pagers need to keep
4664 * track of this, to know when they can reclaim the memory
4665 * object, for example.
4666 * VM calls memory_object_map() for each mapping (specifying
4667 * the protection of each mapping) and calls
4668 * memory_object_last_unmap() when all the mappings are gone.
4669 */
4670 pager_prot = max_protection;
4671 if (copy) {
4672 /*
4673 * Copy-On-Write mapping: won't modify the
4674 * memory object.
4675 */
4676 pager_prot &= ~VM_PROT_WRITE;
4677 }
4678 vm_object_lock(object);
4679 pager = object->pager;
4680 if (object->named &&
4681 pager != MEMORY_OBJECT_NULL &&
4682 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4683 assert(object->pager_ready);
4684 vm_object_mapping_wait(object, THREAD_UNINT);
4685 vm_object_mapping_begin(object);
4686 vm_object_unlock(object);
4687
4688 kr = memory_object_map(pager, pager_prot);
4689 assert(kr == KERN_SUCCESS);
4690
4691 vm_object_lock(object);
4692 vm_object_mapping_end(object);
4693 }
4694 vm_object_unlock(object);
4695 }
4696
4697 /*
4698 * Perform the copy if requested
4699 */
4700
4701 if (copy) {
4702 vm_object_t new_object;
4703 vm_object_offset_t new_offset;
4704
4705 result = vm_object_copy_strategically(object, offset,
4706 map_size,
4707 &new_object, &new_offset,
4708 ©);
4709
4710
4711 if (result == KERN_MEMORY_RESTART_COPY) {
4712 boolean_t success;
4713 boolean_t src_needs_copy;
4714
4715 /*
4716 * XXX
4717 * We currently ignore src_needs_copy.
4718 * This really is the issue of how to make
4719 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4720 * non-kernel users to use. Solution forthcoming.
4721 * In the meantime, since we don't allow non-kernel
4722 * memory managers to specify symmetric copy,
4723 * we won't run into problems here.
4724 */
4725 new_object = object;
4726 new_offset = offset;
4727 success = vm_object_copy_quickly(new_object,
4728 new_offset,
4729 map_size,
4730 &src_needs_copy,
4731 ©);
4732 assert(success);
4733 result = KERN_SUCCESS;
4734 }
4735 /*
4736 * Throw away the reference to the
4737 * original object, as it won't be mapped.
4738 */
4739
4740 vm_object_deallocate(object);
4741
4742 if (result != KERN_SUCCESS) {
4743 return result;
4744 }
4745
4746 object = new_object;
4747 offset = new_offset;
4748 }
4749
4750 /*
4751 * If non-kernel users want to try to prefault pages, the mapping and prefault
4752 * needs to be atomic.
4753 */
4754 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4755 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4756
4757 #if __arm64__
4758 if (fourk) {
4759 /* map this object in a "4K" pager */
4760 result = vm_map_enter_fourk(target_map,
4761 &map_addr,
4762 map_size,
4763 (vm_map_offset_t) mask,
4764 flags,
4765 vmk_flags,
4766 tag,
4767 object,
4768 offset,
4769 copy,
4770 cur_protection,
4771 max_protection,
4772 inheritance);
4773 } else
4774 #endif /* __arm64__ */
4775 {
4776 result = vm_map_enter(target_map,
4777 &map_addr, map_size,
4778 (vm_map_offset_t)mask,
4779 flags,
4780 vmk_flags,
4781 tag,
4782 object, offset,
4783 copy,
4784 cur_protection, max_protection,
4785 inheritance);
4786 }
4787 if (result != KERN_SUCCESS) {
4788 vm_object_deallocate(object);
4789 }
4790
4791 /*
4792 * Try to prefault, and do not forget to release the vm map lock.
4793 */
4794 if (result == KERN_SUCCESS && try_prefault) {
4795 mach_vm_address_t va = map_addr;
4796 kern_return_t kr = KERN_SUCCESS;
4797 unsigned int i = 0;
4798 int pmap_options;
4799
4800 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4801 if (object->internal) {
4802 pmap_options |= PMAP_OPTIONS_INTERNAL;
4803 }
4804
4805 for (i = 0; i < page_list_count; ++i) {
4806 if (!UPL_VALID_PAGE(page_list, i)) {
4807 if (kernel_prefault) {
4808 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4809 result = KERN_MEMORY_ERROR;
4810 break;
4811 }
4812 } else {
4813 /*
4814 * If this function call failed, we should stop
4815 * trying to optimize, other calls are likely
4816 * going to fail too.
4817 *
4818 * We are not gonna report an error for such
4819 * failure though. That's an optimization, not
4820 * something critical.
4821 */
4822 kr = pmap_enter_options(target_map->pmap,
4823 va, UPL_PHYS_PAGE(page_list, i),
4824 cur_protection, VM_PROT_NONE,
4825 0, TRUE, pmap_options, NULL);
4826 if (kr != KERN_SUCCESS) {
4827 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4828 if (kernel_prefault) {
4829 result = kr;
4830 }
4831 break;
4832 }
4833 OSIncrementAtomic64(&vm_prefault_nb_pages);
4834 }
4835
4836 /* Next virtual address */
4837 va += PAGE_SIZE;
4838 }
4839 if (vmk_flags.vmkf_keep_map_locked) {
4840 vm_map_unlock(target_map);
4841 }
4842 }
4843
4844 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4845 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4846 *address = map_addr + offset_in_mapping;
4847 } else {
4848 *address = map_addr;
4849 }
4850 return result;
4851 }
4852
4853 kern_return_t
vm_map_enter_mem_object(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)4854 vm_map_enter_mem_object(
4855 vm_map_t target_map,
4856 vm_map_offset_t *address,
4857 vm_map_size_t initial_size,
4858 vm_map_offset_t mask,
4859 int flags,
4860 vm_map_kernel_flags_t vmk_flags,
4861 vm_tag_t tag,
4862 ipc_port_t port,
4863 vm_object_offset_t offset,
4864 boolean_t copy,
4865 vm_prot_t cur_protection,
4866 vm_prot_t max_protection,
4867 vm_inherit_t inheritance)
4868 {
4869 kern_return_t ret;
4870
4871 ret = vm_map_enter_mem_object_helper(target_map,
4872 address,
4873 initial_size,
4874 mask,
4875 flags,
4876 vmk_flags,
4877 tag,
4878 port,
4879 offset,
4880 copy,
4881 cur_protection,
4882 max_protection,
4883 inheritance,
4884 NULL,
4885 0);
4886
4887 #if KASAN
4888 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4889 kasan_notify_address(*address, initial_size);
4890 }
4891 #endif
4892
4893 return ret;
4894 }
4895
4896 kern_return_t
vm_map_enter_mem_object_prefault(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,vm_prot_t cur_protection,vm_prot_t max_protection,upl_page_list_ptr_t page_list,unsigned int page_list_count)4897 vm_map_enter_mem_object_prefault(
4898 vm_map_t target_map,
4899 vm_map_offset_t *address,
4900 vm_map_size_t initial_size,
4901 vm_map_offset_t mask,
4902 int flags,
4903 vm_map_kernel_flags_t vmk_flags,
4904 vm_tag_t tag,
4905 ipc_port_t port,
4906 vm_object_offset_t offset,
4907 vm_prot_t cur_protection,
4908 vm_prot_t max_protection,
4909 upl_page_list_ptr_t page_list,
4910 unsigned int page_list_count)
4911 {
4912 kern_return_t ret;
4913
4914 ret = vm_map_enter_mem_object_helper(target_map,
4915 address,
4916 initial_size,
4917 mask,
4918 flags,
4919 vmk_flags,
4920 tag,
4921 port,
4922 offset,
4923 FALSE,
4924 cur_protection,
4925 max_protection,
4926 VM_INHERIT_DEFAULT,
4927 page_list,
4928 page_list_count);
4929
4930 #if KASAN
4931 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4932 kasan_notify_address(*address, initial_size);
4933 }
4934 #endif
4935
4936 return ret;
4937 }
4938
4939
4940 kern_return_t
vm_map_enter_mem_object_control(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,memory_object_control_t control,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)4941 vm_map_enter_mem_object_control(
4942 vm_map_t target_map,
4943 vm_map_offset_t *address,
4944 vm_map_size_t initial_size,
4945 vm_map_offset_t mask,
4946 int flags,
4947 vm_map_kernel_flags_t vmk_flags,
4948 vm_tag_t tag,
4949 memory_object_control_t control,
4950 vm_object_offset_t offset,
4951 boolean_t copy,
4952 vm_prot_t cur_protection,
4953 vm_prot_t max_protection,
4954 vm_inherit_t inheritance)
4955 {
4956 vm_map_address_t map_addr;
4957 vm_map_size_t map_size;
4958 vm_object_t object;
4959 vm_object_size_t size;
4960 kern_return_t result;
4961 memory_object_t pager;
4962 vm_prot_t pager_prot;
4963 kern_return_t kr;
4964 #if __arm64__
4965 boolean_t fourk = vmk_flags.vmkf_fourk;
4966 #endif /* __arm64__ */
4967
4968 /*
4969 * Check arguments for validity
4970 */
4971 if ((target_map == VM_MAP_NULL) ||
4972 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4973 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4974 (inheritance > VM_INHERIT_LAST_VALID) ||
4975 initial_size == 0) {
4976 return KERN_INVALID_ARGUMENT;
4977 }
4978
4979 #if __arm64__
4980 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
4981 fourk = FALSE;
4982 }
4983
4984 if (fourk) {
4985 map_addr = vm_map_trunc_page(*address,
4986 FOURK_PAGE_MASK);
4987 map_size = vm_map_round_page(initial_size,
4988 FOURK_PAGE_MASK);
4989 } else
4990 #endif /* __arm64__ */
4991 {
4992 map_addr = vm_map_trunc_page(*address,
4993 VM_MAP_PAGE_MASK(target_map));
4994 map_size = vm_map_round_page(initial_size,
4995 VM_MAP_PAGE_MASK(target_map));
4996 }
4997 size = vm_object_round_page(initial_size);
4998
4999 object = memory_object_control_to_vm_object(control);
5000
5001 if (object == VM_OBJECT_NULL) {
5002 return KERN_INVALID_OBJECT;
5003 }
5004
5005 if (object == kernel_object) {
5006 printf("Warning: Attempt to map kernel object"
5007 " by a non-private kernel entity\n");
5008 return KERN_INVALID_OBJECT;
5009 }
5010
5011 vm_object_lock(object);
5012 object->ref_count++;
5013
5014 /*
5015 * For "named" VM objects, let the pager know that the
5016 * memory object is being mapped. Some pagers need to keep
5017 * track of this, to know when they can reclaim the memory
5018 * object, for example.
5019 * VM calls memory_object_map() for each mapping (specifying
5020 * the protection of each mapping) and calls
5021 * memory_object_last_unmap() when all the mappings are gone.
5022 */
5023 pager_prot = max_protection;
5024 if (copy) {
5025 pager_prot &= ~VM_PROT_WRITE;
5026 }
5027 pager = object->pager;
5028 if (object->named &&
5029 pager != MEMORY_OBJECT_NULL &&
5030 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
5031 assert(object->pager_ready);
5032 vm_object_mapping_wait(object, THREAD_UNINT);
5033 vm_object_mapping_begin(object);
5034 vm_object_unlock(object);
5035
5036 kr = memory_object_map(pager, pager_prot);
5037 assert(kr == KERN_SUCCESS);
5038
5039 vm_object_lock(object);
5040 vm_object_mapping_end(object);
5041 }
5042 vm_object_unlock(object);
5043
5044 /*
5045 * Perform the copy if requested
5046 */
5047
5048 if (copy) {
5049 vm_object_t new_object;
5050 vm_object_offset_t new_offset;
5051
5052 result = vm_object_copy_strategically(object, offset, size,
5053 &new_object, &new_offset,
5054 ©);
5055
5056
5057 if (result == KERN_MEMORY_RESTART_COPY) {
5058 boolean_t success;
5059 boolean_t src_needs_copy;
5060
5061 /*
5062 * XXX
5063 * We currently ignore src_needs_copy.
5064 * This really is the issue of how to make
5065 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5066 * non-kernel users to use. Solution forthcoming.
5067 * In the meantime, since we don't allow non-kernel
5068 * memory managers to specify symmetric copy,
5069 * we won't run into problems here.
5070 */
5071 new_object = object;
5072 new_offset = offset;
5073 success = vm_object_copy_quickly(new_object,
5074 new_offset, size,
5075 &src_needs_copy,
5076 ©);
5077 assert(success);
5078 result = KERN_SUCCESS;
5079 }
5080 /*
5081 * Throw away the reference to the
5082 * original object, as it won't be mapped.
5083 */
5084
5085 vm_object_deallocate(object);
5086
5087 if (result != KERN_SUCCESS) {
5088 return result;
5089 }
5090
5091 object = new_object;
5092 offset = new_offset;
5093 }
5094
5095 #if __arm64__
5096 if (fourk) {
5097 result = vm_map_enter_fourk(target_map,
5098 &map_addr,
5099 map_size,
5100 (vm_map_offset_t)mask,
5101 flags,
5102 vmk_flags,
5103 tag,
5104 object, offset,
5105 copy,
5106 cur_protection, max_protection,
5107 inheritance);
5108 } else
5109 #endif /* __arm64__ */
5110 {
5111 result = vm_map_enter(target_map,
5112 &map_addr, map_size,
5113 (vm_map_offset_t)mask,
5114 flags,
5115 vmk_flags,
5116 tag,
5117 object, offset,
5118 copy,
5119 cur_protection, max_protection,
5120 inheritance);
5121 }
5122 if (result != KERN_SUCCESS) {
5123 vm_object_deallocate(object);
5124 }
5125 *address = map_addr;
5126
5127 return result;
5128 }
5129
5130
5131 #if VM_CPM
5132
5133 #ifdef MACH_ASSERT
5134 extern pmap_paddr_t avail_start, avail_end;
5135 #endif
5136
5137 /*
5138 * Allocate memory in the specified map, with the caveat that
5139 * the memory is physically contiguous. This call may fail
5140 * if the system can't find sufficient contiguous memory.
5141 * This call may cause or lead to heart-stopping amounts of
5142 * paging activity.
5143 *
5144 * Memory obtained from this call should be freed in the
5145 * normal way, viz., via vm_deallocate.
5146 */
5147 kern_return_t
vm_map_enter_cpm(vm_map_t map,vm_map_offset_t * addr,vm_map_size_t size,int flags,vm_map_kernel_flags_t vmk_flags)5148 vm_map_enter_cpm(
5149 vm_map_t map,
5150 vm_map_offset_t *addr,
5151 vm_map_size_t size,
5152 int flags,
5153 vm_map_kernel_flags_t vmk_flags)
5154 {
5155 vm_object_t cpm_obj;
5156 pmap_t pmap;
5157 vm_page_t m, pages;
5158 kern_return_t kr;
5159 vm_map_offset_t va, start, end, offset;
5160 #if MACH_ASSERT
5161 vm_map_offset_t prev_addr = 0;
5162 #endif /* MACH_ASSERT */
5163
5164 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5165 vm_tag_t tag;
5166
5167 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5168 /* XXX TODO4K do we need to support this? */
5169 *addr = 0;
5170 return KERN_NOT_SUPPORTED;
5171 }
5172
5173 VM_GET_FLAGS_ALIAS(flags, tag);
5174
5175 if (size == 0) {
5176 *addr = 0;
5177 return KERN_SUCCESS;
5178 }
5179 if (anywhere) {
5180 *addr = vm_map_min(map);
5181 } else {
5182 *addr = vm_map_trunc_page(*addr,
5183 VM_MAP_PAGE_MASK(map));
5184 }
5185 size = vm_map_round_page(size,
5186 VM_MAP_PAGE_MASK(map));
5187
5188 /*
5189 * LP64todo - cpm_allocate should probably allow
5190 * allocations of >4GB, but not with the current
5191 * algorithm, so just cast down the size for now.
5192 */
5193 if (size > VM_MAX_ADDRESS) {
5194 return KERN_RESOURCE_SHORTAGE;
5195 }
5196 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5197 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5198 return kr;
5199 }
5200
5201 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5202 assert(cpm_obj != VM_OBJECT_NULL);
5203 assert(cpm_obj->internal);
5204 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5205 assert(cpm_obj->can_persist == FALSE);
5206 assert(cpm_obj->pager_created == FALSE);
5207 assert(cpm_obj->pageout == FALSE);
5208 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5209
5210 /*
5211 * Insert pages into object.
5212 */
5213
5214 vm_object_lock(cpm_obj);
5215 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5216 m = pages;
5217 pages = NEXT_PAGE(m);
5218 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5219
5220 assert(!m->vmp_gobbled);
5221 assert(!m->vmp_wanted);
5222 assert(!m->vmp_pageout);
5223 assert(!m->vmp_tabled);
5224 assert(VM_PAGE_WIRED(m));
5225 assert(m->vmp_busy);
5226 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5227
5228 m->vmp_busy = FALSE;
5229 vm_page_insert(m, cpm_obj, offset);
5230 }
5231 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5232 vm_object_unlock(cpm_obj);
5233
5234 /*
5235 * Hang onto a reference on the object in case a
5236 * multi-threaded application for some reason decides
5237 * to deallocate the portion of the address space into
5238 * which we will insert this object.
5239 *
5240 * Unfortunately, we must insert the object now before
5241 * we can talk to the pmap module about which addresses
5242 * must be wired down. Hence, the race with a multi-
5243 * threaded app.
5244 */
5245 vm_object_reference(cpm_obj);
5246
5247 /*
5248 * Insert object into map.
5249 */
5250
5251 kr = vm_map_enter(
5252 map,
5253 addr,
5254 size,
5255 (vm_map_offset_t)0,
5256 flags,
5257 vmk_flags,
5258 cpm_obj,
5259 (vm_object_offset_t)0,
5260 FALSE,
5261 VM_PROT_ALL,
5262 VM_PROT_ALL,
5263 VM_INHERIT_DEFAULT);
5264
5265 if (kr != KERN_SUCCESS) {
5266 /*
5267 * A CPM object doesn't have can_persist set,
5268 * so all we have to do is deallocate it to
5269 * free up these pages.
5270 */
5271 assert(cpm_obj->pager_created == FALSE);
5272 assert(cpm_obj->can_persist == FALSE);
5273 assert(cpm_obj->pageout == FALSE);
5274 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5275 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5276 vm_object_deallocate(cpm_obj); /* kill creation ref */
5277 }
5278
5279 /*
5280 * Inform the physical mapping system that the
5281 * range of addresses may not fault, so that
5282 * page tables and such can be locked down as well.
5283 */
5284 start = *addr;
5285 end = start + size;
5286 pmap = vm_map_pmap(map);
5287 pmap_pageable(pmap, start, end, FALSE);
5288
5289 /*
5290 * Enter each page into the pmap, to avoid faults.
5291 * Note that this loop could be coded more efficiently,
5292 * if the need arose, rather than looking up each page
5293 * again.
5294 */
5295 for (offset = 0, va = start; offset < size;
5296 va += PAGE_SIZE, offset += PAGE_SIZE) {
5297 int type_of_fault;
5298
5299 vm_object_lock(cpm_obj);
5300 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5301 assert(m != VM_PAGE_NULL);
5302
5303 vm_page_zero_fill(m);
5304
5305 type_of_fault = DBG_ZERO_FILL_FAULT;
5306
5307 vm_fault_enter(m, pmap, va,
5308 PAGE_SIZE, 0,
5309 VM_PROT_ALL, VM_PROT_WRITE,
5310 VM_PAGE_WIRED(m),
5311 FALSE, /* change_wiring */
5312 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5313 FALSE, /* no_cache */
5314 FALSE, /* cs_bypass */
5315 0, /* user_tag */
5316 0, /* pmap_options */
5317 NULL, /* need_retry */
5318 &type_of_fault);
5319
5320 vm_object_unlock(cpm_obj);
5321 }
5322
5323 #if MACH_ASSERT
5324 /*
5325 * Verify ordering in address space.
5326 */
5327 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5328 vm_object_lock(cpm_obj);
5329 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5330 vm_object_unlock(cpm_obj);
5331 if (m == VM_PAGE_NULL) {
5332 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5333 cpm_obj, (uint64_t)offset);
5334 }
5335 assert(m->vmp_tabled);
5336 assert(!m->vmp_busy);
5337 assert(!m->vmp_wanted);
5338 assert(!m->vmp_fictitious);
5339 assert(!m->vmp_private);
5340 assert(!m->vmp_absent);
5341 assert(!m->vmp_cleaning);
5342 assert(!m->vmp_laundry);
5343 assert(!m->vmp_precious);
5344 assert(!m->vmp_clustered);
5345 if (offset != 0) {
5346 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5347 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5348 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5349 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5350 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5351 panic("vm_allocate_cpm: pages not contig!");
5352 }
5353 }
5354 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5355 }
5356 #endif /* MACH_ASSERT */
5357
5358 vm_object_deallocate(cpm_obj); /* kill extra ref */
5359
5360 return kr;
5361 }
5362
5363
5364 #else /* VM_CPM */
5365
5366 /*
5367 * Interface is defined in all cases, but unless the kernel
5368 * is built explicitly for this option, the interface does
5369 * nothing.
5370 */
5371
kern_return_t
vm_map_enter_cpm(
	__unused vm_map_t       map,
	__unused vm_map_offset_t        *addr,
	__unused vm_map_size_t  size,
	__unused int            flags,
	__unused vm_map_kernel_flags_t  vmk_flags)
{
	/*
	 * Stub for kernels built without VM_CPM: contiguous physical
	 * memory allocation is not supported, so every request fails.
	 */
	return KERN_FAILURE;
}
5382 #endif /* VM_CPM */
5383
5384 /* Not used without nested pmaps */
5385 #ifndef NO_NESTED_PMAP
5386 /*
5387 * Clip and unnest a portion of a nested submap mapping.
5388 */
5389
5390
/*
 * vm_map_clip_unnest: [ internal use only ]
 *
 * Clip "entry" to the [start_unnest, end_unnest) range and undo the
 * nested (shared) pmap mapping for that range, so the entry no longer
 * shares page tables with the submap's pmap.
 *
 * The caller must hold the map locked, and "entry" must be a nested
 * submap mapping (is_sub_map && use_pmap).
 */
static void
vm_map_clip_unnest(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t start_unnest,
	vm_map_offset_t end_unnest)
{
	vm_map_offset_t old_start_unnest = start_unnest;
	vm_map_offset_t old_end_unnest = end_unnest;

	assert(entry->is_sub_map);
	assert(VME_SUBMAP(entry) != NULL);
	assert(entry->use_pmap);

	/*
	 * Query the platform for the optimal unnest range.
	 * DRK: There's some duplication of effort here, since
	 * callers may have adjusted the range to some extent. This
	 * routine was introduced to support 1GiB subtree nesting
	 * for x86 platforms, which can also nest on 2MiB boundaries
	 * depending on size/alignment.
	 */
	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
		assert(VME_SUBMAP(entry)->is_nested_map);
		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
		/* the platform adjusted the range: record the sub-optimal unnest */
		log_unnest_badness(map,
		    old_start_unnest,
		    old_end_unnest,
		    VME_SUBMAP(entry)->is_nested_map,
		    (entry->vme_start +
		    VME_SUBMAP(entry)->lowest_unnestable_start -
		    VME_OFFSET(entry)));
	}

	/* the (possibly adjusted) unnest range must lie within this entry */
	if (entry->vme_start > start_unnest ||
	    entry->vme_end < end_unnest) {
		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
		    "bad nested entry: start=0x%llx end=0x%llx\n",
		    (long long)start_unnest, (long long)end_unnest,
		    (long long)entry->vme_start, (long long)entry->vme_end);
	}

	/* clip the entry down to exactly the unnest range */
	if (start_unnest > entry->vme_start) {
		_vm_map_clip_start(&map->hdr,
		    entry,
		    start_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
	if (entry->vme_end > end_unnest) {
		_vm_map_clip_end(&map->hdr,
		    entry,
		    end_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}

	/* detach the shared page tables for this range */
	pmap_unnest(map->pmap,
	    entry->vme_start,
	    entry->vme_end - entry->vme_start);
	if ((map->mapped_in_other_pmaps) && os_ref_get_count_raw(&map->map_refcnt) != 0) {
		/* clean up parent map/maps */
		vm_map_submap_pmap_clean(
			map, entry->vme_start,
			entry->vme_end,
			VME_SUBMAP(entry),
			VME_OFFSET(entry));
	}
	entry->use_pmap = FALSE;
	if ((map->pmap != kernel_pmap) &&
	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
	}
}
5471 #endif /* NO_NESTED_PMAP */
5472
/*
 * Panic helper: called when a clip operation would split a VM map
 * entry marked vme_atomic (which must never be subdivided).
 */
__abortlike
static void
__vm_map_clip_atomic_entry_panic(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t where)
{
	panic("vm_map_clip(%p): Attempting to clip an atomic VM map entry "
	    "%p [0x%llx:0x%llx] at 0x%llx", map, entry,
	    (uint64_t)entry->vme_start,
	    (uint64_t)entry->vme_end,
	    (uint64_t)where);
}
5486
5487 /*
5488 * vm_map_clip_start: [ internal use only ]
5489 *
5490 * Asserts that the given entry begins at or after
5491 * the specified address; if necessary,
5492 * it splits the entry into two.
5493 */
void
vm_map_clip_start(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t startaddr)
{
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map &&
	    entry->use_pmap &&
	    startaddr >= entry->vme_start) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure "startaddr" is no longer in a nested range
		 * before we clip. Unnest only the minimum range the platform
		 * can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
		end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (startaddr > entry->vme_start) {
		if (!entry->is_sub_map &&
		    VME_OBJECT(entry) &&
		    VME_OBJECT(entry)->phys_contiguous) {
			/*
			 * Entry maps a physically contiguous object:
			 * drop its pmap mappings before splitting it.
			 */
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			/* atomic entries must never be split */
			__vm_map_clip_atomic_entry_panic(map, entry, startaddr);
		}

		DTRACE_VM5(
			vm_map_clip_start,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, startaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_start(&map->hdr, entry, startaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
5546
5547
/*
 * Like vm_map_clip_start() but for an entry in a vm_map_copy's entry
 * list: only the header-level clip is performed (no pmap/unnest work).
 */
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END
5553
5554 /*
5555 * This routine is called only when it is known that
5556 * the entry must be split.
5557 */
static void
_vm_map_clip_start(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         start)
{
	vm_map_entry_t  new_entry;

	/*
	 * Split off the front portion --
	 * note that we must insert the new
	 * entry BEFORE this one, so that
	 * this entry has the specified starting
	 * address.
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	/* clone the entry; the clone keeps the original start and offset */
	new_entry = _vm_map_entry_create(map_header);
	vm_map_entry_copy_full(new_entry, entry);

	/* new_entry covers [old vme_start, start); "entry" keeps [start, vme_end) */
	new_entry->vme_end = start;
	assert(new_entry->vme_start < new_entry->vme_end);
	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
	assert(start < entry->vme_end);
	entry->vme_start = start;

	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

	/* both halves now reference the same submap/object: take an extra ref */
	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
5596
5597
5598 /*
5599 * vm_map_clip_end: [ internal use only ]
5600 *
5601 * Asserts that the given entry ends at or before
5602 * the specified address; if necessary,
5603 * it splits the entry into two.
5604 */
void
vm_map_clip_end(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t endaddr)
{
	if (endaddr > entry->vme_end) {
		/*
		 * Within the scope of this clipping, limit "endaddr" to
		 * the end of this map entry...
		 */
		endaddr = entry->vme_end;
	}
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map && entry->use_pmap) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure the range between the start of this entry and
		 * the new "endaddr" is no longer nested before we clip.
		 * Unnest only the minimum range the platform can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = entry->vme_start;
		end_unnest =
		    (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
		    ~(pmap_shared_region_size_min(map->pmap) - 1);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (endaddr < entry->vme_end) {
		if (!entry->is_sub_map &&
		    VME_OBJECT(entry) &&
		    VME_OBJECT(entry)->phys_contiguous) {
			/*
			 * Entry maps a physically contiguous object:
			 * drop its pmap mappings before splitting it.
			 */
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			/* atomic entries must never be split */
			__vm_map_clip_atomic_entry_panic(map, entry, endaddr);
		}
		DTRACE_VM5(
			vm_map_clip_end,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, endaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_end(&map->hdr, entry, endaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
5663
5664
/*
 * Like vm_map_clip_end() but for an entry in a vm_map_copy's entry
 * list: only the header-level clip is performed (no pmap/unnest work).
 */
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END
5670
5671 /*
5672 * This routine is called only when it is known that
5673 * the entry must be split.
5674 */
static void
_vm_map_clip_end(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         end)
{
	vm_map_entry_t  new_entry;

	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(end,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	/* clone the entry; the clone will become the tail half */
	new_entry = _vm_map_entry_create(map_header);
	vm_map_entry_copy_full(new_entry, entry);

	/* "entry" keeps [vme_start, end); new_entry covers [end, old vme_end) */
	assert(entry->vme_start < end);
	new_entry->vme_start = entry->vme_end = end;
	VME_OFFSET_SET(new_entry,
	    VME_OFFSET(new_entry) + (end - entry->vme_start));
	assert(new_entry->vme_start < new_entry->vme_end);

	_vm_map_store_entry_link(map_header, entry, new_entry);

	/* both halves now reference the same submap/object: take an extra ref */
	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
5710
5711
5712 /*
5713 * VM_MAP_RANGE_CHECK: [ internal use only ]
5714 *
5715 * Asserts that the starting and ending region
5716 * addresses fall within the valid range of the map.
5717 */
/*
 * Note: clamps silently rather than failing -- out-of-range addresses
 * are pinned to the map's limits and an inverted range collapses to
 * start == end (an empty range).
 */
#define VM_MAP_RANGE_CHECK(map, start, end)     \
	MACRO_BEGIN                             \
	if (start < vm_map_min(map))            \
	        start = vm_map_min(map);        \
	if (end > vm_map_max(map))              \
	        end = vm_map_max(map);          \
	if (start > end)                        \
	        start = end;                    \
	MACRO_END
5727
5728 /*
5729 * vm_map_range_check: [ internal use only ]
5730 *
5731 * Check that the region defined by the specified start and
5732 * end addresses are wholly contained within a single map
 * entry or set of adjacent map entries of the specified map,
5734 * i.e. the specified region contains no unmapped space.
5735 * If any or all of the region is unmapped, FALSE is returned.
5736 * Otherwise, TRUE is returned and if the output argument 'entry'
5737 * is not NULL it points to the map entry containing the start
5738 * of the region.
5739 *
5740 * The map is locked for reading on entry and is left locked.
5741 */
5742 static boolean_t
vm_map_range_check(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_map_entry_t * entry)5743 vm_map_range_check(
5744 vm_map_t map,
5745 vm_map_offset_t start,
5746 vm_map_offset_t end,
5747 vm_map_entry_t *entry)
5748 {
5749 vm_map_entry_t cur;
5750 vm_map_offset_t prev;
5751
5752 /*
5753 * Basic sanity checks first
5754 */
5755 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5756 return FALSE;
5757 }
5758
5759 /*
5760 * Check first if the region starts within a valid
5761 * mapping for the map.
5762 */
5763 if (!vm_map_lookup_entry(map, start, &cur)) {
5764 return FALSE;
5765 }
5766
5767 /*
5768 * Optimize for the case that the region is contained
5769 * in a single map entry.
5770 */
5771 if (entry != (vm_map_entry_t *) NULL) {
5772 *entry = cur;
5773 }
5774 if (end <= cur->vme_end) {
5775 return TRUE;
5776 }
5777
5778 /*
5779 * If the region is not wholly contained within a
5780 * single entry, walk the entries looking for holes.
5781 */
5782 prev = cur->vme_end;
5783 cur = cur->vme_next;
5784 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5785 if (end <= cur->vme_end) {
5786 return TRUE;
5787 }
5788 prev = cur->vme_end;
5789 cur = cur->vme_next;
5790 }
5791 return FALSE;
5792 }
5793
5794 /*
5795 * vm_map_protect:
5796 *
5797 * Sets the protection of the specified address
5798 * region in the target map. If "set_max" is
5799 * specified, the maximum protection is to be set;
5800 * otherwise, only the current protection is affected.
5801 */
kern_return_t
vm_map_protect(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       new_prot,
	boolean_t       set_max)
{
	vm_map_entry_t  current;
	vm_map_offset_t prev;
	vm_map_entry_t  entry;
	vm_prot_t       new_max;
	int             pmap_options = 0;
	kern_return_t   kr;

	if (new_prot & VM_PROT_COPY) {
		vm_map_offset_t         new_start;
		vm_prot_t               cur_prot, max_prot;
		vm_map_kernel_flags_t   kflags;

		/* LP64todo - see below */
		if (start >= map->max_offset) {
			return KERN_INVALID_ADDRESS;
		}

		/* refuse write+execute when code-signing enforcement demands it */
		if ((new_prot & VM_PROT_ALLEXEC) &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if XNU_TARGET_OS_OSX && __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* XNU_TARGET_OS_OSX && __arm64__ */
		    ) &&
		    VM_MAP_POLICY_WX_FAIL(map)) {
			DTRACE_VM3(cs_wx,
			    uint64_t, (uint64_t) start,
			    uint64_t, (uint64_t) end,
			    vm_prot_t, new_prot);
			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
			    proc_selfpid(),
			    (get_bsdtask_info(current_task())
			    ? proc_name_address(get_bsdtask_info(current_task()))
			    : "?"),
			    __FUNCTION__);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * Let vm_map_remap_extract() know that it will need to:
		 * + make a copy of the mapping
		 * + add VM_PROT_WRITE to the max protections
		 * + remove any protections that are no longer allowed from the
		 *   max protections (to avoid any WRITE/EXECUTE conflict, for
		 *   example).
		 * Note that "max_prot" is an IN/OUT parameter only for this
		 * specific (VM_PROT_COPY) case.  It's usually an OUT parameter
		 * only.
		 */
		max_prot = new_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC);
		cur_prot = VM_PROT_NONE;
		kflags = VM_MAP_KERNEL_FLAGS_NONE;
		kflags.vmkf_remap_prot_copy = TRUE;
		new_start = start;
		/* remap the range in place (overwrite) as a copy-on-write copy */
		kr = vm_map_remap(map,
		    &new_start,
		    end - start,
		    0, /* mask */
		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
		    kflags,
		    0,
		    map,
		    start,
		    TRUE, /* copy-on-write remapping! */
		    &cur_prot, /* IN/OUT */
		    &max_prot, /* IN/OUT */
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		/* COPY handled; fall through to set the remaining protections */
		new_prot &= ~VM_PROT_COPY;
	}

	vm_map_lock(map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (start >= map->max_offset) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	while (1) {
		/*
		 * Lookup the entry.  If it doesn't start in a valid
		 * entry, return an error.
		 */
		if (!vm_map_lookup_entry(map, start, &entry)) {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
			start = SUPERPAGE_ROUND_DOWN(start);
			continue;
		}
		break;
	}
	if (entry->superpage_size) {
		end = SUPERPAGE_ROUND_UP(end);
	}

	/*
	 * Make a first pass to check for protection and address
	 * violations.
	 */

	current = entry;
	prev = current->vme_start;
	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start < end)) {
		/*
		 * If there is a hole, return an error.
		 */
		if (current->vme_start != prev) {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		new_max = current->max_protection;

#if defined(__x86_64__)
		/* Allow max mask to include execute prot bits if this map doesn't enforce CS */
		if (set_max && (new_prot & VM_PROT_ALLEXEC) && !vm_map_cs_enforcement(map)) {
			new_max = (new_max & ~VM_PROT_ALLEXEC) | (new_prot & VM_PROT_ALLEXEC);
		}
#endif
		/* requested protections must not exceed the entry's maximum */
		if ((new_prot & new_max) != new_prot) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}

		/* JIT entries with a pmap prot policy cannot be re-protected */
		if (current->used_for_jit &&
		    pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}

#if __arm64e__
		/* Disallow remapping hw assisted TPRO mappings */
		if (current->used_for_tpro) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}
#endif /* __arm64e__ */


		/* deny simultaneous write+execute unless the entry is JIT */
		if ((new_prot & VM_PROT_WRITE) &&
		    (new_prot & VM_PROT_ALLEXEC) &&
#if XNU_TARGET_OS_OSX
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    ) &&
#endif /* XNU_TARGET_OS_OSX */
		    !(current->used_for_jit)) {
			DTRACE_VM3(cs_wx,
			    uint64_t, (uint64_t) current->vme_start,
			    uint64_t, (uint64_t) current->vme_end,
			    vm_prot_t, new_prot);
			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
			    proc_selfpid(),
			    (get_bsdtask_info(current_task())
			    ? proc_name_address(get_bsdtask_info(current_task()))
			    : "?"),
			    __FUNCTION__);
			/* strip exec; optionally fail outright per map policy */
			new_prot &= ~VM_PROT_ALLEXEC;
			if (VM_MAP_POLICY_WX_FAIL(map)) {
				vm_map_unlock(map);
				return KERN_PROTECTION_FAILURE;
			}
		}

		/*
		 * If the task has requested executable lockdown,
		 * deny both:
		 * - adding executable protections OR
		 * - adding write protections to an existing executable mapping.
		 */
		if (map->map_disallow_new_exec == TRUE) {
			if ((new_prot & VM_PROT_ALLEXEC) ||
			    ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
				vm_map_unlock(map);
				return KERN_PROTECTION_FAILURE;
			}
		}

		prev = current->vme_end;
		current = current->vme_next;
	}

#if __arm64__
	if (end > prev &&
	    end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
		vm_map_entry_t prev_entry;

		prev_entry = current->vme_prev;
		if (prev_entry != vm_map_to_entry(map) &&
		    !prev_entry->map_aligned &&
		    (vm_map_round_page(prev_entry->vme_end,
		    VM_MAP_PAGE_MASK(map))
		    == end)) {
			/*
			 * The last entry in our range is not "map-aligned"
			 * but it would have reached all the way to "end"
			 * if it had been map-aligned, so this is not really
			 * a hole in the range and we can proceed.
			 */
			prev = end;
		}
	}
#endif /* __arm64__ */

	if (end > prev) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Go back and fix up protections.
	 * Clip to start here if the range starts within
	 * the entry.
	 */

	current = entry;
	if (current != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, current, start);
	}

	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start < end)) {
		vm_prot_t       old_prot;

		vm_map_clip_end(map, current, end);

		if (current->is_sub_map) {
			/* clipping did unnest if needed */
			assert(!current->use_pmap);
		}

		old_prot = current->protection;

		if (set_max) {
			current->max_protection = new_prot;
			/* Consider either EXECUTE or UEXEC as EXECUTE for this masking */
			current->protection = (new_prot & old_prot);
		} else {
			current->protection = new_prot;
		}

		/*
		 * Update physical map if necessary.
		 * If the request is to turn off write protection,
		 * we won't do it for real (in pmap). This is because
		 * it would cause copy-on-write to fail.  We've already
		 * set the new protection in the map, so if a
		 * write-protect fault occurred, it will be fixed up
		 * properly, COW or not.
		 */
		if (current->protection != old_prot) {
			/* Look one level in we support nested pmaps */
			/* from mapped submaps which are direct entries */
			/* in our map */

			vm_prot_t prot;

			prot = current->protection;
			if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
				/* defer real write protection to the fault path (COW) */
				prot &= ~VM_PROT_WRITE;
			} else {
				assert(!VME_OBJECT(current)->code_signed);
				assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
				if (prot & VM_PROT_WRITE) {
					/*
					 * For write requests on the
					 * compressor, we will ask the
					 * pmap layer to prevent us from
					 * taking a write fault when we
					 * attempt to access the mapping
					 * next.
					 */
					pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
				}
			}

			if (override_nx(map, VME_ALIAS(current)) && prot) {
				prot |= VM_PROT_EXECUTE;
			}

#if DEVELOPMENT || DEBUG
			if (!(old_prot & VM_PROT_EXECUTE) &&
			    (prot & VM_PROT_EXECUTE) &&
			    panic_on_unsigned_execute &&
			    (proc_selfcsflags() & CS_KILL)) {
				panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
			}
#endif /* DEVELOPMENT || DEBUG */

			if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
				if (current->wired_count) {
					panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x",
					    map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
				}

				/* If the pmap layer cares about this
				 * protection type, force a fault for
				 * each page so that vm_fault will
				 * repopulate the page with the full
				 * set of protections.
				 */
				/*
				 * TODO: We don't seem to need this,
				 * but this is due to an internal
				 * implementation detail of
				 * pmap_protect.  Do we want to rely
				 * on this?
				 */
				prot = VM_PROT_NONE;
			}

			if (current->is_sub_map && current->use_pmap) {
				pmap_protect(VME_SUBMAP(current)->pmap,
				    current->vme_start,
				    current->vme_end,
				    prot);
			} else {
				pmap_protect_options(map->pmap,
				    current->vme_start,
				    current->vme_end,
				    prot,
				    pmap_options,
				    NULL);
			}
		}
		current = current->vme_next;
	}

	/* coalesce entries that the clipping above may have fragmented */
	current = entry;
	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start <= end)) {
		vm_map_simplify_entry(map, current);
		current = current->vme_next;
	}

	vm_map_unlock(map);
	return KERN_SUCCESS;
}
6162
6163 /*
6164 * vm_map_inherit:
6165 *
6166 * Sets the inheritance of the specified address
6167 * range in the target map. Inheritance
6168 * affects how the map will be shared with
6169 * child maps at the time of vm_map_fork.
6170 */
6171 kern_return_t
vm_map_inherit(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_inherit_t new_inheritance)6172 vm_map_inherit(
6173 vm_map_t map,
6174 vm_map_offset_t start,
6175 vm_map_offset_t end,
6176 vm_inherit_t new_inheritance)
6177 {
6178 vm_map_entry_t entry;
6179 vm_map_entry_t temp_entry;
6180
6181 vm_map_lock(map);
6182
6183 VM_MAP_RANGE_CHECK(map, start, end);
6184
6185 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6186 entry = temp_entry;
6187 } else {
6188 temp_entry = temp_entry->vme_next;
6189 entry = temp_entry;
6190 }
6191
6192 /* first check entire range for submaps which can't support the */
6193 /* given inheritance. */
6194 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6195 if (entry->is_sub_map) {
6196 if (new_inheritance == VM_INHERIT_COPY) {
6197 vm_map_unlock(map);
6198 return KERN_INVALID_ARGUMENT;
6199 }
6200 }
6201
6202 entry = entry->vme_next;
6203 }
6204
6205 entry = temp_entry;
6206 if (entry != vm_map_to_entry(map)) {
6207 /* clip and unnest if necessary */
6208 vm_map_clip_start(map, entry, start);
6209 }
6210
6211 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6212 vm_map_clip_end(map, entry, end);
6213 if (entry->is_sub_map) {
6214 /* clip did unnest if needed */
6215 assert(!entry->use_pmap);
6216 }
6217
6218 entry->inheritance = new_inheritance;
6219
6220 entry = entry->vme_next;
6221 }
6222
6223 vm_map_unlock(map);
6224 return KERN_SUCCESS;
6225 }
6226
6227 /*
6228 * Update the accounting for the amount of wired memory in this map. If the user has
6229 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6230 */
6231
static kern_return_t
add_wire_counts(
	vm_map_t        map,
	vm_map_entry_t  entry,
	boolean_t       user_wire)
{
	vm_map_size_t   size;

	if (user_wire) {
		unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

		/*
		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
		 * this map entry.
		 */

		if (entry->user_wired_count == 0) {
			size = entry->vme_end - entry->vme_start;

			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per map limit which is the smaller of either
			 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value.  There is also
			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
			 * limit, then we fail.
			 */

			if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
			    size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
				/* attribute the failure to the limit that was hit */
				if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
#if DEVELOPMENT || DEBUG
					if (panic_on_mlock_failure) {
						panic("mlock: Over global wire limit. %llu bytes wired and requested to wire %llu bytes more", ptoa_64(total_wire_count), (uint64_t) size);
					}
#endif /* DEVELOPMENT || DEBUG */
					os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
				} else {
					os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
#if DEVELOPMENT || DEBUG
					if (panic_on_mlock_failure) {
						panic("mlock: Over process wire limit. %llu bytes wired and requested to wire %llu bytes more", (uint64_t) map->user_wire_size, (uint64_t) size);
					}
#endif /* DEVELOPMENT || DEBUG */
				}
				return KERN_RESOURCE_SHORTAGE;
			}

			/*
			 * The first time the user wires an entry, we also increment the wired_count and add this to
			 * the total that has been wired in the map.
			 */

			if (entry->wired_count >= MAX_WIRE_COUNT) {
				return KERN_FAILURE;
			}

			entry->wired_count++;
			map->user_wire_size += size;
		}

		if (entry->user_wired_count >= MAX_WIRE_COUNT) {
			return KERN_FAILURE;
		}

		entry->user_wired_count++;
	} else {
		/*
		 * The kernel's wiring the memory.  Just bump the count and continue.
		 */

		if (entry->wired_count >= MAX_WIRE_COUNT) {
			panic("vm_map_wire: too many wirings");
		}

		entry->wired_count++;
	}

	return KERN_SUCCESS;
}
6311
6312 /*
6313 * Update the memory wiring accounting now that the given map entry is being unwired.
6314 */
6315
6316 static void
subtract_wire_counts(vm_map_t map,vm_map_entry_t entry,boolean_t user_wire)6317 subtract_wire_counts(
6318 vm_map_t map,
6319 vm_map_entry_t entry,
6320 boolean_t user_wire)
6321 {
6322 if (user_wire) {
6323 /*
6324 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6325 */
6326
6327 if (entry->user_wired_count == 1) {
6328 /*
6329 * We're removing the last user wire reference. Decrement the wired_count and the total
6330 * user wired memory for this map.
6331 */
6332
6333 assert(entry->wired_count >= 1);
6334 entry->wired_count--;
6335 map->user_wire_size -= entry->vme_end - entry->vme_start;
6336 }
6337
6338 assert(entry->user_wired_count >= 1);
6339 entry->user_wired_count--;
6340 } else {
6341 /*
6342 * The kernel is unwiring the memory. Just update the count.
6343 */
6344
6345 assert(entry->wired_count >= 1);
6346 entry->wired_count--;
6347 }
6348 }
6349
/* count of wire requests rejected because they covered executable mappings
 * (see the code-signing check in vm_map_wire_nested()) */
int cs_executable_wire = 0;
6351
6352 /*
6353 * vm_map_wire:
6354 *
6355 * Sets the pageability of the specified address range in the
6356 * target map as wired. Regions specified as not pageable require
6357 * locked-down physical memory and physical page maps. The
6358 * access_type variable indicates types of accesses that must not
6359 * generate page faults. This is checked against protection of
6360 * memory being locked-down.
6361 *
6362 * The map must not be locked, but a reference must remain to the
6363 * map throughout the call.
6364 */
static kern_return_t
vm_map_wire_nested(
	vm_map_t                map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_prot_t               caller_prot,
	vm_tag_t                tag,
	boolean_t               user_wire,
	pmap_t                  map_pmap,
	vm_map_offset_t         pmap_addr,
	ppnum_t                 *physpage_p)
{
	vm_map_entry_t          entry;
	vm_prot_t               access_type;
	struct vm_map_entry     *first_entry, tmp_entry;
	vm_map_t                real_map;
	vm_map_offset_t         s, e;
	kern_return_t           rc;
	boolean_t               need_wakeup;
	boolean_t               main_map = FALSE;
	wait_interrupt_t        interruptible_state;
	thread_t                cur_thread;
	unsigned int            last_timestamp;
	vm_map_size_t           size;
	boolean_t               wire_and_extract;
	vm_prot_t               extra_prots;

	/*
	 * Lookups done for wiring force a copy (VM_PROT_COPY) and, where
	 * code-signing is enforced, fail instead of copying executable
	 * mappings (VM_PROT_COPY_FAIL_IF_EXECUTABLE). On macOS the
	 * executable restriction is lifted for the kernel map and for
	 * maps without CS enforcement.
	 */
	extra_prots = VM_PROT_COPY;
	extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
#if XNU_TARGET_OS_OSX
	if (map->pmap == kernel_pmap ||
	    !vm_map_cs_enforcement(map)) {
		extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
	}
#endif /* XNU_TARGET_OS_OSX */

	access_type = (caller_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC));

	wire_and_extract = FALSE;
	if (physpage_p != NULL) {
		/*
		 * The caller wants the physical page number of the
		 * wired page. We return only one physical page number
		 * so this works for only one page at a time.
		 */
		if ((end - start) != PAGE_SIZE) {
			return KERN_INVALID_ARGUMENT;
		}
		wire_and_extract = TRUE;
		*physpage_p = 0;
	}

	vm_map_lock(map);
	/* map_pmap == NULL means we're wiring the top-level map itself */
	if (map_pmap == NULL) {
		main_map = TRUE;
	}
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We wired what the caller asked for, zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	need_wakeup = FALSE;
	cur_thread = current_thread();

	s = start;
	rc = KERN_SUCCESS;

	if (vm_map_lookup_entry(map, s, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested submaps here !
		 */
	} else {
		/* Start address is not in map */
		rc = KERN_INVALID_ADDRESS;
		goto done;
	}

	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have wired from "start" to "s".
		 * We still need to wire from "s" to "end".
		 *
		 * "entry" hasn't been clipped, so it could start before "s"
		 * and/or end after "end".
		 */

		/* "e" is how far we want to wire in this entry */
		e = entry->vme_end;
		if (e > end) {
			e = end;
		}

		/*
		 * If another thread is wiring/unwiring this entry then
		 * block after informing other thread to wake us up.
		 */
		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * We have not clipped the entry. Make sure that
			 * the start address is in range so that the lookup
			 * below will succeed.
			 * "s" is the current starting point: we've already
			 * wired from "start" to "s" and we still have
			 * to wire from "s" to "end".
			 */

			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already wired.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}
			/*
			 * User wiring is interruptible
			 */
			wait_result = vm_map_entry_wait(map,
			    (user_wire) ? THREAD_ABORTSAFE :
			    THREAD_UNINT);
			if (user_wire && wait_result == THREAD_INTERRUPTED) {
				/*
				 * undo the wirings we have done so far
				 * We do not clear the needs_wakeup flag,
				 * because we cannot tell if we were the
				 * only one waiting.
				 */
				rc = KERN_FAILURE;
				goto done;
			}

			/*
			 * Cannot avoid a lookup here. reset timestamp.
			 */
			last_timestamp = map->timestamp;

			/*
			 * The entry could have been clipped, look it up again.
			 * Worse that can happen is, it may not exist anymore.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: undo everything upto the previous
				 * entry. let vm_map_unwire worry about
				 * checking the validity of the range.
				 */
				rc = KERN_FAILURE;
				goto done;
			}
			entry = first_entry;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_start;
			vm_map_offset_t local_end;
			pmap_t          pmap;

			if (wire_and_extract) {
				/*
				 * Wiring would result in copy-on-write
				 * which would not be compatible with
				 * the sharing we have with the original
				 * provider of this memory.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			/* translate the wired range into submap coordinates */
			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end;
			sub_end += VME_OFFSET(entry) - entry->vme_start;

			local_end = entry->vme_end;
			if (map_pmap == NULL) {
				vm_object_t             object;
				vm_object_offset_t      offset;
				vm_prot_t               prot;
				boolean_t               wired;
				vm_map_entry_t          local_entry;
				vm_map_version_t        version;
				vm_map_t                lookup_map;

				if (entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					/* ppc implementation requires that */
					/* submaps pmap address ranges line */
					/* up with parent map */
#ifdef notdef
					pmap_addr = sub_start;
#endif
					pmap_addr = s;
				} else {
					pmap = map->pmap;
					pmap_addr = s;
				}

				if (entry->wired_count) {
					/* already wired: just take another reference */
					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
						goto done;
					}

					/*
					 * The map was not unlocked:
					 * no need to goto re-lookup.
					 * Just go directly to next entry.
					 */
					entry = entry->vme_next;
					s = entry->vme_start;
					continue;
				}

				/* call vm_map_lookup_and_lock_object to */
				/* cause any needs copy to be */
				/* evaluated */
				local_start = entry->vme_start;
				lookup_map = map;
				vm_map_lock_write_to_read(map);
				rc = vm_map_lookup_and_lock_object(
					&lookup_map, local_start,
					(access_type | extra_prots),
					OBJECT_LOCK_EXCLUSIVE,
					&version, &object,
					&offset, &prot, &wired,
					NULL,
					&real_map, NULL);
				if (rc != KERN_SUCCESS) {
					vm_map_unlock_read(lookup_map);
					assert(map_pmap == NULL);
					/* unwind anything already wired before returning */
					vm_map_unwire(map, start,
					    s, user_wire);
					return rc;
				}
				vm_object_unlock(object);
				if (real_map != lookup_map) {
					vm_map_unlock(real_map);
				}
				vm_map_unlock_read(lookup_map);
				vm_map_lock(map);

				/* we unlocked, so must re-lookup */
				if (!vm_map_lookup_entry(map,
				    local_start,
				    &local_entry)) {
					rc = KERN_FAILURE;
					goto done;
				}

				/*
				 * entry could have been "simplified",
				 * so re-clip
				 */
				entry = local_entry;
				assert(s == local_start);
				vm_map_clip_start(map, entry, s);
				vm_map_clip_end(map, entry, end);
				/* re-compute "e" */
				e = entry->vme_end;
				if (e > end) {
					e = end;
				}

				/* did we have a change of type? */
				if (!entry->is_sub_map) {
					last_timestamp = map->timestamp;
					continue;
				}
			} else {
				local_start = entry->vme_start;
				pmap = map_pmap;
			}

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
				goto done;
			}

			/* mark busy so the entry survives dropping the map lock */
			entry->in_transition = TRUE;

			vm_map_unlock(map);
			rc = vm_map_wire_nested(VME_SUBMAP(entry),
			    sub_start, sub_end,
			    caller_prot, tag,
			    user_wire, pmap, pmap_addr,
			    NULL);
			vm_map_lock(map);

			/*
			 * Find the entry again. It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, local_start,
			    &first_entry)) {
				panic("vm_map_wire: re-lookup failed");
			}
			entry = first_entry;

			assert(local_start == s);
			/* re-compute "e" */
			e = entry->vme_end;
			if (e > end) {
				e = end;
			}

			last_timestamp = map->timestamp;
			/* clear in_transition on every piece the entry may have been clipped into */
			while ((entry != vm_map_to_entry(map)) &&
			    (entry->vme_start < e)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
					subtract_wire_counts(map, entry, user_wire);
				}
				entry = entry->vme_next;
			}
			if (rc != KERN_SUCCESS) { /* from vm_*_wire */
				goto done;
			}

			/* no need to relookup again */
			s = entry->vme_start;
			continue;
		}

		/*
		 * If this entry is already wired then increment
		 * the appropriate wire reference count.
		 */
		if (entry->wired_count) {
			if ((entry->protection & access_type) != access_type) {
				/* found a protection problem */

				/*
				 * XXX FBDP
				 * We should always return an error
				 * in this case but since we didn't
				 * enforce it before, let's do
				 * it only for the new "wire_and_extract"
				 * code path for now...
				 */
				if (wire_and_extract) {
					rc = KERN_PROTECTION_FAILURE;
					goto done;
				}
			}

			/*
			 * entry is already wired down, get our reference
			 * after clipping to our range.
			 */
			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
				goto done;
			}

			if (wire_and_extract) {
				vm_object_t             object;
				vm_object_offset_t      offset;
				vm_page_t               m;

				/*
				 * We don't have to "wire" the page again
				 * bit we still have to "extract" its
				 * physical page number, after some sanity
				 * checks.
				 */
				assert((entry->vme_end - entry->vme_start)
				    == PAGE_SIZE);
				assert(!entry->needs_copy);
				assert(!entry->is_sub_map);
				assert(VME_OBJECT(entry));
				if (((entry->vme_end - entry->vme_start)
				    != PAGE_SIZE) ||
				    entry->needs_copy ||
				    entry->is_sub_map ||
				    VME_OBJECT(entry) == VM_OBJECT_NULL) {
					rc = KERN_INVALID_ARGUMENT;
					goto done;
				}

				object = VME_OBJECT(entry);
				offset = VME_OFFSET(entry);
				/* need exclusive lock to update m->dirty */
				if (entry->protection & VM_PROT_WRITE) {
					vm_object_lock(object);
				} else {
					vm_object_lock_shared(object);
				}
				m = vm_page_lookup(object, offset);
				assert(m != VM_PAGE_NULL);
				assert(VM_PAGE_WIRED(m));
				if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
					*physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
					if (entry->protection & VM_PROT_WRITE) {
						vm_object_lock_assert_exclusive(
							object);
						m->vmp_dirty = TRUE;
					}
				} else {
					/* not already wired !? */
					*physpage_p = 0;
				}
				vm_object_unlock(object);
			}

			/* map was not unlocked: no need to relookup */
			entry = entry->vme_next;
			s = entry->vme_start;
			continue;
		}

		/*
		 * Unwired entry or wire request transmitted via submap
		 */

		/*
		 * Wiring would copy the pages to the shadow object.
		 * The shadow object would not be code-signed so
		 * attempting to execute code from these copied pages
		 * would trigger a code-signing violation.
		 */

		if ((entry->protection & VM_PROT_EXECUTE)
#if XNU_TARGET_OS_OSX
		    &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    )
#endif /* XNU_TARGET_OS_OSX */
		    ) {
#if MACH_ASSERT
			printf("pid %d[%s] wiring executable range from "
			    "0x%llx to 0x%llx: rejected to preserve "
			    "code-signing\n",
			    proc_selfpid(),
			    (get_bsdtask_info(current_task())
			    ? proc_name_address(get_bsdtask_info(current_task()))
			    : "?"),
			    (uint64_t) entry->vme_start,
			    (uint64_t) entry->vme_end);
#endif /* MACH_ASSERT */
			DTRACE_VM2(cs_executable_wire,
			    uint64_t, (uint64_t)entry->vme_start,
			    uint64_t, (uint64_t)entry->vme_end);
			cs_executable_wire++;
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		/*
		 * Perform actions of vm_map_lookup that need the write
		 * lock on the map: create a shadow object for a
		 * copy-on-write region, or an object for a zero-fill
		 * region.
		 */
		size = entry->vme_end - entry->vme_start;
		/*
		 * If wiring a copy-on-write page, we need to copy it now
		 * even if we're only (currently) requesting read access.
		 * This is aggressive, but once it's wired we can't move it.
		 */
		if (entry->needs_copy) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should not be "needs_copy"
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			VME_OBJECT_SHADOW(entry, size,
			    vm_map_always_shadow(map));
			entry->needs_copy = FALSE;
		} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should already have an object.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}
			/* zero-fill region: give it a fresh object */
			VME_OBJECT_SET(entry, vm_object_allocate(size), false, 0);
			VME_OFFSET_SET(entry, (vm_object_offset_t)0);
			assert(entry->use_pmap);
		} else if (VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should not be COPY_SYMMETRIC.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}
			/*
			 * Force an unrequested "copy-on-write" but only for
			 * the range we're wiring.
			 */
//			printf("FBDP %s:%d map %p entry %p [ 0x%llx 0x%llx ] s 0x%llx end 0x%llx wire&extract=%d\n", __FUNCTION__, __LINE__, map, entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, (uint64_t)s, (uint64_t)end, wire_and_extract);
			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);
			/* recompute "size" */
			size = entry->vme_end - entry->vme_start;
			/* make a shadow object */
			vm_object_t orig_object;
			vm_object_offset_t orig_offset;
			orig_object = VME_OBJECT(entry);
			orig_offset = VME_OFFSET(entry);
			VME_OBJECT_SHADOW(entry, size, vm_map_always_shadow(map));
			if (VME_OBJECT(entry) != orig_object) {
				/*
				 * This mapping has not been shared (or it would be
				 * COPY_DELAY instead of COPY_SYMMETRIC) and it has
				 * not been copied-on-write (or it would be marked
				 * as "needs_copy" and would have been handled above
				 * and also already write-protected).
				 * We still need to write-protect here to prevent
				 * other threads from modifying these pages while
				 * we're in the process of copying and wiring
				 * the copied pages.
				 * Since the mapping is neither shared nor COWed,
				 * we only need to write-protect the PTEs for this
				 * mapping.
				 */
				vm_object_pmap_protect(orig_object,
				    orig_offset,
				    size,
				    map->pmap,
				    VM_MAP_PAGE_SIZE(map),
				    entry->vme_start,
				    entry->protection & ~VM_PROT_WRITE);
			}
		}
		if (VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			/*
			 * Make the object COPY_DELAY to get a stable object
			 * to wire.
			 * That should avoid creating long shadow chains while
			 * wiring/unwiring the same range repeatedly.
			 * That also prevents part of the object from being
			 * wired while another part is "needs_copy", which
			 * could result in conflicting rules wrt copy-on-write.
			 */
			vm_object_t object;

			object = VME_OBJECT(entry);
			vm_object_lock(object);
			/* re-check under the object lock */
			if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
				assertf(vm_object_round_page(VME_OFFSET(entry) + size) - vm_object_trunc_page(VME_OFFSET(entry)) == object->vo_size,
				    "object %p size 0x%llx entry %p [0x%llx:0x%llx:0x%llx] size 0x%llx\n",
				    object, (uint64_t)object->vo_size,
				    entry,
				    (uint64_t)entry->vme_start,
				    (uint64_t)entry->vme_end,
				    (uint64_t)VME_OFFSET(entry),
				    (uint64_t)size);
				assertf(object->ref_count == 1,
				    "object %p ref_count %d\n",
				    object, object->ref_count);
				assertf(!entry->needs_copy,
				    "entry %p\n", entry);
				object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
				object->true_share = TRUE;
			}
			vm_object_unlock(object);
		}

		vm_map_clip_start(map, entry, s);
		vm_map_clip_end(map, entry, end);

		/* re-compute "e" */
		e = entry->vme_end;
		if (e > end) {
			e = end;
		}

		/*
		 * Check for holes and protection mismatch.
		 * Holes: Next entry should be contiguous unless this
		 * is the end of the region.
		 * Protection: Access requested must be allowed, unless
		 * wiring is by protection class
		 */
		if ((entry->vme_end < end) &&
		    ((entry->vme_next == vm_map_to_entry(map)) ||
		    (entry->vme_next->vme_start > entry->vme_end))) {
			/* found a hole */
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}
		if ((entry->protection & access_type) != access_type) {
			/* found a protection problem */
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		assert(entry->wired_count == 0 && entry->user_wired_count == 0);

		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
			goto done;
		}

		entry->in_transition = TRUE;

		/*
		 * This entry might get split once we unlock the map.
		 * In vm_fault_wire(), we need the current range as
		 * defined by this entry. In order for this to work
		 * along with a simultaneous clip operation, we make a
		 * temporary copy of this entry and use that for the
		 * wiring. Note that the underlying objects do not
		 * change during a clip.
		 */
		tmp_entry = *entry;

		/*
		 * The in_transition state guarentees that the entry
		 * (or entries for this range, if split occured) will be
		 * there when the map lock is acquired for the second time.
		 */
		vm_map_unlock(map);

		if (!user_wire && cur_thread != THREAD_NULL) {
			interruptible_state = thread_interrupt_level(THREAD_UNINT);
		} else {
			/* never restored in this case (see matching check below); value unused */
			interruptible_state = THREAD_UNINT;
		}

		if (map_pmap) {
			rc = vm_fault_wire(map,
			    &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
			    physpage_p);
		} else {
			rc = vm_fault_wire(map,
			    &tmp_entry, caller_prot, tag, map->pmap,
			    tmp_entry.vme_start,
			    physpage_p);
		}

		if (!user_wire && cur_thread != THREAD_NULL) {
			thread_interrupt_level(interruptible_state);
		}

		vm_map_lock(map);

		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * Find the entry again. It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
			    &first_entry)) {
				panic("vm_map_wire: re-lookup failed");
			}

			entry = first_entry;
		}

		last_timestamp = map->timestamp;

		/* clear in_transition on every piece the entry may have been clipped into */
		while ((entry != vm_map_to_entry(map)) &&
		    (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			if (rc != KERN_SUCCESS) { /* from vm_*_wire */
				subtract_wire_counts(map, entry, user_wire);
			}
			entry = entry->vme_next;
		}

		if (rc != KERN_SUCCESS) { /* from vm_*_wire */
			goto done;
		}

		if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
		    (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
		    (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
			/* found a "new" hole */
			s = tmp_entry.vme_end;
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}

		s = entry->vme_start;
	} /* end while loop through map entries */

done:
	if (rc == KERN_SUCCESS) {
		/* repair any damage we may have made to the VM map */
		vm_map_simplify_range(map, start, end);
	}

	vm_map_unlock(map);

	/*
	 * wake up anybody waiting on entries we wired.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}

	if (rc != KERN_SUCCESS) {
		/* undo what has been wired so far */
		vm_map_unwire_nested(map, start, s, user_wire,
		    map_pmap, pmap_addr);
		if (physpage_p) {
			*physpage_p = 0;
		}
	}

	return rc;
}
7107
7108 kern_return_t
vm_map_wire_external(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,boolean_t user_wire)7109 vm_map_wire_external(
7110 vm_map_t map,
7111 vm_map_offset_t start,
7112 vm_map_offset_t end,
7113 vm_prot_t caller_prot,
7114 boolean_t user_wire)
7115 {
7116 kern_return_t kret;
7117
7118 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7119 user_wire, (pmap_t)NULL, 0, NULL);
7120 return kret;
7121 }
7122
7123 kern_return_t
vm_map_wire_kernel(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire)7124 vm_map_wire_kernel(
7125 vm_map_t map,
7126 vm_map_offset_t start,
7127 vm_map_offset_t end,
7128 vm_prot_t caller_prot,
7129 vm_tag_t tag,
7130 boolean_t user_wire)
7131 {
7132 kern_return_t kret;
7133
7134 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7135 user_wire, (pmap_t)NULL, 0, NULL);
7136 return kret;
7137 }
7138
7139 kern_return_t
vm_map_wire_and_extract_external(vm_map_t map,vm_map_offset_t start,vm_prot_t caller_prot,boolean_t user_wire,ppnum_t * physpage_p)7140 vm_map_wire_and_extract_external(
7141 vm_map_t map,
7142 vm_map_offset_t start,
7143 vm_prot_t caller_prot,
7144 boolean_t user_wire,
7145 ppnum_t *physpage_p)
7146 {
7147 kern_return_t kret;
7148
7149 kret = vm_map_wire_nested(map,
7150 start,
7151 start + VM_MAP_PAGE_SIZE(map),
7152 caller_prot,
7153 vm_tag_bt(),
7154 user_wire,
7155 (pmap_t)NULL,
7156 0,
7157 physpage_p);
7158 if (kret != KERN_SUCCESS &&
7159 physpage_p != NULL) {
7160 *physpage_p = 0;
7161 }
7162 return kret;
7163 }
7164
7165 kern_return_t
vm_map_wire_and_extract_kernel(vm_map_t map,vm_map_offset_t start,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire,ppnum_t * physpage_p)7166 vm_map_wire_and_extract_kernel(
7167 vm_map_t map,
7168 vm_map_offset_t start,
7169 vm_prot_t caller_prot,
7170 vm_tag_t tag,
7171 boolean_t user_wire,
7172 ppnum_t *physpage_p)
7173 {
7174 kern_return_t kret;
7175
7176 kret = vm_map_wire_nested(map,
7177 start,
7178 start + VM_MAP_PAGE_SIZE(map),
7179 caller_prot,
7180 tag,
7181 user_wire,
7182 (pmap_t)NULL,
7183 0,
7184 physpage_p);
7185 if (kret != KERN_SUCCESS &&
7186 physpage_p != NULL) {
7187 *physpage_p = 0;
7188 }
7189 return kret;
7190 }
7191
7192 /*
7193 * vm_map_unwire:
7194 *
7195 * Sets the pageability of the specified address range in the target
7196 * as pageable. Regions specified must have been wired previously.
7197 *
7198 * The map must not be locked, but a reference must remain to the map
7199 * throughout the call.
7200 *
7201 * Kernel will panic on failures. User unwire ignores holes and
7202 * unwired and intransition entries to avoid losing memory by leaving
7203 * it unwired.
7204 */
7205 static kern_return_t
vm_map_unwire_nested(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t user_wire,pmap_t map_pmap,vm_map_offset_t pmap_addr)7206 vm_map_unwire_nested(
7207 vm_map_t map,
7208 vm_map_offset_t start,
7209 vm_map_offset_t end,
7210 boolean_t user_wire,
7211 pmap_t map_pmap,
7212 vm_map_offset_t pmap_addr)
7213 {
7214 vm_map_entry_t entry;
7215 struct vm_map_entry *first_entry, tmp_entry;
7216 boolean_t need_wakeup;
7217 boolean_t main_map = FALSE;
7218 unsigned int last_timestamp;
7219
7220 vm_map_lock(map);
7221 if (map_pmap == NULL) {
7222 main_map = TRUE;
7223 }
7224 last_timestamp = map->timestamp;
7225
7226 VM_MAP_RANGE_CHECK(map, start, end);
7227 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7228 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7229
7230 if (start == end) {
7231 /* We unwired what the caller asked for: zero pages */
7232 vm_map_unlock(map);
7233 return KERN_SUCCESS;
7234 }
7235
7236 if (vm_map_lookup_entry(map, start, &first_entry)) {
7237 entry = first_entry;
7238 /*
7239 * vm_map_clip_start will be done later.
7240 * We don't want to unnest any nested sub maps here !
7241 */
7242 } else {
7243 if (!user_wire) {
7244 panic("vm_map_unwire: start not found");
7245 }
7246 /* Start address is not in map. */
7247 vm_map_unlock(map);
7248 return KERN_INVALID_ADDRESS;
7249 }
7250
7251 if (entry->superpage_size) {
7252 /* superpages are always wired */
7253 vm_map_unlock(map);
7254 return KERN_INVALID_ADDRESS;
7255 }
7256
7257 need_wakeup = FALSE;
7258 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7259 if (entry->in_transition) {
7260 /*
7261 * 1)
7262 * Another thread is wiring down this entry. Note
7263 * that if it is not for the other thread we would
7264 * be unwiring an unwired entry. This is not
7265 * permitted. If we wait, we will be unwiring memory
7266 * we did not wire.
7267 *
7268 * 2)
7269 * Another thread is unwiring this entry. We did not
7270 * have a reference to it, because if we did, this
7271 * entry will not be getting unwired now.
7272 */
7273 if (!user_wire) {
7274 /*
7275 * XXX FBDP
7276 * This could happen: there could be some
7277 * overlapping vslock/vsunlock operations
7278 * going on.
7279 * We should probably just wait and retry,
7280 * but then we have to be careful that this
7281 * entry could get "simplified" after
7282 * "in_transition" gets unset and before
7283 * we re-lookup the entry, so we would
7284 * have to re-clip the entry to avoid
7285 * re-unwiring what we have already unwired...
7286 * See vm_map_wire_nested().
7287 *
7288 * Or we could just ignore "in_transition"
7289 * here and proceed to decement the wired
7290 * count(s) on this entry. That should be fine
7291 * as long as "wired_count" doesn't drop all
7292 * the way to 0 (and we should panic if THAT
7293 * happens).
7294 */
7295 panic("vm_map_unwire: in_transition entry");
7296 }
7297
7298 entry = entry->vme_next;
7299 continue;
7300 }
7301
7302 if (entry->is_sub_map) {
7303 vm_map_offset_t sub_start;
7304 vm_map_offset_t sub_end;
7305 vm_map_offset_t local_end;
7306 pmap_t pmap;
7307
7308 vm_map_clip_start(map, entry, start);
7309 vm_map_clip_end(map, entry, end);
7310
7311 sub_start = VME_OFFSET(entry);
7312 sub_end = entry->vme_end - entry->vme_start;
7313 sub_end += VME_OFFSET(entry);
7314 local_end = entry->vme_end;
7315 if (map_pmap == NULL) {
7316 if (entry->use_pmap) {
7317 pmap = VME_SUBMAP(entry)->pmap;
7318 pmap_addr = sub_start;
7319 } else {
7320 pmap = map->pmap;
7321 pmap_addr = start;
7322 }
7323 if (entry->wired_count == 0 ||
7324 (user_wire && entry->user_wired_count == 0)) {
7325 if (!user_wire) {
7326 panic("vm_map_unwire: entry is unwired");
7327 }
7328 entry = entry->vme_next;
7329 continue;
7330 }
7331
7332 /*
7333 * Check for holes
7334 * Holes: Next entry should be contiguous unless
7335 * this is the end of the region.
7336 */
7337 if (((entry->vme_end < end) &&
7338 ((entry->vme_next == vm_map_to_entry(map)) ||
7339 (entry->vme_next->vme_start
7340 > entry->vme_end)))) {
7341 if (!user_wire) {
7342 panic("vm_map_unwire: non-contiguous region");
7343 }
7344 /*
7345 * entry = entry->vme_next;
7346 * continue;
7347 */
7348 }
7349
7350 subtract_wire_counts(map, entry, user_wire);
7351
7352 if (entry->wired_count != 0) {
7353 entry = entry->vme_next;
7354 continue;
7355 }
7356
7357 entry->in_transition = TRUE;
7358 tmp_entry = *entry;/* see comment in vm_map_wire() */
7359
7360 /*
7361 * We can unlock the map now. The in_transition state
7362 * guarantees existance of the entry.
7363 */
7364 vm_map_unlock(map);
7365 vm_map_unwire_nested(VME_SUBMAP(entry),
7366 sub_start, sub_end, user_wire, pmap, pmap_addr);
7367 vm_map_lock(map);
7368
7369 if (last_timestamp + 1 != map->timestamp) {
7370 /*
7371 * Find the entry again. It could have been
7372 * clipped or deleted after we unlocked the map.
7373 */
7374 if (!vm_map_lookup_entry(map,
7375 tmp_entry.vme_start,
7376 &first_entry)) {
7377 if (!user_wire) {
7378 panic("vm_map_unwire: re-lookup failed");
7379 }
7380 entry = first_entry->vme_next;
7381 } else {
7382 entry = first_entry;
7383 }
7384 }
7385 last_timestamp = map->timestamp;
7386
7387 /*
7388 * clear transition bit for all constituent entries
7389 * that were in the original entry (saved in
7390 * tmp_entry). Also check for waiters.
7391 */
7392 while ((entry != vm_map_to_entry(map)) &&
7393 (entry->vme_start < tmp_entry.vme_end)) {
7394 assert(entry->in_transition);
7395 entry->in_transition = FALSE;
7396 if (entry->needs_wakeup) {
7397 entry->needs_wakeup = FALSE;
7398 need_wakeup = TRUE;
7399 }
7400 entry = entry->vme_next;
7401 }
7402 continue;
7403 } else {
7404 tmp_entry = *entry;
7405 vm_map_unlock(map);
7406 vm_map_unwire_nested(VME_SUBMAP(entry),
7407 sub_start, sub_end, user_wire, map_pmap,
7408 pmap_addr);
7409 vm_map_lock(map);
7410
7411 if (last_timestamp + 1 != map->timestamp) {
7412 /*
7413 * Find the entry again. It could have been
7414 * clipped or deleted after we unlocked the map.
7415 */
7416 if (!vm_map_lookup_entry(map,
7417 tmp_entry.vme_start,
7418 &first_entry)) {
7419 if (!user_wire) {
7420 panic("vm_map_unwire: re-lookup failed");
7421 }
7422 entry = first_entry->vme_next;
7423 } else {
7424 entry = first_entry;
7425 }
7426 }
7427 last_timestamp = map->timestamp;
7428 }
7429 }
7430
7431
7432 if ((entry->wired_count == 0) ||
7433 (user_wire && entry->user_wired_count == 0)) {
7434 if (!user_wire) {
7435 panic("vm_map_unwire: entry is unwired");
7436 }
7437
7438 entry = entry->vme_next;
7439 continue;
7440 }
7441
7442 assert(entry->wired_count > 0 &&
7443 (!user_wire || entry->user_wired_count > 0));
7444
7445 vm_map_clip_start(map, entry, start);
7446 vm_map_clip_end(map, entry, end);
7447
7448 /*
7449 * Check for holes
7450 * Holes: Next entry should be contiguous unless
7451 * this is the end of the region.
7452 */
7453 if (((entry->vme_end < end) &&
7454 ((entry->vme_next == vm_map_to_entry(map)) ||
7455 (entry->vme_next->vme_start > entry->vme_end)))) {
7456 if (!user_wire) {
7457 panic("vm_map_unwire: non-contiguous region");
7458 }
7459 entry = entry->vme_next;
7460 continue;
7461 }
7462
7463 subtract_wire_counts(map, entry, user_wire);
7464
7465 if (entry->wired_count != 0) {
7466 entry = entry->vme_next;
7467 continue;
7468 }
7469
7470 if (entry->zero_wired_pages) {
7471 entry->zero_wired_pages = FALSE;
7472 }
7473
7474 entry->in_transition = TRUE;
7475 tmp_entry = *entry; /* see comment in vm_map_wire() */
7476
7477 /*
7478 * We can unlock the map now. The in_transition state
7479 * guarantees existance of the entry.
7480 */
7481 vm_map_unlock(map);
7482 if (map_pmap) {
7483 vm_fault_unwire(map,
7484 &tmp_entry, FALSE, map_pmap, pmap_addr);
7485 } else {
7486 vm_fault_unwire(map,
7487 &tmp_entry, FALSE, map->pmap,
7488 tmp_entry.vme_start);
7489 }
7490 vm_map_lock(map);
7491
7492 if (last_timestamp + 1 != map->timestamp) {
7493 /*
7494 * Find the entry again. It could have been clipped
7495 * or deleted after we unlocked the map.
7496 */
7497 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7498 &first_entry)) {
7499 if (!user_wire) {
7500 panic("vm_map_unwire: re-lookup failed");
7501 }
7502 entry = first_entry->vme_next;
7503 } else {
7504 entry = first_entry;
7505 }
7506 }
7507 last_timestamp = map->timestamp;
7508
7509 /*
7510 * clear transition bit for all constituent entries that
7511 * were in the original entry (saved in tmp_entry). Also
7512 * check for waiters.
7513 */
7514 while ((entry != vm_map_to_entry(map)) &&
7515 (entry->vme_start < tmp_entry.vme_end)) {
7516 assert(entry->in_transition);
7517 entry->in_transition = FALSE;
7518 if (entry->needs_wakeup) {
7519 entry->needs_wakeup = FALSE;
7520 need_wakeup = TRUE;
7521 }
7522 entry = entry->vme_next;
7523 }
7524 }
7525
7526 /*
7527 * We might have fragmented the address space when we wired this
7528 * range of addresses. Attempt to re-coalesce these VM map entries
7529 * with their neighbors now that they're no longer wired.
7530 * Under some circumstances, address space fragmentation can
7531 * prevent VM object shadow chain collapsing, which can cause
7532 * swap space leaks.
7533 */
7534 vm_map_simplify_range(map, start, end);
7535
7536 vm_map_unlock(map);
7537 /*
7538 * wake up anybody waiting on entries that we have unwired.
7539 */
7540 if (need_wakeup) {
7541 vm_map_entry_wakeup(map);
7542 }
7543 return KERN_SUCCESS;
7544 }
7545
7546 kern_return_t
vm_map_unwire(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t user_wire)7547 vm_map_unwire(
7548 vm_map_t map,
7549 vm_map_offset_t start,
7550 vm_map_offset_t end,
7551 boolean_t user_wire)
7552 {
7553 return vm_map_unwire_nested(map, start, end,
7554 user_wire, (pmap_t)NULL, 0);
7555 }
7556
7557
/*
 *	vm_map_entry_zap:	[ internal use only ]
 *
 *	Remove the entry from the target map
 *	and put it on a zap list.
 *
 *	The entry must be completely unwired and must not be
 *	"permanent".  The caller is expected to hold the map locked
 *	for writing; ownership of the entry moves to the zap list,
 *	which is presumably reaped by the caller later — TODO confirm
 *	reaping contract against vm_map_zap_append()'s callers.
 */
static void
vm_map_entry_zap(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_zap_t    zap)
{
	vm_map_offset_t s, e;

	s = entry->vme_start;
	e = entry->vme_end;
	/* entry bounds must at least be 4K-aligned... */
	assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
	assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
	/* ...and page-aligned when the map's page mask covers the system's */
	if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
		assert(page_aligned(s));
		assert(page_aligned(e));
	}
	/* map-aligned entries must also honor the map's own alignment */
	if (entry->map_aligned == TRUE) {
		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
	}
	/* only fully unwired, non-permanent entries may be zapped */
	assert(entry->wired_count == 0);
	assert(entry->user_wired_count == 0);
	assert(!entry->vme_permanent);

	/* detach from the map and account for the removed virtual size */
	vm_map_store_entry_unlink(map, entry, false);
	map->size -= e - s;

	vm_map_zap_append(zap, entry);
}
7593
/*
 *	vm_map_submap_pmap_clean:
 *
 *	Remove the physical (pmap) mappings backing the range
 *	[start, end) of "map", where that range is mapped through
 *	"sub_map" at submap offset "offset".  Recurses into nested
 *	submap entries.  Takes the submap's lock for reading; the
 *	caller presumably holds "map" locked — TODO confirm against
 *	call sites.
 */
static void
vm_map_submap_pmap_clean(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t        sub_map,
	vm_map_offset_t offset)
{
	vm_map_offset_t submap_start;
	vm_map_offset_t submap_end;
	vm_map_size_t   remove_size;
	vm_map_entry_t  entry;

	/* translate the [start, end) range into submap address space */
	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if (vm_map_lookup_entry(sub_map, offset, &entry)) {
		/*
		 * First entry: it may straddle the start and/or the end
		 * of the range, so clip remove_size to the overlap.
		 */
		remove_size = (entry->vme_end - entry->vme_start);
		if (offset > entry->vme_start) {
			remove_size -= offset - entry->vme_start;
		}


		if (submap_end < entry->vme_end) {
			remove_size -=
			    entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			/* nested submap: recurse to clean its backing pmap */
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count_raw(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				/*
				 * The map may be shared by other pmaps:
				 * remove mappings object-wide rather than
				 * from just this map's pmap.
				 */
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					(VME_OFFSET(entry) +
					offset -
					entry->vme_start),
					remove_size,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)start,
				    (addr64_t)(start + remove_size));
			}
		}
	}

	/*
	 * NOTE(review): if the lookup above failed, "entry" is the entry
	 * preceding "offset", so advancing to vme_next lands on the first
	 * entry inside the range.
	 */
	entry = entry->vme_next;

	/* remaining entries start within the range; only the end may need clipping */
	while ((entry != vm_map_to_entry(sub_map))
	    && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (submap_end < entry->vme_end) {
			remove_size -= entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count_raw(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					VME_OFFSET(entry),
					remove_size,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)((start + entry->vme_start)
				    - offset),
				    (addr64_t)(((start + entry->vme_start)
				    - offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
	return;
}
7693
/*
 * virt_memory_guard_ast:
 *
 * Handle the AST callout for a virtual memory guard.
 * raise an EXC_GUARD exception and terminate the task
 * if configured to do so.
 *
 * "code"/"subcode" are the EXC_GUARD exception payload previously
 * recorded via thread_guard_violation().  Must run on the violating
 * thread, in its own task (asserted below).
 */
void
virt_memory_guard_ast(
	thread_t thread,
	mach_exception_data_type_t code,
	mach_exception_data_type_t subcode)
{
	task_t task = get_threadtask(thread);
	assert(task != kernel_task);
	assert(task == current_task());
	kern_return_t sync_exception_result;
	uint32_t behavior;

	behavior = task->task_exc_guard;

	/* Is delivery enabled */
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return;
	}

	/* If only once, make sure we're that once */
	while (behavior & TASK_EXC_GUARD_VM_ONCE) {
		/* atomically clear DELIVER so no later guard violation fires again */
		uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;

		if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
			/* we won the race: we are "the once" and may deliver */
			break;
		}
		/* lost the CAS: re-read and bail if another thread delivered first */
		behavior = task->task_exc_guard;
		if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
			return;
		}
	}

	/* Raise exception synchronously and see if handler claimed it */
	sync_exception_result = task_exception_notify(EXC_GUARD, code, subcode);

	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		/*
		 * If Synchronous EXC_GUARD delivery was successful then
		 * kill the process and return, else kill the process
		 * and deliver the exception via EXC_CORPSE_NOTIFY.
		 */
		if (sync_exception_result == KERN_SUCCESS) {
			task_bsdtask_kill(current_task());
		} else {
			exit_with_guard_exception(current_proc(), code, subcode);
		}
	} else if (task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) {
		/*
		 * If the synchronous EXC_GUARD delivery was not successful,
		 * raise a simulated crash.
		 */
		if (sync_exception_result != KERN_SUCCESS) {
			task_violated_guard(code, subcode, NULL, FALSE);
		}
	}
}
7757
7758 /*
7759 * vm_map_guard_exception:
7760 *
7761 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7762 *
7763 * Right now, we do this when we find nothing mapped, or a
7764 * gap in the mapping when a user address space deallocate
7765 * was requested. We report the address of the first gap found.
7766 */
7767 static void
vm_map_guard_exception(vm_map_offset_t gap_start,unsigned reason)7768 vm_map_guard_exception(
7769 vm_map_offset_t gap_start,
7770 unsigned reason)
7771 {
7772 mach_exception_code_t code = 0;
7773 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7774 unsigned int target = 0; /* should we pass in pid associated with map? */
7775 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7776 boolean_t fatal = FALSE;
7777
7778 task_t task = current_task_early();
7779
7780 /* Can't deliver exceptions to a NULL task (early boot) or kernel task */
7781 if (task == NULL || task == kernel_task) {
7782 return;
7783 }
7784
7785 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7786 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7787 EXC_GUARD_ENCODE_TARGET(code, target);
7788
7789 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7790 fatal = TRUE;
7791 }
7792 thread_guard_violation(current_thread(), code, subcode, fatal);
7793 }
7794
7795 static kern_return_t
vm_map_delete_submap_recurse(vm_map_t submap,vm_map_offset_t submap_start,vm_map_offset_t submap_end)7796 vm_map_delete_submap_recurse(
7797 vm_map_t submap,
7798 vm_map_offset_t submap_start,
7799 vm_map_offset_t submap_end)
7800 {
7801 vm_map_entry_t submap_entry;
7802
7803 /*
7804 * Verify that the submap does not contain any "permanent" entries
7805 * within the specified range.
7806 * We do not care about gaps.
7807 */
7808
7809 vm_map_lock(submap);
7810
7811 if (!vm_map_lookup_entry(submap, submap_start, &submap_entry)) {
7812 submap_entry = submap_entry->vme_next;
7813 }
7814
7815 for (;
7816 submap_entry != vm_map_to_entry(submap) &&
7817 submap_entry->vme_start < submap_end;
7818 submap_entry = submap_entry->vme_next) {
7819 if (submap_entry->vme_permanent) {
7820 /* "permanent" entry -> fail */
7821 vm_map_unlock(submap);
7822 return KERN_PROTECTION_FAILURE;
7823 }
7824 }
7825 /* no "permanent" entries in the range -> success */
7826 vm_map_unlock(submap);
7827 return KERN_SUCCESS;
7828 }
7829
/*
 * Panic helper for vm_map_delete(): "start" is not aligned to the
 * map's page size.  Never returns (__abortlike).
 */
__abortlike
static void
__vm_map_delete_misaligned_panic(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	panic("vm_map_delete(%p,0x%llx,0x%llx): start is not aligned to 0x%x",
	    map, (uint64_t)start, (uint64_t)end, VM_MAP_PAGE_SIZE(map));
}
7840
/*
 * Panic helper for vm_map_delete(): a deletion that must not fail
 * returned error "kr".  Never returns (__abortlike).
 */
__abortlike
static void
__vm_map_delete_failed_panic(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	kern_return_t   kr)
{
	panic("vm_map_delete(%p,0x%llx,0x%llx): failed unexpected with %d",
	    map, (uint64_t)start, (uint64_t)end, kr);
}
7852
/*
 * Panic helper for vm_map_delete(): no map entry exists at "where"
 * within the [start, end) range being deleted, and gaps are not
 * tolerated (kernel pmap).  Never returns (__abortlike).
 */
__abortlike
static void
__vm_map_delete_gap_panic(
	vm_map_t        map,
	vm_map_offset_t where,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	panic("vm_map_delete(%p,0x%llx,0x%llx): no map entry at 0x%llx",
	    map, (uint64_t)start, (uint64_t)end, (uint64_t)where);
}
7864
/*
 * Panic helper for vm_map_delete(): an attempt was made to remove a
 * "permanent" VM map entry in a context where that is forbidden.
 * Never returns (__abortlike).
 */
__abortlike
static void
__vm_map_delete_permanent_panic(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_entry_t  entry)
{
	panic("vm_map_delete(%p,0x%llx,0x%llx): "
	    "Attempting to remove permanent VM map entry %p [0x%llx:0x%llx]",
	    map, (uint64_t)start, (uint64_t)end, entry,
	    (uint64_t)entry->vme_start,
	    (uint64_t)entry->vme_end);
}
7879
/*
 * Internal state flags threaded through the vm_map_delete() main loop.
 */
__options_decl(vm_map_delete_state_t, uint32_t, {
	VMDS_NONE = 0x0000,

	VMDS_FOUND_GAP = 0x0001,        /* a gap was found in the range */
	VMDS_GAPS_OK = 0x0002,          /* gaps tolerated (map terminated / no refs) */

	VMDS_KERNEL_PMAP = 0x0004,      /* map uses kernel_pmap: errors panic */
	VMDS_NEEDS_LOOKUP = 0x0008,     /* map lock was dropped; re-lookup entry */
	VMDS_NEEDS_WAKEUP = 0x0010,     /* wake waiters on entries we touched */
});
7890
7891 /*
7892 * vm_map_delete: [ internal use only ]
7893 *
7894 * Deallocates the given address range from the target map.
7895 * Removes all user wirings. Unwires one kernel wiring if
7896 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7897 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7898 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7899 *
7900 *
7901 * When the map is a kernel map, then any error in removing mappings
7902 * will lead to a panic so that clients do not have to repeat the panic
7903 * code at each call site. If VM_MAP_REMOVE_INTERRUPTIBLE
7904 * is also passed, then KERN_ABORTED will not lead to a panic.
7905 *
7906 * This routine is called with map locked and leaves map locked.
7907 */
7908 static kmem_return_t
vm_map_delete(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vmr_flags_t flags,kmem_guard_t guard,vm_map_zap_t zap_list)7909 vm_map_delete(
7910 vm_map_t map,
7911 vm_map_offset_t start,
7912 vm_map_offset_t end,
7913 vmr_flags_t flags,
7914 kmem_guard_t guard,
7915 vm_map_zap_t zap_list)
7916 {
7917 vm_map_entry_t entry, next;
7918 int interruptible;
7919 vm_map_offset_t gap_start = 0;
7920 vm_map_offset_t clear_in_transition_end = 0;
7921 __unused vm_map_offset_t save_start = start;
7922 __unused vm_map_offset_t save_end = end;
7923 vm_map_delete_state_t state = VMDS_NONE;
7924 kmem_return_t ret = { };
7925
7926 if (vm_map_pmap(map) == kernel_pmap) {
7927 state |= VMDS_KERNEL_PMAP;
7928 }
7929
7930 if (map->terminated || os_ref_get_count_raw(&map->map_refcnt) == 0) {
7931 state |= VMDS_GAPS_OK;
7932 }
7933
7934 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7935 THREAD_ABORTSAFE : THREAD_UNINT;
7936
7937 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) == 0 &&
7938 (start & VM_MAP_PAGE_MASK(map))) {
7939 __vm_map_delete_misaligned_panic(map, start, end);
7940 }
7941
7942 if ((state & VMDS_GAPS_OK) == 0) {
7943 /*
7944 * If the map isn't terminated then all deletions must have
7945 * no gaps, and be within the [min, max) of the map.
7946 *
7947 * We got here without VM_MAP_RANGE_CHECK() being called,
7948 * and hence must validate bounds manually.
7949 *
7950 * It is worth noting that because vm_deallocate() will
7951 * round_page() the deallocation size, it's possible for "end"
7952 * to be 0 here due to overflow. We hence must treat it as being
7953 * beyond vm_map_max(map).
7954 *
7955 * Similarly, end < start means some wrap around happend,
7956 * which should cause an error or panic.
7957 */
7958 if (end == 0 || end > vm_map_max(map)) {
7959 state |= VMDS_FOUND_GAP;
7960 gap_start = vm_map_max(map);
7961 if (state & VMDS_KERNEL_PMAP) {
7962 __vm_map_delete_gap_panic(map,
7963 gap_start, start, end);
7964 }
7965 goto out;
7966 }
7967
7968 if (end < start) {
7969 if (state & VMDS_KERNEL_PMAP) {
7970 __vm_map_delete_gap_panic(map,
7971 vm_map_max(map), start, end);
7972 }
7973 ret.kmr_return = KERN_INVALID_ARGUMENT;
7974 goto out;
7975 }
7976
7977 if (start < vm_map_min(map)) {
7978 state |= VMDS_FOUND_GAP;
7979 gap_start = start;
7980 if (state & VMDS_KERNEL_PMAP) {
7981 __vm_map_delete_gap_panic(map,
7982 gap_start, start, end);
7983 }
7984 goto out;
7985 }
7986 } else {
7987 /*
7988 * If the map is terminated, we must accept start/end
7989 * being beyond the boundaries of the map as this is
7990 * how some of the mappings like commpage mappings
7991 * can be destroyed (they're outside of those bounds).
7992 *
7993 * end < start is still something we can't cope with,
7994 * so just bail.
7995 */
7996 if (end < start) {
7997 goto out;
7998 }
7999 }
8000
8001
8002 /*
8003 * Find the start of the region.
8004 *
8005 * If in a superpage, extend the range
8006 * to include the start of the mapping.
8007 */
8008 while (vm_map_lookup_entry_or_next(map, start, &entry)) {
8009 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
8010 start = SUPERPAGE_ROUND_DOWN(start);
8011 } else {
8012 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8013 break;
8014 }
8015 }
8016
8017 if (entry->superpage_size) {
8018 end = SUPERPAGE_ROUND_UP(end);
8019 }
8020
8021 /*
8022 * Step through all entries in this region
8023 */
8024 for (vm_map_offset_t s = start; s < end;) {
8025 /*
8026 * At this point, we have deleted all the memory entries
8027 * in [start, s) and are proceeding with the [s, end) range.
8028 *
8029 * This loop might drop the map lock, and it is possible that
8030 * some memory was already reallocated within [start, s)
8031 * and we don't want to mess with those entries.
8032 *
8033 * Some of those entries could even have been re-assembled
8034 * with an entry after "s" (in vm_map_simplify_entry()), so
8035 * we may have to vm_map_clip_start() again.
8036 *
8037 * When clear_in_transition_end is set, the we had marked
8038 * [start, clear_in_transition_end) as "in_transition"
8039 * during a previous iteration and we need to clear it.
8040 */
8041
8042 /*
8043 * Step 1: If needed (because we dropped locks),
8044 * lookup the entry again.
8045 *
8046 * If we're coming back from unwiring (Step 5),
8047 * we also need to mark the entries as no longer
8048 * in transition after that.
8049 */
8050
8051 if (state & VMDS_NEEDS_LOOKUP) {
8052 state &= ~VMDS_NEEDS_LOOKUP;
8053
8054 if (vm_map_lookup_entry_or_next(map, s, &entry)) {
8055 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8056 }
8057 }
8058
8059 if (clear_in_transition_end) {
8060 for (vm_map_entry_t it = entry;
8061 it != vm_map_to_entry(map) &&
8062 it->vme_start < clear_in_transition_end;
8063 it = it->vme_next) {
8064 assert(it->in_transition);
8065 it->in_transition = FALSE;
8066 if (it->needs_wakeup) {
8067 it->needs_wakeup = FALSE;
8068 state |= VMDS_NEEDS_WAKEUP;
8069 }
8070 }
8071
8072 clear_in_transition_end = 0;
8073 }
8074
8075
8076 /*
8077 * Step 2: Perform various policy checks
8078 * before we do _anything_ to this entry.
8079 */
8080
8081 if (entry == vm_map_to_entry(map) || s < entry->vme_start) {
8082 if (state & (VMDS_GAPS_OK | VMDS_FOUND_GAP)) {
8083 /*
8084 * Either we found a gap already,
8085 * or we are tearing down a map,
8086 * keep going.
8087 */
8088 } else if (state & VMDS_KERNEL_PMAP) {
8089 __vm_map_delete_gap_panic(map, s, start, end);
8090 } else if (vm_map_round_page(s, VM_MAP_PAGE_MASK(map)) < end) {
8091 /*
8092 * The vm_map_round_page() is needed since an entry
8093 * can be less than VM_MAP_PAGE_MASK() sized.
8094 *
8095 * For example, devices which have h/w 4K pages,
8096 * but entry sizes are all now 16K.
8097 */
8098 state |= VMDS_FOUND_GAP;
8099 gap_start = s;
8100 }
8101
8102 if (entry == vm_map_to_entry(map) ||
8103 end <= entry->vme_start) {
8104 break;
8105 }
8106
8107 s = entry->vme_start;
8108 }
8109
8110 if (state & VMDS_KERNEL_PMAP) {
8111 /*
8112 * In the kernel map and its submaps,
8113 * permanent entries never die, even
8114 * if VM_MAP_REMOVE_IMMUTABLE is passed.
8115 */
8116 if (entry->vme_permanent) {
8117 __vm_map_delete_permanent_panic(map, start, end, entry);
8118 }
8119
8120 if (flags & VM_MAP_REMOVE_GUESS_SIZE) {
8121 end = entry->vme_end;
8122 flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
8123 }
8124
8125 /*
8126 * In the kernel map and its submaps,
8127 * the removal of an atomic/guarded entry is strict.
8128 *
8129 * An atomic entry is processed only if it was
8130 * specifically targeted.
8131 *
8132 * We might have deleted non-atomic entries before
8133 * we reach this this point however...
8134 */
8135 kmem_entry_validate_guard(map, entry,
8136 start, end - start, guard);
8137 }
8138
8139 /*
8140 * Step 2.1: handle "permanent" and "submap" entries
8141 * *before* clipping to avoid triggering some unnecessary
8142 * un-nesting of the shared region.
8143 */
8144 if (entry->vme_permanent && entry->is_sub_map) {
8145 // printf("FBDP %s:%d permanent submap...\n", __FUNCTION__, __LINE__);
8146 /*
8147 * Un-mapping a "permanent" mapping of a user-space
8148 * submap is not allowed unless...
8149 */
8150 if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8151 /*
8152 * a. explicitly requested by the kernel caller.
8153 */
8154 // printf("FBDP %s:%d flags & REMOVE_IMMUTABLE\n", __FUNCTION__, __LINE__);
8155 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE_CODE) &&
8156 developer_mode_state()) {
8157 /*
8158 * b. we're in "developer" mode (for
8159 * breakpoints, dtrace probes, ...).
8160 */
8161 // printf("FBDP %s:%d flags & REMOVE_IMMUTABLE_CODE\n", __FUNCTION__, __LINE__);
8162 } else if (map->terminated) {
8163 /*
8164 * c. this is the final address space cleanup.
8165 */
8166 // printf("FBDP %s:%d map->terminated\n", __FUNCTION__, __LINE__);
8167 } else {
8168 vm_map_offset_t submap_start, submap_end;
8169 kern_return_t submap_kr;
8170
8171 /*
8172 * Check if there are any "permanent" mappings
8173 * in this range in the submap.
8174 */
8175 if (entry->in_transition) {
8176 /* can that even happen ? */
8177 goto in_transition;
8178 }
8179 /* compute the clipped range in the submap */
8180 submap_start = s - entry->vme_start;
8181 submap_start += VME_OFFSET(entry);
8182 submap_end = end - entry->vme_start;
8183 submap_end += VME_OFFSET(entry);
8184 submap_kr = vm_map_delete_submap_recurse(
8185 VME_SUBMAP(entry),
8186 submap_start,
8187 submap_end);
8188 if (submap_kr != KERN_SUCCESS) {
8189 /*
8190 * There are some "permanent" mappings
8191 * in the submap: we are not allowed
8192 * to remove this range.
8193 */
8194 printf("%d[%s] removing permanent submap entry "
8195 "%p [0x%llx:0x%llx] prot 0x%x/0x%x -> KERN_PROT_FAILURE\n",
8196 proc_selfpid(),
8197 (get_bsdtask_info(current_task())
8198 ? proc_name_address(get_bsdtask_info(current_task()))
8199 : "?"), entry,
8200 (uint64_t)entry->vme_start,
8201 (uint64_t)entry->vme_end,
8202 entry->protection,
8203 entry->max_protection);
8204 DTRACE_VM6(vm_map_delete_permanent_deny_submap,
8205 vm_map_entry_t, entry,
8206 vm_map_offset_t, entry->vme_start,
8207 vm_map_offset_t, entry->vme_end,
8208 vm_prot_t, entry->protection,
8209 vm_prot_t, entry->max_protection,
8210 int, VME_ALIAS(entry));
8211 ret.kmr_return = KERN_PROTECTION_FAILURE;
8212 goto out;
8213 }
8214 /* no permanent mappings: proceed */
8215 }
8216 }
8217
8218 /*
8219 * Step 3: Perform any clipping needed.
8220 *
8221 * After this, "entry" starts at "s", ends before "end"
8222 */
8223
8224 if (entry->vme_start < s) {
8225 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8226 entry->map_aligned &&
8227 !VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map))) {
8228 /*
8229 * The entry will no longer be map-aligned
8230 * after clipping and the caller said it's OK.
8231 */
8232 entry->map_aligned = FALSE;
8233 }
8234 vm_map_clip_start(map, entry, s);
8235 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8236 }
8237
8238 if (end < entry->vme_end) {
8239 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8240 entry->map_aligned &&
8241 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) {
8242 /*
8243 * The entry will no longer be map-aligned
8244 * after clipping and the caller said it's OK.
8245 */
8246 entry->map_aligned = FALSE;
8247 }
8248 vm_map_clip_end(map, entry, end);
8249 }
8250
8251 if (entry->vme_permanent && entry->is_sub_map) {
8252 /*
8253 * We already went through step 2.1 which did not deny
8254 * the removal of this "permanent" and "is_sub_map"
8255 * entry.
8256 * Now that we've clipped what we actually want to
8257 * delete, undo the "permanent" part to allow the
8258 * removal to proceed.
8259 */
8260 DTRACE_VM6(vm_map_delete_permanent_allow_submap,
8261 vm_map_entry_t, entry,
8262 vm_map_offset_t, entry->vme_start,
8263 vm_map_offset_t, entry->vme_end,
8264 vm_prot_t, entry->protection,
8265 vm_prot_t, entry->max_protection,
8266 int, VME_ALIAS(entry));
8267 entry->vme_permanent = false;
8268 }
8269
8270 assert(s == entry->vme_start);
8271 assert(entry->vme_end <= end);
8272
8273
8274 /*
8275 * Step 4: If the entry is in flux, wait for this to resolve.
8276 */
8277
8278 if (entry->in_transition) {
8279 wait_result_t wait_result;
8280
8281 in_transition:
8282 /*
8283 * Another thread is wiring/unwiring this entry.
8284 * Let the other thread know we are waiting.
8285 */
8286
8287 entry->needs_wakeup = TRUE;
8288
8289 /*
8290 * wake up anybody waiting on entries that we have
8291 * already unwired/deleted.
8292 */
8293 if (state & VMDS_NEEDS_WAKEUP) {
8294 vm_map_entry_wakeup(map);
8295 state &= ~VMDS_NEEDS_WAKEUP;
8296 }
8297
8298 wait_result = vm_map_entry_wait(map, interruptible);
8299
8300 if (interruptible &&
8301 wait_result == THREAD_INTERRUPTED) {
8302 /*
8303 * We do not clear the needs_wakeup flag,
8304 * since we cannot tell if we were the only one.
8305 */
8306 ret.kmr_return = KERN_ABORTED;
8307 return ret;
8308 }
8309
8310 /*
8311 * The entry could have been clipped or it
8312 * may not exist anymore. Look it up again.
8313 */
8314 state |= VMDS_NEEDS_LOOKUP;
8315 continue;
8316 }
8317
8318
8319 /*
8320 * Step 5: Handle wiring
8321 */
8322
8323 if (entry->wired_count) {
8324 struct vm_map_entry tmp_entry;
8325 boolean_t user_wire;
8326 unsigned int last_timestamp;
8327
8328 user_wire = entry->user_wired_count > 0;
8329
8330 /*
8331 * Remove a kernel wiring if requested
8332 */
8333 if (flags & VM_MAP_REMOVE_KUNWIRE) {
8334 entry->wired_count--;
8335 }
8336
8337 /*
8338 * Remove all user wirings for proper accounting
8339 */
8340 while (entry->user_wired_count) {
8341 subtract_wire_counts(map, entry, user_wire);
8342 }
8343
8344 /*
8345 * All our DMA I/O operations in IOKit are currently
8346 * done by wiring through the map entries of the task
8347 * requesting the I/O.
8348 *
8349 * Because of this, we must always wait for kernel wirings
8350 * to go away on the entries before deleting them.
8351 *
8352 * Any caller who wants to actually remove a kernel wiring
8353 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
8354 * properly remove one wiring instead of blasting through
8355 * them all.
8356 */
8357 if (entry->wired_count != 0) {
8358 assert(map != kernel_map);
8359 /*
8360 * Cannot continue. Typical case is when
8361 * a user thread has physical io pending on
8362 * on this page. Either wait for the
8363 * kernel wiring to go away or return an
8364 * error.
8365 */
8366 wait_result_t wait_result;
8367
8368 entry->needs_wakeup = TRUE;
8369 wait_result = vm_map_entry_wait(map,
8370 interruptible);
8371
8372 if (interruptible &&
8373 wait_result == THREAD_INTERRUPTED) {
8374 /*
8375 * We do not clear the
8376 * needs_wakeup flag, since we
8377 * cannot tell if we were the
8378 * only one.
8379 */
8380 ret.kmr_return = KERN_ABORTED;
8381 return ret;
8382 }
8383
8384
8385 /*
8386 * The entry could have been clipped or
8387 * it may not exist anymore. Look it
8388 * up again.
8389 */
8390 state |= VMDS_NEEDS_LOOKUP;
8391 continue;
8392 }
8393
8394 /*
8395 * We can unlock the map now.
8396 *
8397 * The entry might be split once we unlock the map,
8398 * but we need the range as defined by this entry
8399 * to be stable. So we must make a local copy.
8400 *
8401 * The underlying objects do not change during clips,
8402 * and the in_transition state guarentees existence
8403 * of the entry.
8404 */
8405 last_timestamp = map->timestamp;
8406 entry->in_transition = TRUE;
8407 tmp_entry = *entry;
8408 vm_map_unlock(map);
8409
8410 if (tmp_entry.is_sub_map) {
8411 vm_map_t sub_map;
8412 vm_map_offset_t sub_start, sub_end;
8413 pmap_t pmap;
8414 vm_map_offset_t pmap_addr;
8415
8416
8417 sub_map = VME_SUBMAP(&tmp_entry);
8418 sub_start = VME_OFFSET(&tmp_entry);
8419 sub_end = sub_start + (tmp_entry.vme_end -
8420 tmp_entry.vme_start);
8421 if (tmp_entry.use_pmap) {
8422 pmap = sub_map->pmap;
8423 pmap_addr = tmp_entry.vme_start;
8424 } else {
8425 pmap = map->pmap;
8426 pmap_addr = tmp_entry.vme_start;
8427 }
8428 (void) vm_map_unwire_nested(sub_map,
8429 sub_start, sub_end,
8430 user_wire,
8431 pmap, pmap_addr);
8432 } else {
8433 if (tmp_entry.vme_kernel_object) {
8434 pmap_protect_options(
8435 map->pmap,
8436 tmp_entry.vme_start,
8437 tmp_entry.vme_end,
8438 VM_PROT_NONE,
8439 PMAP_OPTIONS_REMOVE,
8440 NULL);
8441 }
8442 vm_fault_unwire(map, &tmp_entry,
8443 tmp_entry.vme_kernel_object,
8444 map->pmap, tmp_entry.vme_start);
8445 }
8446
8447 vm_map_lock(map);
8448
8449 /*
8450 * Unwiring happened, we can now go back to deleting
8451 * them (after we clear the in_transition bit for the range).
8452 */
8453 if (last_timestamp + 1 != map->timestamp) {
8454 state |= VMDS_NEEDS_LOOKUP;
8455 }
8456 clear_in_transition_end = tmp_entry.vme_end;
8457 continue;
8458 }
8459
8460 assert(entry->wired_count == 0);
8461 assert(entry->user_wired_count == 0);
8462
8463
8464 /*
8465 * Step 6: Entry is unwired and ready for us to delete !
8466 */
8467
8468 if (!entry->vme_permanent) {
8469 /*
8470 * Typical case: the entry really shouldn't be permanent
8471 */
8472 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE_CODE) &&
8473 (entry->protection & VM_PROT_EXECUTE) &&
8474 developer_mode_state()) {
8475 /*
8476 * Allow debuggers to undo executable mappings
8477 * when developer mode is on.
8478 */
8479 #if 0
8480 printf("FBDP %d[%s] removing permanent executable entry "
8481 "%p [0x%llx:0x%llx] prot 0x%x/0x%x\n",
8482 proc_selfpid(),
8483 (current_task()->bsd_info
8484 ? proc_name_address(current_task()->bsd_info)
8485 : "?"), entry,
8486 (uint64_t)entry->vme_start,
8487 (uint64_t)entry->vme_end,
8488 entry->protection,
8489 entry->max_protection);
8490 #endif
8491 entry->vme_permanent = FALSE;
8492 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE) || map->terminated) {
8493 #if 0
8494 printf("FBDP %d[%s] removing permanent entry "
8495 "%p [0x%llx:0x%llx] prot 0x%x/0x%x\n",
8496 proc_selfpid(),
8497 (current_task()->bsd_info
8498 ? proc_name_address(current_task()->bsd_info)
8499 : "?"), entry,
8500 (uint64_t)entry->vme_start,
8501 (uint64_t)entry->vme_end,
8502 entry->protection,
8503 entry->max_protection);
8504 #endif
8505 entry->vme_permanent = FALSE;
8506 } else {
8507 DTRACE_VM6(vm_map_delete_permanent,
8508 vm_map_entry_t, entry,
8509 vm_map_offset_t, entry->vme_start,
8510 vm_map_offset_t, entry->vme_end,
8511 vm_prot_t, entry->protection,
8512 vm_prot_t, entry->max_protection,
8513 int, VME_ALIAS(entry));
8514 }
8515
8516 if (entry->is_sub_map) {
8517 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8518 "map %p (%d) entry %p submap %p (%d)\n",
8519 map, VM_MAP_PAGE_SHIFT(map), entry,
8520 VME_SUBMAP(entry),
8521 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8522 if (entry->use_pmap) {
8523 #ifndef NO_NESTED_PMAP
8524 int pmap_flags;
8525
8526 if (map->terminated) {
8527 /*
8528 * This is the final cleanup of the
8529 * address space being terminated.
8530 * No new mappings are expected and
8531 * we don't really need to unnest the
8532 * shared region (and lose the "global"
8533 * pmap mappings, if applicable).
8534 *
8535 * Tell the pmap layer that we're
8536 * "clean" wrt nesting.
8537 */
8538 pmap_flags = PMAP_UNNEST_CLEAN;
8539 } else {
8540 /*
8541 * We're unmapping part of the nested
8542 * shared region, so we can't keep the
8543 * nested pmap.
8544 */
8545 pmap_flags = 0;
8546 }
8547 pmap_unnest_options(
8548 map->pmap,
8549 (addr64_t)entry->vme_start,
8550 entry->vme_end - entry->vme_start,
8551 pmap_flags);
8552 #endif /* NO_NESTED_PMAP */
8553 if (map->mapped_in_other_pmaps &&
8554 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8555 /* clean up parent map/maps */
8556 vm_map_submap_pmap_clean(
8557 map, entry->vme_start,
8558 entry->vme_end,
8559 VME_SUBMAP(entry),
8560 VME_OFFSET(entry));
8561 }
8562 } else {
8563 vm_map_submap_pmap_clean(
8564 map, entry->vme_start, entry->vme_end,
8565 VME_SUBMAP(entry),
8566 VME_OFFSET(entry));
8567 }
8568 } else if (entry->vme_kernel_object ||
8569 VME_OBJECT(entry) == compressor_object) {
8570 /*
8571 * nothing to do
8572 */
8573 } else if (map->mapped_in_other_pmaps &&
8574 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8575 vm_object_pmap_protect_options(
8576 VME_OBJECT(entry), VME_OFFSET(entry),
8577 entry->vme_end - entry->vme_start,
8578 PMAP_NULL,
8579 PAGE_SIZE,
8580 entry->vme_start,
8581 VM_PROT_NONE,
8582 PMAP_OPTIONS_REMOVE);
8583 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8584 (state & VMDS_KERNEL_PMAP)) {
8585 /* Remove translations associated
8586 * with this range unless the entry
8587 * does not have an object, or
8588 * it's the kernel map or a descendant
8589 * since the platform could potentially
8590 * create "backdoor" mappings invisible
8591 * to the VM. It is expected that
8592 * objectless, non-kernel ranges
8593 * do not have such VM invisible
8594 * translations.
8595 */
8596 pmap_remove_options(map->pmap,
8597 (addr64_t)entry->vme_start,
8598 (addr64_t)entry->vme_end,
8599 PMAP_OPTIONS_REMOVE);
8600 }
8601
8602 #if DEBUG
8603 /*
8604 * All pmap mappings for this map entry must have been
8605 * cleared by now.
8606 */
8607 assert(pmap_is_empty(map->pmap,
8608 entry->vme_start,
8609 entry->vme_end));
8610 #endif /* DEBUG */
8611
8612 if (entry->iokit_acct) {
8613 /* alternate accounting */
8614 DTRACE_VM4(vm_map_iokit_unmapped_region,
8615 vm_map_t, map,
8616 vm_map_offset_t, entry->vme_start,
8617 vm_map_offset_t, entry->vme_end,
8618 int, VME_ALIAS(entry));
8619 vm_map_iokit_unmapped_region(map,
8620 (entry->vme_end -
8621 entry->vme_start));
8622 entry->iokit_acct = FALSE;
8623 entry->use_pmap = FALSE;
8624 }
8625
8626 s = entry->vme_end;
8627 next = entry->vme_next;
8628 ret.kmr_size += entry->vme_end - entry->vme_start;
8629
8630 if (entry->vme_permanent) {
8631 /*
8632 * A permanent entry can not be removed, so leave it
8633 * in place but remove all access permissions.
8634 */
8635 if (!entry->pmap_cs_associated) {
8636 printf("%s:%d %d[%s] map %p entry %p [ 0x%llx - 0x%llx ] submap %d prot 0x%x/0x%x -> 0/0\n",
8637 __FUNCTION__, __LINE__,
8638 proc_selfpid(),
8639 (get_bsdtask_info(current_task())
8640 ? proc_name_address(get_bsdtask_info(current_task()))
8641 : "?"),
8642 map,
8643 entry,
8644 (uint64_t)entry->vme_start,
8645 (uint64_t)entry->vme_end,
8646 entry->is_sub_map,
8647 entry->protection,
8648 entry->max_protection);
8649 }
8650 DTRACE_VM6(vm_map_delete_permanent_prot_none,
8651 vm_map_entry_t, entry,
8652 vm_map_offset_t, entry->vme_start,
8653 vm_map_offset_t, entry->vme_end,
8654 vm_prot_t, entry->protection,
8655 vm_prot_t, entry->max_protection,
8656 int, VME_ALIAS(entry));
8657 entry->protection = VM_PROT_NONE;
8658 entry->max_protection = VM_PROT_NONE;
8659 } else {
8660 vm_map_entry_zap(map, entry, zap_list);
8661 }
8662
8663 entry = next;
8664
8665 if ((flags & VM_MAP_REMOVE_NO_YIELD) == 0 && s < end) {
8666 unsigned int last_timestamp = map->timestamp++;
8667
8668 if (lck_rw_lock_yield_exclusive(&map->lock,
8669 LCK_RW_YIELD_ANY_WAITER)) {
8670 if (last_timestamp != map->timestamp + 1) {
8671 state |= VMDS_NEEDS_LOOKUP;
8672 }
8673 } else {
8674 /* we didn't yield, undo our change */
8675 map->timestamp--;
8676 }
8677 }
8678 }
8679
8680 if (map->wait_for_space) {
8681 thread_wakeup((event_t) map);
8682 }
8683
8684 if (state & VMDS_NEEDS_WAKEUP) {
8685 vm_map_entry_wakeup(map);
8686 }
8687
8688 out:
8689 if ((state & VMDS_KERNEL_PMAP) && ret.kmr_return) {
8690 __vm_map_delete_failed_panic(map, start, end, ret.kmr_return);
8691 }
8692
8693 if (state & VMDS_FOUND_GAP) {
8694 DTRACE_VM3(kern_vm_deallocate_gap,
8695 vm_map_offset_t, gap_start,
8696 vm_map_offset_t, save_start,
8697 vm_map_offset_t, save_end);
8698 if (flags & VM_MAP_REMOVE_GAPS_FAIL) {
8699 ret.kmr_return = KERN_INVALID_VALUE;
8700 } else {
8701 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8702 }
8703 }
8704
8705 return ret;
8706 }
8707
8708 kmem_return_t
vm_map_remove_and_unlock(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vmr_flags_t flags,kmem_guard_t guard)8709 vm_map_remove_and_unlock(
8710 vm_map_t map,
8711 vm_map_offset_t start,
8712 vm_map_offset_t end,
8713 vmr_flags_t flags,
8714 kmem_guard_t guard)
8715 {
8716 kmem_return_t ret;
8717 VM_MAP_ZAP_DECLARE(zap);
8718
8719 ret = vm_map_delete(map, start, end, flags, guard, &zap);
8720 vm_map_unlock(map);
8721
8722 vm_map_zap_dispose(&zap);
8723
8724 return ret;
8725 }
8726
8727 /*
8728 * vm_map_remove_guard:
8729 *
8730 * Remove the given address range from the target map.
8731 * This is the exported form of vm_map_delete.
8732 */
8733 kmem_return_t
vm_map_remove_guard(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vmr_flags_t flags,kmem_guard_t guard)8734 vm_map_remove_guard(
8735 vm_map_t map,
8736 vm_map_offset_t start,
8737 vm_map_offset_t end,
8738 vmr_flags_t flags,
8739 kmem_guard_t guard)
8740 {
8741 vm_map_lock(map);
8742 return vm_map_remove_and_unlock(map, start, end, flags, guard);
8743 }
8744
8745 /*
8746 * vm_map_terminate:
8747 *
8748 * Clean out a task's map.
8749 */
8750 kern_return_t
vm_map_terminate(vm_map_t map)8751 vm_map_terminate(
8752 vm_map_t map)
8753 {
8754 vm_map_lock(map);
8755 map->terminated = TRUE;
8756 vm_map_disable_hole_optimization(map);
8757 (void)vm_map_remove_and_unlock(map, map->min_offset, map->max_offset,
8758 VM_MAP_REMOVE_NO_FLAGS, KMEM_GUARD_NONE);
8759 return KERN_SUCCESS;
8760 }
8761
8762 /*
8763 * Routine: vm_map_copy_allocate
8764 *
8765 * Description:
8766 * Allocates and initializes a map copy object.
8767 */
8768 static vm_map_copy_t
vm_map_copy_allocate(void)8769 vm_map_copy_allocate(void)
8770 {
8771 vm_map_copy_t new_copy;
8772
8773 new_copy = zalloc_id(ZONE_ID_VM_MAP_COPY, Z_WAITOK | Z_ZERO);
8774 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8775 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8776 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8777 return new_copy;
8778 }
8779
8780 /*
8781 * Routine: vm_map_copy_discard
8782 *
8783 * Description:
8784 * Dispose of a map copy object (returned by
8785 * vm_map_copyin).
8786 */
8787 void
vm_map_copy_discard(vm_map_copy_t copy)8788 vm_map_copy_discard(
8789 vm_map_copy_t copy)
8790 {
8791 if (copy == VM_MAP_COPY_NULL) {
8792 return;
8793 }
8794
8795 /*
8796 * Assert that the vm_map_copy is coming from the right
8797 * zone and hasn't been forged
8798 */
8799 vm_map_copy_require(copy);
8800
8801 switch (copy->type) {
8802 case VM_MAP_COPY_ENTRY_LIST:
8803 while (vm_map_copy_first_entry(copy) !=
8804 vm_map_copy_to_entry(copy)) {
8805 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8806
8807 vm_map_copy_entry_unlink(copy, entry);
8808 if (entry->is_sub_map) {
8809 vm_map_deallocate(VME_SUBMAP(entry));
8810 } else {
8811 vm_object_deallocate(VME_OBJECT(entry));
8812 }
8813 vm_map_copy_entry_dispose(entry);
8814 }
8815 break;
8816 case VM_MAP_COPY_OBJECT:
8817 vm_object_deallocate(copy->cpy_object);
8818 break;
8819 case VM_MAP_COPY_KERNEL_BUFFER:
8820
8821 /*
8822 * The vm_map_copy_t and possibly the data buffer were
8823 * allocated by a single call to kalloc_data(), i.e. the
8824 * vm_map_copy_t was not allocated out of the zone.
8825 */
8826 if (copy->size > msg_ool_size_small || copy->offset) {
8827 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8828 (long long)copy->size, (long long)copy->offset);
8829 }
8830 kfree_data(copy->cpy_kdata, copy->size);
8831 }
8832 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
8833 }
8834
8835 /*
8836 * Routine: vm_map_copy_copy
8837 *
8838 * Description:
8839 * Move the information in a map copy object to
8840 * a new map copy object, leaving the old one
8841 * empty.
8842 *
8843 * This is used by kernel routines that need
8844 * to look at out-of-line data (in copyin form)
8845 * before deciding whether to return SUCCESS.
8846 * If the routine returns FAILURE, the original
8847 * copy object will be deallocated; therefore,
8848 * these routines must make a copy of the copy
8849 * object and leave the original empty so that
8850 * deallocation will not fail.
8851 */
8852 vm_map_copy_t
vm_map_copy_copy(vm_map_copy_t copy)8853 vm_map_copy_copy(
8854 vm_map_copy_t copy)
8855 {
8856 vm_map_copy_t new_copy;
8857
8858 if (copy == VM_MAP_COPY_NULL) {
8859 return VM_MAP_COPY_NULL;
8860 }
8861
8862 /*
8863 * Assert that the vm_map_copy is coming from the right
8864 * zone and hasn't been forged
8865 */
8866 vm_map_copy_require(copy);
8867
8868 /*
8869 * Allocate a new copy object, and copy the information
8870 * from the old one into it.
8871 */
8872
8873 new_copy = zalloc_id(ZONE_ID_VM_MAP_COPY, Z_WAITOK | Z_ZERO | Z_NOFAIL);
8874 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8875 #if __has_feature(ptrauth_calls)
8876 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8877 new_copy->cpy_kdata = copy->cpy_kdata;
8878 }
8879 #endif
8880
8881 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8882 /*
8883 * The links in the entry chain must be
8884 * changed to point to the new copy object.
8885 */
8886 vm_map_copy_first_entry(copy)->vme_prev
8887 = vm_map_copy_to_entry(new_copy);
8888 vm_map_copy_last_entry(copy)->vme_next
8889 = vm_map_copy_to_entry(new_copy);
8890 }
8891
8892 /*
8893 * Change the old copy object into one that contains
8894 * nothing to be deallocated.
8895 */
8896 copy->type = VM_MAP_COPY_OBJECT;
8897 copy->cpy_object = VM_OBJECT_NULL;
8898
8899 /*
8900 * Return the new object.
8901 */
8902 return new_copy;
8903 }
8904
/*
 *	vm_map_entry_is_overwritable:
 *
 *	Returns TRUE if "entry" may be overwritten in place
 *	(e.g. by vm_map_copy_overwrite), FALSE otherwise.
 *	The entry must be writable, and — except on x86_64, where
 *	the extra checks are compiled out — must not be a JIT
 *	region under code-signing enforcement, a hardware TPRO
 *	region, or a non-submap "permanent" entry.
 */
static boolean_t
vm_map_entry_is_overwritable(
	vm_map_t        dst_map __unused,
	vm_map_entry_t  entry)
{
	if (!(entry->protection & VM_PROT_WRITE)) {
		/* can't overwrite if not writable */
		return FALSE;
	}
#if !__x86_64__
	if (entry->used_for_jit &&
	    vm_map_cs_enforcement(dst_map) &&
	    !dst_map->cs_debugged) {
		/*
		 * Can't overwrite a JIT region while cs_enforced
		 * and not cs_debugged.
		 */
		return FALSE;
	}

#if __arm64e__
	/* Do not allow overwrite HW assisted TPRO entries */
	if (entry->used_for_tpro) {
		return FALSE;
	}
#endif /* __arm64e__ */

	if (entry->vme_permanent) {
		if (entry->is_sub_map) {
			/*
			 * We can't tell if the submap contains "permanent"
			 * entries within the range targeted by the caller.
			 * The caller will have to check for that with
			 * vm_map_overwrite_submap_recurse() for example.
			 */
		} else {
			/*
			 * Do not allow overwriting of a "permanent"
			 * entry.
			 */
			DTRACE_VM6(vm_map_delete_permanent_deny_overwrite,
			    vm_map_entry_t, entry,
			    vm_map_offset_t, entry->vme_start,
			    vm_map_offset_t, entry->vme_end,
			    vm_prot_t, entry->protection,
			    vm_prot_t, entry->max_protection,
			    int, VME_ALIAS(entry));
			return FALSE;
		}
	}
#endif /* !__x86_64__ */
	return TRUE;
}
8958
/*
 *	vm_map_overwrite_submap_recurse:
 *
 *	Verify that the destination range [dst_addr, dst_addr + dst_size)
 *	of "dst_map" is entirely writable, overwritable, and backed by a
 *	contiguous run of map entries, recursing into any submaps
 *	encountered along the way.  Called with "dst_map" unlocked;
 *	returns with it unlocked.
 */
static kern_return_t
vm_map_overwrite_submap_recurse(
	vm_map_t        dst_map,
	vm_map_offset_t dst_addr,
	vm_map_size_t   dst_size)
{
	vm_map_offset_t dst_end;
	vm_map_entry_t  tmp_entry;
	vm_map_entry_t  entry;
	kern_return_t   result;
	boolean_t       encountered_sub_map = FALSE;



	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */

	dst_end = vm_map_round_page(dst_addr + dst_size,
	    VM_MAP_PAGE_MASK(dst_map));
	vm_map_lock(dst_map);

	/* restart point: taken whenever we had to sleep and the map changed */
start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}

	vm_map_clip_start(dst_map,
	    tmp_entry,
	    vm_map_trunc_page(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)));
	if (tmp_entry->is_sub_map) {
		/* clipping did unnest if needed */
		assert(!tmp_entry->use_pmap);
	}

	/* walk the entries covering [dst_addr, dst_end) */
	for (entry = tmp_entry;;) {
		vm_map_entry_t  next;

		next = entry->vme_next;
		while (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			encountered_sub_map = TRUE;
			/* translate the overlap into the submap's offset space */
			sub_start = VME_OFFSET(entry);

			if (entry->vme_end < dst_end) {
				sub_end = entry->vme_end;
			} else {
				sub_end = dst_end;
			}
			sub_end -= entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			/* must unlock before recursing into the submap */
			vm_map_unlock(dst_map);

			result = vm_map_overwrite_submap_recurse(
				VME_SUBMAP(entry),
				sub_start,
				sub_end - sub_start);

			if (result != KERN_SUCCESS) {
				return result;
			}
			if (dst_end <= entry->vme_end) {
				/*
				 * NOTE(review): "entry" is dereferenced here
				 * after dst_map was unlocked above; this
				 * relies on the entry not being clipped or
				 * freed concurrently — confirm.
				 */
				return KERN_SUCCESS;
			}
			vm_map_lock(dst_map);
			/* re-lookup: the map may have changed while unlocked */
			if (!vm_map_lookup_entry(dst_map, local_end,
			    &tmp_entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			entry = tmp_entry;
			next = entry->vme_next;
		}

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/* reject JIT/TPRO/permanent entries etc. */
		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 * our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			vm_map_unlock(dst_map);
			return KERN_SUCCESS;
		}
		/*
		 * check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/*
		 *	Check for permanent objects in the destination.
		 *	A non-internal or truly-shared object below a
		 *	submap cannot be overwritten atomically here.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		    (VME_OBJECT(entry)->true_share))) {
			if (encountered_sub_map) {
				vm_map_unlock(dst_map);
				return KERN_FAILURE;
			}
		}


		entry = next;
	}/* for */
	/* NOTE(review): unreachable — the loop above only exits via return */
	vm_map_unlock(dst_map);
	return KERN_SUCCESS;
}
9111
9112 /*
9113 * Routine: vm_map_copy_overwrite
9114 *
9115 * Description:
9116 * Copy the memory described by the map copy
9117 * object (copy; returned by vm_map_copyin) onto
9118 * the specified destination region (dst_map, dst_addr).
9119 * The destination must be writeable.
9120 *
9121 * Unlike vm_map_copyout, this routine actually
9122 * writes over previously-mapped memory. If the
9123 * previous mapping was to a permanent (user-supplied)
9124 * memory object, it is preserved.
9125 *
9126 * The attributes (protection and inheritance) of the
9127 * destination region are preserved.
9128 *
9129 * If successful, consumes the copy object.
9130 * Otherwise, the caller is responsible for it.
9131 *
9132 * Implementation notes:
9133 * To overwrite aligned temporary virtual memory, it is
9134 * sufficient to remove the previous mapping and insert
9135 * the new copy. This replacement is done either on
9136 * the whole region (if no permanent virtual memory
9137 * objects are embedded in the destination region) or
9138 * in individual map entries.
9139 *
 * To overwrite permanent virtual memory, it is necessary
9141 * to copy each page, as the external memory management
9142 * interface currently does not provide any optimizations.
9143 *
9144 * Unaligned memory also has to be copied. It is possible
9145 * to use 'vm_trickery' to copy the aligned data. This is
9146 * not done but not hard to implement.
9147 *
9148 * Once a page of permanent memory has been overwritten,
9149 * it is impossible to interrupt this function; otherwise,
9150 * the call would be neither atomic nor location-independent.
9151 * The kernel-state portion of a user thread must be
9152 * interruptible.
9153 *
9154 * It may be expensive to forward all requests that might
9155 * overwrite permanent memory (vm_write, vm_copy) to
9156 * uninterruptible kernel threads. This routine may be
9157 * called by interruptible threads; however, success is
9158 * not guaranteed -- if the request cannot be performed
9159 * atomically and interruptibly, an error indication is
9160 * returned.
9161 *
9162 * Callers of this function must call vm_map_copy_require on
9163 * previously created vm_map_copy_t or pass a newly created
9164 * one to ensure that it hasn't been forged.
9165 */
9166
9167 static kern_return_t
vm_map_copy_overwrite_nested(vm_map_t dst_map,vm_map_address_t dst_addr,vm_map_copy_t copy,boolean_t interruptible,pmap_t pmap,boolean_t discard_on_success)9168 vm_map_copy_overwrite_nested(
9169 vm_map_t dst_map,
9170 vm_map_address_t dst_addr,
9171 vm_map_copy_t copy,
9172 boolean_t interruptible,
9173 pmap_t pmap,
9174 boolean_t discard_on_success)
9175 {
9176 vm_map_offset_t dst_end;
9177 vm_map_entry_t tmp_entry;
9178 vm_map_entry_t entry;
9179 kern_return_t kr;
9180 boolean_t aligned = TRUE;
9181 boolean_t contains_permanent_objects = FALSE;
9182 boolean_t encountered_sub_map = FALSE;
9183 vm_map_offset_t base_addr;
9184 vm_map_size_t copy_size;
9185 vm_map_size_t total_size;
9186 uint16_t copy_page_shift;
9187
9188 /*
9189 * Check for special kernel buffer allocated
9190 * by new_ipc_kmsg_copyin.
9191 */
9192
9193 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9194 return vm_map_copyout_kernel_buffer(
9195 dst_map, &dst_addr,
9196 copy, copy->size, TRUE, discard_on_success);
9197 }
9198
9199 /*
9200 * Only works for entry lists at the moment. Will
9201 * support page lists later.
9202 */
9203
9204 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9205
9206 if (copy->size == 0) {
9207 if (discard_on_success) {
9208 vm_map_copy_discard(copy);
9209 }
9210 return KERN_SUCCESS;
9211 }
9212
9213 copy_page_shift = copy->cpy_hdr.page_shift;
9214
9215 /*
9216 * Verify that the destination is all writeable
9217 * initially. We have to trunc the destination
9218 * address and round the copy size or we'll end up
9219 * splitting entries in strange ways.
9220 */
9221
9222 if (!VM_MAP_PAGE_ALIGNED(copy->size,
9223 VM_MAP_PAGE_MASK(dst_map)) ||
9224 !VM_MAP_PAGE_ALIGNED(copy->offset,
9225 VM_MAP_PAGE_MASK(dst_map)) ||
9226 !VM_MAP_PAGE_ALIGNED(dst_addr,
9227 VM_MAP_PAGE_MASK(dst_map)) ||
9228 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9229 aligned = FALSE;
9230 dst_end = vm_map_round_page(dst_addr + copy->size,
9231 VM_MAP_PAGE_MASK(dst_map));
9232 } else {
9233 dst_end = dst_addr + copy->size;
9234 }
9235
9236 vm_map_lock(dst_map);
9237
9238 /* LP64todo - remove this check when vm_map_commpage64()
9239 * no longer has to stuff in a map_entry for the commpage
9240 * above the map's max_offset.
9241 */
9242 if (dst_addr >= dst_map->max_offset) {
9243 vm_map_unlock(dst_map);
9244 return KERN_INVALID_ADDRESS;
9245 }
9246
9247 start_pass_1:
9248 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9249 vm_map_unlock(dst_map);
9250 return KERN_INVALID_ADDRESS;
9251 }
9252 vm_map_clip_start(dst_map,
9253 tmp_entry,
9254 vm_map_trunc_page(dst_addr,
9255 VM_MAP_PAGE_MASK(dst_map)));
9256 for (entry = tmp_entry;;) {
9257 vm_map_entry_t next = entry->vme_next;
9258
9259 while (entry->is_sub_map) {
9260 vm_map_offset_t sub_start;
9261 vm_map_offset_t sub_end;
9262 vm_map_offset_t local_end;
9263
9264 if (entry->in_transition) {
9265 /*
9266 * Say that we are waiting, and wait for entry.
9267 */
9268 entry->needs_wakeup = TRUE;
9269 vm_map_entry_wait(dst_map, THREAD_UNINT);
9270
9271 goto start_pass_1;
9272 }
9273
9274 local_end = entry->vme_end;
9275 if (!(entry->needs_copy)) {
9276 /* if needs_copy we are a COW submap */
9277 /* in such a case we just replace so */
9278 /* there is no need for the follow- */
9279 /* ing check. */
9280 encountered_sub_map = TRUE;
9281 sub_start = VME_OFFSET(entry);
9282
9283 if (entry->vme_end < dst_end) {
9284 sub_end = entry->vme_end;
9285 } else {
9286 sub_end = dst_end;
9287 }
9288 sub_end -= entry->vme_start;
9289 sub_end += VME_OFFSET(entry);
9290 vm_map_unlock(dst_map);
9291
9292 kr = vm_map_overwrite_submap_recurse(
9293 VME_SUBMAP(entry),
9294 sub_start,
9295 sub_end - sub_start);
9296 if (kr != KERN_SUCCESS) {
9297 return kr;
9298 }
9299 vm_map_lock(dst_map);
9300 }
9301
9302 if (dst_end <= entry->vme_end) {
9303 goto start_overwrite;
9304 }
9305 if (!vm_map_lookup_entry(dst_map, local_end,
9306 &entry)) {
9307 vm_map_unlock(dst_map);
9308 return KERN_INVALID_ADDRESS;
9309 }
9310 next = entry->vme_next;
9311 }
9312
9313 if (!(entry->protection & VM_PROT_WRITE)) {
9314 vm_map_unlock(dst_map);
9315 return KERN_PROTECTION_FAILURE;
9316 }
9317
9318 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
9319 vm_map_unlock(dst_map);
9320 return KERN_PROTECTION_FAILURE;
9321 }
9322
9323 /*
9324 * If the entry is in transition, we must wait
9325 * for it to exit that state. Anything could happen
9326 * when we unlock the map, so start over.
9327 */
9328 if (entry->in_transition) {
9329 /*
9330 * Say that we are waiting, and wait for entry.
9331 */
9332 entry->needs_wakeup = TRUE;
9333 vm_map_entry_wait(dst_map, THREAD_UNINT);
9334
9335 goto start_pass_1;
9336 }
9337
9338 /*
9339 * our range is contained completely within this map entry
9340 */
9341 if (dst_end <= entry->vme_end) {
9342 break;
9343 }
9344 /*
9345 * check that range specified is contiguous region
9346 */
9347 if ((next == vm_map_to_entry(dst_map)) ||
9348 (next->vme_start != entry->vme_end)) {
9349 vm_map_unlock(dst_map);
9350 return KERN_INVALID_ADDRESS;
9351 }
9352
9353
9354 /*
9355 * Check for permanent objects in the destination.
9356 */
9357 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9358 ((!VME_OBJECT(entry)->internal) ||
9359 (VME_OBJECT(entry)->true_share))) {
9360 contains_permanent_objects = TRUE;
9361 }
9362
9363 entry = next;
9364 }/* for */
9365
9366 start_overwrite:
9367 /*
9368 * If there are permanent objects in the destination, then
9369 * the copy cannot be interrupted.
9370 */
9371
9372 if (interruptible && contains_permanent_objects) {
9373 vm_map_unlock(dst_map);
9374 return KERN_FAILURE; /* XXX */
9375 }
9376
9377 /*
9378 *
9379 * Make a second pass, overwriting the data
9380 * At the beginning of each loop iteration,
9381 * the next entry to be overwritten is "tmp_entry"
9382 * (initially, the value returned from the lookup above),
9383 * and the starting address expected in that entry
9384 * is "start".
9385 */
9386
9387 total_size = copy->size;
9388 if (encountered_sub_map) {
9389 copy_size = 0;
9390 /* re-calculate tmp_entry since we've had the map */
9391 /* unlocked */
9392 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9393 vm_map_unlock(dst_map);
9394 return KERN_INVALID_ADDRESS;
9395 }
9396 } else {
9397 copy_size = copy->size;
9398 }
9399
9400 base_addr = dst_addr;
9401 while (TRUE) {
9402 /* deconstruct the copy object and do in parts */
9403 /* only in sub_map, interruptable case */
9404 vm_map_entry_t copy_entry;
9405 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9406 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9407 int nentries;
9408 int remaining_entries = 0;
9409 vm_map_offset_t new_offset = 0;
9410
9411 for (entry = tmp_entry; copy_size == 0;) {
9412 vm_map_entry_t next;
9413
9414 next = entry->vme_next;
9415
9416 /* tmp_entry and base address are moved along */
9417 /* each time we encounter a sub-map. Otherwise */
			/* entry can outpace tmp_entry, and the copy_size */
9419 /* may reflect the distance between them */
9420 /* if the current entry is found to be in transition */
9421 /* we will start over at the beginning or the last */
9422 /* encounter of a submap as dictated by base_addr */
9423 /* we will zero copy_size accordingly. */
9424 if (entry->in_transition) {
9425 /*
9426 * Say that we are waiting, and wait for entry.
9427 */
9428 entry->needs_wakeup = TRUE;
9429 vm_map_entry_wait(dst_map, THREAD_UNINT);
9430
9431 if (!vm_map_lookup_entry(dst_map, base_addr,
9432 &tmp_entry)) {
9433 vm_map_unlock(dst_map);
9434 return KERN_INVALID_ADDRESS;
9435 }
9436 copy_size = 0;
9437 entry = tmp_entry;
9438 continue;
9439 }
9440 if (entry->is_sub_map) {
9441 vm_map_offset_t sub_start;
9442 vm_map_offset_t sub_end;
9443 vm_map_offset_t local_end;
9444
9445 if (entry->needs_copy) {
9446 /* if this is a COW submap */
9447 /* just back the range with a */
9448 /* anonymous entry */
9449 assert(!entry->vme_permanent);
9450 if (entry->vme_end < dst_end) {
9451 sub_end = entry->vme_end;
9452 } else {
9453 sub_end = dst_end;
9454 }
9455 if (entry->vme_start < base_addr) {
9456 sub_start = base_addr;
9457 } else {
9458 sub_start = entry->vme_start;
9459 }
9460 vm_map_clip_end(
9461 dst_map, entry, sub_end);
9462 vm_map_clip_start(
9463 dst_map, entry, sub_start);
9464 assert(!entry->use_pmap);
9465 assert(!entry->iokit_acct);
9466 entry->use_pmap = TRUE;
9467 vm_map_deallocate(VME_SUBMAP(entry));
9468 assert(!entry->vme_permanent);
9469 VME_OBJECT_SET(entry, VM_OBJECT_NULL, false, 0);
9470 VME_OFFSET_SET(entry, 0);
9471 entry->is_shared = FALSE;
9472 entry->needs_copy = FALSE;
9473 entry->protection = VM_PROT_DEFAULT;
9474 entry->max_protection = VM_PROT_ALL;
9475 entry->wired_count = 0;
9476 entry->user_wired_count = 0;
9477 if (entry->inheritance
9478 == VM_INHERIT_SHARE) {
9479 entry->inheritance = VM_INHERIT_COPY;
9480 }
9481 continue;
9482 }
9483 /* first take care of any non-sub_map */
9484 /* entries to send */
9485 if (base_addr < entry->vme_start) {
9486 /* stuff to send */
9487 copy_size =
9488 entry->vme_start - base_addr;
9489 break;
9490 }
9491 sub_start = VME_OFFSET(entry);
9492
9493 if (entry->vme_end < dst_end) {
9494 sub_end = entry->vme_end;
9495 } else {
9496 sub_end = dst_end;
9497 }
9498 sub_end -= entry->vme_start;
9499 sub_end += VME_OFFSET(entry);
9500 local_end = entry->vme_end;
9501 vm_map_unlock(dst_map);
9502 copy_size = sub_end - sub_start;
9503
9504 /* adjust the copy object */
9505 if (total_size > copy_size) {
9506 vm_map_size_t local_size = 0;
9507 vm_map_size_t entry_size;
9508
9509 nentries = 1;
9510 new_offset = copy->offset;
9511 copy_entry = vm_map_copy_first_entry(copy);
9512 while (copy_entry !=
9513 vm_map_copy_to_entry(copy)) {
9514 entry_size = copy_entry->vme_end -
9515 copy_entry->vme_start;
9516 if ((local_size < copy_size) &&
9517 ((local_size + entry_size)
9518 >= copy_size)) {
9519 vm_map_copy_clip_end(copy,
9520 copy_entry,
9521 copy_entry->vme_start +
9522 (copy_size - local_size));
9523 entry_size = copy_entry->vme_end -
9524 copy_entry->vme_start;
9525 local_size += entry_size;
9526 new_offset += entry_size;
9527 }
9528 if (local_size >= copy_size) {
9529 next_copy = copy_entry->vme_next;
9530 copy_entry->vme_next =
9531 vm_map_copy_to_entry(copy);
9532 previous_prev =
9533 copy->cpy_hdr.links.prev;
9534 copy->cpy_hdr.links.prev = copy_entry;
9535 copy->size = copy_size;
9536 remaining_entries =
9537 copy->cpy_hdr.nentries;
9538 remaining_entries -= nentries;
9539 copy->cpy_hdr.nentries = nentries;
9540 break;
9541 } else {
9542 local_size += entry_size;
9543 new_offset += entry_size;
9544 nentries++;
9545 }
9546 copy_entry = copy_entry->vme_next;
9547 }
9548 }
9549
9550 if ((entry->use_pmap) && (pmap == NULL)) {
9551 kr = vm_map_copy_overwrite_nested(
9552 VME_SUBMAP(entry),
9553 sub_start,
9554 copy,
9555 interruptible,
9556 VME_SUBMAP(entry)->pmap,
9557 TRUE);
9558 } else if (pmap != NULL) {
9559 kr = vm_map_copy_overwrite_nested(
9560 VME_SUBMAP(entry),
9561 sub_start,
9562 copy,
9563 interruptible, pmap,
9564 TRUE);
9565 } else {
9566 kr = vm_map_copy_overwrite_nested(
9567 VME_SUBMAP(entry),
9568 sub_start,
9569 copy,
9570 interruptible,
9571 dst_map->pmap,
9572 TRUE);
9573 }
9574 if (kr != KERN_SUCCESS) {
9575 if (next_copy != NULL) {
9576 copy->cpy_hdr.nentries +=
9577 remaining_entries;
9578 copy->cpy_hdr.links.prev->vme_next =
9579 next_copy;
9580 copy->cpy_hdr.links.prev
9581 = previous_prev;
9582 copy->size = total_size;
9583 }
9584 return kr;
9585 }
9586 if (dst_end <= local_end) {
9587 return KERN_SUCCESS;
9588 }
9589 /* otherwise copy no longer exists, it was */
9590 /* destroyed after successful copy_overwrite */
9591 copy = vm_map_copy_allocate();
9592 copy->type = VM_MAP_COPY_ENTRY_LIST;
9593 copy->offset = new_offset;
9594 copy->cpy_hdr.page_shift = copy_page_shift;
9595
9596 /*
9597 * XXX FBDP
9598 * this does not seem to deal with
9599 * the VM map store (R&B tree)
9600 */
9601
9602 total_size -= copy_size;
9603 copy_size = 0;
9604 /* put back remainder of copy in container */
9605 if (next_copy != NULL) {
9606 copy->cpy_hdr.nentries = remaining_entries;
9607 copy->cpy_hdr.links.next = next_copy;
9608 copy->cpy_hdr.links.prev = previous_prev;
9609 copy->size = total_size;
9610 next_copy->vme_prev =
9611 vm_map_copy_to_entry(copy);
9612 next_copy = NULL;
9613 }
9614 base_addr = local_end;
9615 vm_map_lock(dst_map);
9616 if (!vm_map_lookup_entry(dst_map,
9617 local_end, &tmp_entry)) {
9618 vm_map_unlock(dst_map);
9619 return KERN_INVALID_ADDRESS;
9620 }
9621 entry = tmp_entry;
9622 continue;
9623 }
9624 if (dst_end <= entry->vme_end) {
9625 copy_size = dst_end - base_addr;
9626 break;
9627 }
9628
9629 if ((next == vm_map_to_entry(dst_map)) ||
9630 (next->vme_start != entry->vme_end)) {
9631 vm_map_unlock(dst_map);
9632 return KERN_INVALID_ADDRESS;
9633 }
9634
9635 entry = next;
9636 }/* for */
9637
9638 next_copy = NULL;
9639 nentries = 1;
9640
9641 /* adjust the copy object */
9642 if (total_size > copy_size) {
9643 vm_map_size_t local_size = 0;
9644 vm_map_size_t entry_size;
9645
9646 new_offset = copy->offset;
9647 copy_entry = vm_map_copy_first_entry(copy);
9648 while (copy_entry != vm_map_copy_to_entry(copy)) {
9649 entry_size = copy_entry->vme_end -
9650 copy_entry->vme_start;
9651 if ((local_size < copy_size) &&
9652 ((local_size + entry_size)
9653 >= copy_size)) {
9654 vm_map_copy_clip_end(copy, copy_entry,
9655 copy_entry->vme_start +
9656 (copy_size - local_size));
9657 entry_size = copy_entry->vme_end -
9658 copy_entry->vme_start;
9659 local_size += entry_size;
9660 new_offset += entry_size;
9661 }
9662 if (local_size >= copy_size) {
9663 next_copy = copy_entry->vme_next;
9664 copy_entry->vme_next =
9665 vm_map_copy_to_entry(copy);
9666 previous_prev =
9667 copy->cpy_hdr.links.prev;
9668 copy->cpy_hdr.links.prev = copy_entry;
9669 copy->size = copy_size;
9670 remaining_entries =
9671 copy->cpy_hdr.nentries;
9672 remaining_entries -= nentries;
9673 copy->cpy_hdr.nentries = nentries;
9674 break;
9675 } else {
9676 local_size += entry_size;
9677 new_offset += entry_size;
9678 nentries++;
9679 }
9680 copy_entry = copy_entry->vme_next;
9681 }
9682 }
9683
9684 if (aligned) {
9685 pmap_t local_pmap;
9686
9687 if (pmap) {
9688 local_pmap = pmap;
9689 } else {
9690 local_pmap = dst_map->pmap;
9691 }
9692
9693 if ((kr = vm_map_copy_overwrite_aligned(
9694 dst_map, tmp_entry, copy,
9695 base_addr, local_pmap)) != KERN_SUCCESS) {
9696 if (next_copy != NULL) {
9697 copy->cpy_hdr.nentries +=
9698 remaining_entries;
9699 copy->cpy_hdr.links.prev->vme_next =
9700 next_copy;
9701 copy->cpy_hdr.links.prev =
9702 previous_prev;
9703 copy->size += copy_size;
9704 }
9705 return kr;
9706 }
9707 vm_map_unlock(dst_map);
9708 } else {
9709 /*
9710 * Performance gain:
9711 *
9712 * if the copy and dst address are misaligned but the same
9713 * offset within the page we can copy_not_aligned the
9714 * misaligned parts and copy aligned the rest. If they are
9715 * aligned but len is unaligned we simply need to copy
9716 * the end bit unaligned. We'll need to split the misaligned
9717 * bits of the region in this case !
9718 */
9719 /* ALWAYS UNLOCKS THE dst_map MAP */
9720 kr = vm_map_copy_overwrite_unaligned(
9721 dst_map,
9722 tmp_entry,
9723 copy,
9724 base_addr,
9725 discard_on_success);
9726 if (kr != KERN_SUCCESS) {
9727 if (next_copy != NULL) {
9728 copy->cpy_hdr.nentries +=
9729 remaining_entries;
9730 copy->cpy_hdr.links.prev->vme_next =
9731 next_copy;
9732 copy->cpy_hdr.links.prev =
9733 previous_prev;
9734 copy->size += copy_size;
9735 }
9736 return kr;
9737 }
9738 }
9739 total_size -= copy_size;
9740 if (total_size == 0) {
9741 break;
9742 }
9743 base_addr += copy_size;
9744 copy_size = 0;
9745 copy->offset = new_offset;
9746 if (next_copy != NULL) {
9747 copy->cpy_hdr.nentries = remaining_entries;
9748 copy->cpy_hdr.links.next = next_copy;
9749 copy->cpy_hdr.links.prev = previous_prev;
9750 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9751 copy->size = total_size;
9752 }
9753 vm_map_lock(dst_map);
9754 while (TRUE) {
9755 if (!vm_map_lookup_entry(dst_map,
9756 base_addr, &tmp_entry)) {
9757 vm_map_unlock(dst_map);
9758 return KERN_INVALID_ADDRESS;
9759 }
9760 if (tmp_entry->in_transition) {
9761 entry->needs_wakeup = TRUE;
9762 vm_map_entry_wait(dst_map, THREAD_UNINT);
9763 } else {
9764 break;
9765 }
9766 }
9767 vm_map_clip_start(dst_map,
9768 tmp_entry,
9769 vm_map_trunc_page(base_addr,
9770 VM_MAP_PAGE_MASK(dst_map)));
9771
9772 entry = tmp_entry;
9773 } /* while */
9774
9775 /*
9776 * Throw away the vm_map_copy object
9777 */
9778 if (discard_on_success) {
9779 vm_map_copy_discard(copy);
9780 }
9781
9782 return KERN_SUCCESS;
9783 }/* vm_map_copy_overwrite */
9784
9785 kern_return_t
vm_map_copy_overwrite(vm_map_t dst_map,vm_map_offset_t dst_addr,vm_map_copy_t copy,vm_map_size_t copy_size,boolean_t interruptible)9786 vm_map_copy_overwrite(
9787 vm_map_t dst_map,
9788 vm_map_offset_t dst_addr,
9789 vm_map_copy_t copy,
9790 vm_map_size_t copy_size,
9791 boolean_t interruptible)
9792 {
9793 vm_map_size_t head_size, tail_size;
9794 vm_map_copy_t head_copy, tail_copy;
9795 vm_map_offset_t head_addr, tail_addr;
9796 vm_map_entry_t entry;
9797 kern_return_t kr;
9798 vm_map_offset_t effective_page_mask, effective_page_size;
9799 uint16_t copy_page_shift;
9800
9801 head_size = 0;
9802 tail_size = 0;
9803 head_copy = NULL;
9804 tail_copy = NULL;
9805 head_addr = 0;
9806 tail_addr = 0;
9807
9808 /*
9809 * Check for null copy object.
9810 */
9811 if (copy == VM_MAP_COPY_NULL) {
9812 return KERN_SUCCESS;
9813 }
9814
9815 /*
9816 * Assert that the vm_map_copy is coming from the right
9817 * zone and hasn't been forged
9818 */
9819 vm_map_copy_require(copy);
9820
9821 if (interruptible ||
9822 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9823 /*
9824 * We can't split the "copy" map if we're interruptible
9825 * or if we don't have a "copy" map...
9826 */
9827 blunt_copy:
9828 return vm_map_copy_overwrite_nested(dst_map,
9829 dst_addr,
9830 copy,
9831 interruptible,
9832 (pmap_t) NULL,
9833 TRUE);
9834 }
9835
9836 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9837 if (copy_page_shift < PAGE_SHIFT ||
9838 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9839 goto blunt_copy;
9840 }
9841
9842 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9843 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9844 } else {
9845 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9846 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9847 effective_page_mask);
9848 }
9849 effective_page_size = effective_page_mask + 1;
9850
9851 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9852 /*
9853 * Too small to bother with optimizing...
9854 */
9855 goto blunt_copy;
9856 }
9857
9858 if ((dst_addr & effective_page_mask) !=
9859 (copy->offset & effective_page_mask)) {
9860 /*
9861 * Incompatible mis-alignment of source and destination...
9862 */
9863 goto blunt_copy;
9864 }
9865
9866 /*
9867 * Proper alignment or identical mis-alignment at the beginning.
9868 * Let's try and do a small unaligned copy first (if needed)
9869 * and then an aligned copy for the rest.
9870 */
9871 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9872 head_addr = dst_addr;
9873 head_size = (effective_page_size -
9874 (copy->offset & effective_page_mask));
9875 head_size = MIN(head_size, copy_size);
9876 }
9877 if (!vm_map_page_aligned(copy->offset + copy_size,
9878 effective_page_mask)) {
9879 /*
9880 * Mis-alignment at the end.
9881 * Do an aligned copy up to the last page and
9882 * then an unaligned copy for the remaining bytes.
9883 */
9884 tail_size = ((copy->offset + copy_size) &
9885 effective_page_mask);
9886 tail_size = MIN(tail_size, copy_size);
9887 tail_addr = dst_addr + copy_size - tail_size;
9888 assert(tail_addr >= head_addr + head_size);
9889 }
9890 assert(head_size + tail_size <= copy_size);
9891
9892 if (head_size + tail_size == copy_size) {
9893 /*
9894 * It's all unaligned, no optimization possible...
9895 */
9896 goto blunt_copy;
9897 }
9898
9899 /*
9900 * Can't optimize if there are any submaps in the
9901 * destination due to the way we free the "copy" map
9902 * progressively in vm_map_copy_overwrite_nested()
9903 * in that case.
9904 */
9905 vm_map_lock_read(dst_map);
9906 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9907 vm_map_unlock_read(dst_map);
9908 goto blunt_copy;
9909 }
9910 for (;
9911 (entry != vm_map_to_entry(dst_map) &&
9912 entry->vme_start < dst_addr + copy_size);
9913 entry = entry->vme_next) {
9914 if (entry->is_sub_map) {
9915 vm_map_unlock_read(dst_map);
9916 goto blunt_copy;
9917 }
9918 }
9919 vm_map_unlock_read(dst_map);
9920
9921 if (head_size) {
9922 /*
9923 * Unaligned copy of the first "head_size" bytes, to reach
9924 * a page boundary.
9925 */
9926
9927 /*
9928 * Extract "head_copy" out of "copy".
9929 */
9930 head_copy = vm_map_copy_allocate();
9931 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9932 head_copy->cpy_hdr.entries_pageable =
9933 copy->cpy_hdr.entries_pageable;
9934 vm_map_store_init(&head_copy->cpy_hdr);
9935 head_copy->cpy_hdr.page_shift = copy_page_shift;
9936
9937 entry = vm_map_copy_first_entry(copy);
9938 if (entry->vme_end < copy->offset + head_size) {
9939 head_size = entry->vme_end - copy->offset;
9940 }
9941
9942 head_copy->offset = copy->offset;
9943 head_copy->size = head_size;
9944 copy->offset += head_size;
9945 copy->size -= head_size;
9946 copy_size -= head_size;
9947 assert(copy_size > 0);
9948
9949 vm_map_copy_clip_end(copy, entry, copy->offset);
9950 vm_map_copy_entry_unlink(copy, entry);
9951 vm_map_copy_entry_link(head_copy,
9952 vm_map_copy_to_entry(head_copy),
9953 entry);
9954
9955 /*
9956 * Do the unaligned copy.
9957 */
9958 kr = vm_map_copy_overwrite_nested(dst_map,
9959 head_addr,
9960 head_copy,
9961 interruptible,
9962 (pmap_t) NULL,
9963 FALSE);
9964 if (kr != KERN_SUCCESS) {
9965 goto done;
9966 }
9967 }
9968
9969 if (tail_size) {
9970 /*
9971 * Extract "tail_copy" out of "copy".
9972 */
9973 tail_copy = vm_map_copy_allocate();
9974 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9975 tail_copy->cpy_hdr.entries_pageable =
9976 copy->cpy_hdr.entries_pageable;
9977 vm_map_store_init(&tail_copy->cpy_hdr);
9978 tail_copy->cpy_hdr.page_shift = copy_page_shift;
9979
9980 tail_copy->offset = copy->offset + copy_size - tail_size;
9981 tail_copy->size = tail_size;
9982
9983 copy->size -= tail_size;
9984 copy_size -= tail_size;
9985 assert(copy_size > 0);
9986
9987 entry = vm_map_copy_last_entry(copy);
9988 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9989 entry = vm_map_copy_last_entry(copy);
9990 vm_map_copy_entry_unlink(copy, entry);
9991 vm_map_copy_entry_link(tail_copy,
9992 vm_map_copy_last_entry(tail_copy),
9993 entry);
9994 }
9995
9996 /*
9997 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9998 * we want to avoid TOCTOU issues w.r.t copy->size but
9999 * we don't need to change vm_map_copy_overwrite_nested()
10000 * and all other vm_map_copy_overwrite variants.
10001 *
10002 * So we assign the original copy_size that was passed into
10003 * this routine back to copy.
10004 *
10005 * This use of local 'copy_size' passed into this routine is
10006 * to try and protect against TOCTOU attacks where the kernel
10007 * has been exploited. We don't expect this to be an issue
10008 * during normal system operation.
10009 */
10010 assertf(copy->size == copy_size,
10011 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
10012 copy->size = copy_size;
10013
10014 /*
10015 * Copy most (or possibly all) of the data.
10016 */
10017 kr = vm_map_copy_overwrite_nested(dst_map,
10018 dst_addr + head_size,
10019 copy,
10020 interruptible,
10021 (pmap_t) NULL,
10022 FALSE);
10023 if (kr != KERN_SUCCESS) {
10024 goto done;
10025 }
10026
10027 if (tail_size) {
10028 kr = vm_map_copy_overwrite_nested(dst_map,
10029 tail_addr,
10030 tail_copy,
10031 interruptible,
10032 (pmap_t) NULL,
10033 FALSE);
10034 }
10035
10036 done:
10037 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
10038 if (kr == KERN_SUCCESS) {
10039 /*
10040 * Discard all the copy maps.
10041 */
10042 if (head_copy) {
10043 vm_map_copy_discard(head_copy);
10044 head_copy = NULL;
10045 }
10046 vm_map_copy_discard(copy);
10047 if (tail_copy) {
10048 vm_map_copy_discard(tail_copy);
10049 tail_copy = NULL;
10050 }
10051 } else {
10052 /*
10053 * Re-assemble the original copy map.
10054 */
10055 if (head_copy) {
10056 entry = vm_map_copy_first_entry(head_copy);
10057 vm_map_copy_entry_unlink(head_copy, entry);
10058 vm_map_copy_entry_link(copy,
10059 vm_map_copy_to_entry(copy),
10060 entry);
10061 copy->offset -= head_size;
10062 copy->size += head_size;
10063 vm_map_copy_discard(head_copy);
10064 head_copy = NULL;
10065 }
10066 if (tail_copy) {
10067 entry = vm_map_copy_last_entry(tail_copy);
10068 vm_map_copy_entry_unlink(tail_copy, entry);
10069 vm_map_copy_entry_link(copy,
10070 vm_map_copy_last_entry(copy),
10071 entry);
10072 copy->size += tail_size;
10073 vm_map_copy_discard(tail_copy);
10074 tail_copy = NULL;
10075 }
10076 }
10077 return kr;
10078 }
10079
10080
10081 /*
10082 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
10083 *
 * Description:
10085 * Physically copy unaligned data
10086 *
10087 * Implementation:
10088 * Unaligned parts of pages have to be physically copied. We use
 * a modified form of vm_fault_copy (which understands non-aligned
 * page offsets and sizes) to do the copy.  We attempt to copy as
 * much memory in one go as possible; however, vm_fault_copy copies
10092 * within 1 memory object so we have to find the smaller of "amount left"
10093 * "source object data size" and "target object data size". With
10094 * unaligned data we don't need to split regions, therefore the source
10095 * (copy) object should be one map entry, the target range may be split
10096 * over multiple map entries however. In any event we are pessimistic
10097 * about these assumptions.
10098 *
10099 * Callers of this function must call vm_map_copy_require on
10100 * previously created vm_map_copy_t or pass a newly created
10101 * one to ensure that it hasn't been forged.
10102 *
10103 * Assumptions:
10104 * dst_map is locked on entry and is return locked on success,
10105 * unlocked on error.
10106 */
10107
10108 static kern_return_t
vm_map_copy_overwrite_unaligned(vm_map_t dst_map,vm_map_entry_t entry,vm_map_copy_t copy,vm_map_offset_t start,boolean_t discard_on_success)10109 vm_map_copy_overwrite_unaligned(
10110 vm_map_t dst_map,
10111 vm_map_entry_t entry,
10112 vm_map_copy_t copy,
10113 vm_map_offset_t start,
10114 boolean_t discard_on_success)
10115 {
10116 vm_map_entry_t copy_entry;
10117 vm_map_entry_t copy_entry_next;
10118 vm_map_version_t version;
10119 vm_object_t dst_object;
10120 vm_object_offset_t dst_offset;
10121 vm_object_offset_t src_offset;
10122 vm_object_offset_t entry_offset;
10123 vm_map_offset_t entry_end;
10124 vm_map_size_t src_size,
10125 dst_size,
10126 copy_size,
10127 amount_left;
10128 kern_return_t kr = KERN_SUCCESS;
10129
10130
10131 copy_entry = vm_map_copy_first_entry(copy);
10132
10133 vm_map_lock_write_to_read(dst_map);
10134
10135 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
10136 amount_left = copy->size;
10137 /*
10138 * unaligned so we never clipped this entry, we need the offset into
10139 * the vm_object not just the data.
10140 */
10141 while (amount_left > 0) {
10142 if (entry == vm_map_to_entry(dst_map)) {
10143 vm_map_unlock_read(dst_map);
10144 return KERN_INVALID_ADDRESS;
10145 }
10146
10147 /* "start" must be within the current map entry */
10148 assert((start >= entry->vme_start) && (start < entry->vme_end));
10149
10150 dst_offset = start - entry->vme_start;
10151
10152 dst_size = entry->vme_end - start;
10153
10154 src_size = copy_entry->vme_end -
10155 (copy_entry->vme_start + src_offset);
10156
10157 if (dst_size < src_size) {
10158 /*
10159 * we can only copy dst_size bytes before
10160 * we have to get the next destination entry
10161 */
10162 copy_size = dst_size;
10163 } else {
10164 /*
10165 * we can only copy src_size bytes before
10166 * we have to get the next source copy entry
10167 */
10168 copy_size = src_size;
10169 }
10170
10171 if (copy_size > amount_left) {
10172 copy_size = amount_left;
10173 }
10174 /*
10175 * Entry needs copy, create a shadow shadow object for
10176 * Copy on write region.
10177 */
10178 if (entry->needs_copy &&
10179 ((entry->protection & VM_PROT_WRITE) != 0)) {
10180 if (vm_map_lock_read_to_write(dst_map)) {
10181 vm_map_lock_read(dst_map);
10182 goto RetryLookup;
10183 }
10184 VME_OBJECT_SHADOW(entry,
10185 (vm_map_size_t)(entry->vme_end
10186 - entry->vme_start),
10187 vm_map_always_shadow(dst_map));
10188 entry->needs_copy = FALSE;
10189 vm_map_lock_write_to_read(dst_map);
10190 }
10191 dst_object = VME_OBJECT(entry);
10192 /*
10193 * unlike with the virtual (aligned) copy we're going
10194 * to fault on it therefore we need a target object.
10195 */
10196 if (dst_object == VM_OBJECT_NULL) {
10197 if (vm_map_lock_read_to_write(dst_map)) {
10198 vm_map_lock_read(dst_map);
10199 goto RetryLookup;
10200 }
10201 dst_object = vm_object_allocate((vm_map_size_t)
10202 entry->vme_end - entry->vme_start);
10203 VME_OBJECT_SET(entry, dst_object, false, 0);
10204 VME_OFFSET_SET(entry, 0);
10205 assert(entry->use_pmap);
10206 vm_map_lock_write_to_read(dst_map);
10207 }
10208 /*
10209 * Take an object reference and unlock map. The "entry" may
10210 * disappear or change when the map is unlocked.
10211 */
10212 vm_object_reference(dst_object);
10213 version.main_timestamp = dst_map->timestamp;
10214 entry_offset = VME_OFFSET(entry);
10215 entry_end = entry->vme_end;
10216 vm_map_unlock_read(dst_map);
10217 /*
10218 * Copy as much as possible in one pass
10219 */
10220 kr = vm_fault_copy(
10221 VME_OBJECT(copy_entry),
10222 VME_OFFSET(copy_entry) + src_offset,
10223 ©_size,
10224 dst_object,
10225 entry_offset + dst_offset,
10226 dst_map,
10227 &version,
10228 THREAD_UNINT );
10229
10230 start += copy_size;
10231 src_offset += copy_size;
10232 amount_left -= copy_size;
10233 /*
10234 * Release the object reference
10235 */
10236 vm_object_deallocate(dst_object);
10237 /*
10238 * If a hard error occurred, return it now
10239 */
10240 if (kr != KERN_SUCCESS) {
10241 return kr;
10242 }
10243
10244 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10245 || amount_left == 0) {
10246 /*
10247 * all done with this copy entry, dispose.
10248 */
10249 copy_entry_next = copy_entry->vme_next;
10250
10251 if (discard_on_success) {
10252 vm_map_copy_entry_unlink(copy, copy_entry);
10253 assert(!copy_entry->is_sub_map);
10254 vm_object_deallocate(VME_OBJECT(copy_entry));
10255 vm_map_copy_entry_dispose(copy_entry);
10256 }
10257
10258 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10259 amount_left) {
10260 /*
10261 * not finished copying but run out of source
10262 */
10263 return KERN_INVALID_ADDRESS;
10264 }
10265
10266 copy_entry = copy_entry_next;
10267
10268 src_offset = 0;
10269 }
10270
10271 if (amount_left == 0) {
10272 return KERN_SUCCESS;
10273 }
10274
10275 vm_map_lock_read(dst_map);
10276 if (version.main_timestamp == dst_map->timestamp) {
10277 if (start == entry_end) {
10278 /*
10279 * destination region is split. Use the version
10280 * information to avoid a lookup in the normal
10281 * case.
10282 */
10283 entry = entry->vme_next;
10284 /*
10285 * should be contiguous. Fail if we encounter
10286 * a hole in the destination.
10287 */
10288 if (start != entry->vme_start) {
10289 vm_map_unlock_read(dst_map);
10290 return KERN_INVALID_ADDRESS;
10291 }
10292 }
10293 } else {
10294 /*
10295 * Map version check failed.
10296 * we must lookup the entry because somebody
10297 * might have changed the map behind our backs.
10298 */
10299 RetryLookup:
10300 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10301 vm_map_unlock_read(dst_map);
10302 return KERN_INVALID_ADDRESS;
10303 }
10304 }
10305 }/* while */
10306
10307 return KERN_SUCCESS;
10308 }/* vm_map_copy_overwrite_unaligned */
10309
10310 /*
10311 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10312 *
10313 * Description:
10314 * Does all the vm_trickery possible for whole pages.
10315 *
10316 * Implementation:
10317 *
10318 * If there are no permanent objects in the destination,
10319 * and the source and destination map entry zones match,
10320 * and the destination map entry is not shared,
10321 * then the map entries can be deleted and replaced
10322 * with those from the copy. The following code is the
10323 * basic idea of what to do, but there are lots of annoying
10324 * little details about getting protection and inheritance
10325 * right. Should add protection, inheritance, and sharing checks
10326 * to the above pass and make sure that no wiring is involved.
10327 *
10328 * Callers of this function must call vm_map_copy_require on
10329 * previously created vm_map_copy_t or pass a newly created
10330 * one to ensure that it hasn't been forged.
10331 */
10332
/*
 * Diagnostic counters for vm_map_copy_overwrite_aligned(): each counts
 * how many times the optimized entry-substitution path was abandoned in
 * favor of the slow physical-copy path, for the reason named.
 */
int vm_map_copy_overwrite_aligned_src_not_internal = 0;   /* source backed by a non-internal (external) object */
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;  /* source true_share or non-SYMMETRIC copy strategy */
int vm_map_copy_overwrite_aligned_src_large = 0;          /* small copy out of a large object (tradeoff #1) */
10336
10337 static kern_return_t
vm_map_copy_overwrite_aligned(vm_map_t dst_map,vm_map_entry_t tmp_entry,vm_map_copy_t copy,vm_map_offset_t start,__unused pmap_t pmap)10338 vm_map_copy_overwrite_aligned(
10339 vm_map_t dst_map,
10340 vm_map_entry_t tmp_entry,
10341 vm_map_copy_t copy,
10342 vm_map_offset_t start,
10343 __unused pmap_t pmap)
10344 {
10345 vm_object_t object;
10346 vm_map_entry_t copy_entry;
10347 vm_map_size_t copy_size;
10348 vm_map_size_t size;
10349 vm_map_entry_t entry;
10350
10351 while ((copy_entry = vm_map_copy_first_entry(copy))
10352 != vm_map_copy_to_entry(copy)) {
10353 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10354
10355 entry = tmp_entry;
10356 if (entry->is_sub_map) {
10357 /* unnested when clipped earlier */
10358 assert(!entry->use_pmap);
10359 }
10360 if (entry == vm_map_to_entry(dst_map)) {
10361 vm_map_unlock(dst_map);
10362 return KERN_INVALID_ADDRESS;
10363 }
10364 size = (entry->vme_end - entry->vme_start);
10365 /*
10366 * Make sure that no holes popped up in the
10367 * address map, and that the protection is
10368 * still valid, in case the map was unlocked
10369 * earlier.
10370 */
10371
10372 if ((entry->vme_start != start) || ((entry->is_sub_map)
10373 && !entry->needs_copy)) {
10374 vm_map_unlock(dst_map);
10375 return KERN_INVALID_ADDRESS;
10376 }
10377 assert(entry != vm_map_to_entry(dst_map));
10378
10379 /*
10380 * Check protection again
10381 */
10382
10383 if (!(entry->protection & VM_PROT_WRITE)) {
10384 vm_map_unlock(dst_map);
10385 return KERN_PROTECTION_FAILURE;
10386 }
10387
10388 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
10389 vm_map_unlock(dst_map);
10390 return KERN_PROTECTION_FAILURE;
10391 }
10392
10393 /*
10394 * Adjust to source size first
10395 */
10396
10397 if (copy_size < size) {
10398 if (entry->map_aligned &&
10399 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10400 VM_MAP_PAGE_MASK(dst_map))) {
10401 /* no longer map-aligned */
10402 entry->map_aligned = FALSE;
10403 }
10404 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10405 size = copy_size;
10406 }
10407
10408 /*
10409 * Adjust to destination size
10410 */
10411
10412 if (size < copy_size) {
10413 vm_map_copy_clip_end(copy, copy_entry,
10414 copy_entry->vme_start + size);
10415 copy_size = size;
10416 }
10417
10418 assert((entry->vme_end - entry->vme_start) == size);
10419 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10420 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10421
10422 /*
10423 * If the destination contains temporary unshared memory,
10424 * we can perform the copy by throwing it away and
10425 * installing the source data.
10426 */
10427
10428 object = VME_OBJECT(entry);
10429 if ((!entry->is_shared &&
10430 ((object == VM_OBJECT_NULL) ||
10431 (object->internal && !object->true_share))) ||
10432 entry->needs_copy) {
10433 vm_object_t old_object = VME_OBJECT(entry);
10434 vm_object_offset_t old_offset = VME_OFFSET(entry);
10435 vm_object_offset_t offset;
10436
10437 /*
10438 * Ensure that the source and destination aren't
10439 * identical
10440 */
10441 if (old_object == VME_OBJECT(copy_entry) &&
10442 old_offset == VME_OFFSET(copy_entry)) {
10443 vm_map_copy_entry_unlink(copy, copy_entry);
10444 vm_map_copy_entry_dispose(copy_entry);
10445
10446 if (old_object != VM_OBJECT_NULL) {
10447 vm_object_deallocate(old_object);
10448 }
10449
10450 start = tmp_entry->vme_end;
10451 tmp_entry = tmp_entry->vme_next;
10452 continue;
10453 }
10454
10455 #if XNU_TARGET_OS_OSX
10456 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10457 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10458 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10459 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10460 copy_size <= __TRADEOFF1_COPY_SIZE) {
10461 /*
10462 * Virtual vs. Physical copy tradeoff #1.
10463 *
10464 * Copying only a few pages out of a large
10465 * object: do a physical copy instead of
10466 * a virtual copy, to avoid possibly keeping
10467 * the entire large object alive because of
10468 * those few copy-on-write pages.
10469 */
10470 vm_map_copy_overwrite_aligned_src_large++;
10471 goto slow_copy;
10472 }
10473 #endif /* XNU_TARGET_OS_OSX */
10474
10475 if ((dst_map->pmap != kernel_pmap) &&
10476 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10477 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10478 vm_object_t new_object, new_shadow;
10479
10480 /*
10481 * We're about to map something over a mapping
10482 * established by malloc()...
10483 */
10484 new_object = VME_OBJECT(copy_entry);
10485 if (new_object != VM_OBJECT_NULL) {
10486 vm_object_lock_shared(new_object);
10487 }
10488 while (new_object != VM_OBJECT_NULL &&
10489 #if XNU_TARGET_OS_OSX
10490 !new_object->true_share &&
10491 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10492 #endif /* XNU_TARGET_OS_OSX */
10493 new_object->internal) {
10494 new_shadow = new_object->shadow;
10495 if (new_shadow == VM_OBJECT_NULL) {
10496 break;
10497 }
10498 vm_object_lock_shared(new_shadow);
10499 vm_object_unlock(new_object);
10500 new_object = new_shadow;
10501 }
10502 if (new_object != VM_OBJECT_NULL) {
10503 if (!new_object->internal) {
10504 /*
10505 * The new mapping is backed
10506 * by an external object. We
10507 * don't want malloc'ed memory
10508 * to be replaced with such a
10509 * non-anonymous mapping, so
10510 * let's go off the optimized
10511 * path...
10512 */
10513 vm_map_copy_overwrite_aligned_src_not_internal++;
10514 vm_object_unlock(new_object);
10515 goto slow_copy;
10516 }
10517 #if XNU_TARGET_OS_OSX
10518 if (new_object->true_share ||
10519 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10520 /*
10521 * Same if there's a "true_share"
10522 * object in the shadow chain, or
10523 * an object with a non-default
10524 * (SYMMETRIC) copy strategy.
10525 */
10526 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10527 vm_object_unlock(new_object);
10528 goto slow_copy;
10529 }
10530 #endif /* XNU_TARGET_OS_OSX */
10531 vm_object_unlock(new_object);
10532 }
10533 /*
10534 * The new mapping is still backed by
10535 * anonymous (internal) memory, so it's
10536 * OK to substitute it for the original
10537 * malloc() mapping.
10538 */
10539 }
10540
10541 if (old_object != VM_OBJECT_NULL) {
10542 assert(!entry->vme_permanent);
10543 if (entry->is_sub_map) {
10544 if (entry->use_pmap) {
10545 #ifndef NO_NESTED_PMAP
10546 pmap_unnest(dst_map->pmap,
10547 (addr64_t)entry->vme_start,
10548 entry->vme_end - entry->vme_start);
10549 #endif /* NO_NESTED_PMAP */
10550 if (dst_map->mapped_in_other_pmaps) {
10551 /* clean up parent */
10552 /* map/maps */
10553 vm_map_submap_pmap_clean(
10554 dst_map, entry->vme_start,
10555 entry->vme_end,
10556 VME_SUBMAP(entry),
10557 VME_OFFSET(entry));
10558 }
10559 } else {
10560 vm_map_submap_pmap_clean(
10561 dst_map, entry->vme_start,
10562 entry->vme_end,
10563 VME_SUBMAP(entry),
10564 VME_OFFSET(entry));
10565 }
10566 vm_map_deallocate(VME_SUBMAP(entry));
10567 } else {
10568 if (dst_map->mapped_in_other_pmaps) {
10569 vm_object_pmap_protect_options(
10570 VME_OBJECT(entry),
10571 VME_OFFSET(entry),
10572 entry->vme_end
10573 - entry->vme_start,
10574 PMAP_NULL,
10575 PAGE_SIZE,
10576 entry->vme_start,
10577 VM_PROT_NONE,
10578 PMAP_OPTIONS_REMOVE);
10579 } else {
10580 pmap_remove_options(
10581 dst_map->pmap,
10582 (addr64_t)(entry->vme_start),
10583 (addr64_t)(entry->vme_end),
10584 PMAP_OPTIONS_REMOVE);
10585 }
10586 vm_object_deallocate(old_object);
10587 }
10588 }
10589
10590 if (entry->iokit_acct) {
10591 /* keep using iokit accounting */
10592 entry->use_pmap = FALSE;
10593 } else {
10594 /* use pmap accounting */
10595 entry->use_pmap = TRUE;
10596 }
10597 assert(!entry->vme_permanent);
10598 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry), false, 0);
10599 object = VME_OBJECT(entry);
10600 entry->needs_copy = copy_entry->needs_copy;
10601 entry->wired_count = 0;
10602 entry->user_wired_count = 0;
10603 offset = VME_OFFSET(copy_entry);
10604 VME_OFFSET_SET(entry, offset);
10605
10606 vm_map_copy_entry_unlink(copy, copy_entry);
10607 vm_map_copy_entry_dispose(copy_entry);
10608
10609 /*
10610 * we could try to push pages into the pmap at this point, BUT
10611 * this optimization only saved on average 2 us per page if ALL
10612 * the pages in the source were currently mapped
10613 * and ALL the pages in the dest were touched, if there were fewer
10614 * than 2/3 of the pages touched, this optimization actually cost more cycles
10615 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
10616 */
10617
10618 /*
10619 * Set up for the next iteration. The map
10620 * has not been unlocked, so the next
10621 * address should be at the end of this
10622 * entry, and the next map entry should be
10623 * the one following it.
10624 */
10625
10626 start = tmp_entry->vme_end;
10627 tmp_entry = tmp_entry->vme_next;
10628 } else {
10629 vm_map_version_t version;
10630 vm_object_t dst_object;
10631 vm_object_offset_t dst_offset;
10632 kern_return_t r;
10633
10634 slow_copy:
10635 if (entry->needs_copy) {
10636 VME_OBJECT_SHADOW(entry,
10637 (entry->vme_end -
10638 entry->vme_start),
10639 vm_map_always_shadow(dst_map));
10640 entry->needs_copy = FALSE;
10641 }
10642
10643 dst_object = VME_OBJECT(entry);
10644 dst_offset = VME_OFFSET(entry);
10645
10646 /*
10647 * Take an object reference, and record
10648 * the map version information so that the
10649 * map can be safely unlocked.
10650 */
10651
10652 if (dst_object == VM_OBJECT_NULL) {
10653 /*
10654 * We would usually have just taken the
10655 * optimized path above if the destination
10656 * object has not been allocated yet. But we
10657 * now disable that optimization if the copy
10658 * entry's object is not backed by anonymous
10659 * memory to avoid replacing malloc'ed
10660 * (i.e. re-usable) anonymous memory with a
10661 * not-so-anonymous mapping.
10662 * So we have to handle this case here and
10663 * allocate a new VM object for this map entry.
10664 */
10665 dst_object = vm_object_allocate(
10666 entry->vme_end - entry->vme_start);
10667 dst_offset = 0;
10668 VME_OBJECT_SET(entry, dst_object, false, 0);
10669 VME_OFFSET_SET(entry, dst_offset);
10670 assert(entry->use_pmap);
10671 }
10672
10673 vm_object_reference(dst_object);
10674
10675 /* account for unlock bumping up timestamp */
10676 version.main_timestamp = dst_map->timestamp + 1;
10677
10678 vm_map_unlock(dst_map);
10679
10680 /*
10681 * Copy as much as possible in one pass
10682 */
10683
10684 copy_size = size;
10685 r = vm_fault_copy(
10686 VME_OBJECT(copy_entry),
10687 VME_OFFSET(copy_entry),
10688 ©_size,
10689 dst_object,
10690 dst_offset,
10691 dst_map,
10692 &version,
10693 THREAD_UNINT );
10694
10695 /*
10696 * Release the object reference
10697 */
10698
10699 vm_object_deallocate(dst_object);
10700
10701 /*
10702 * If a hard error occurred, return it now
10703 */
10704
10705 if (r != KERN_SUCCESS) {
10706 return r;
10707 }
10708
10709 if (copy_size != 0) {
10710 /*
10711 * Dispose of the copied region
10712 */
10713
10714 vm_map_copy_clip_end(copy, copy_entry,
10715 copy_entry->vme_start + copy_size);
10716 vm_map_copy_entry_unlink(copy, copy_entry);
10717 vm_object_deallocate(VME_OBJECT(copy_entry));
10718 vm_map_copy_entry_dispose(copy_entry);
10719 }
10720
10721 /*
10722 * Pick up in the destination map where we left off.
10723 *
10724 * Use the version information to avoid a lookup
10725 * in the normal case.
10726 */
10727
10728 start += copy_size;
10729 vm_map_lock(dst_map);
10730 if (version.main_timestamp == dst_map->timestamp &&
10731 copy_size != 0) {
10732 /* We can safely use saved tmp_entry value */
10733
10734 if (tmp_entry->map_aligned &&
10735 !VM_MAP_PAGE_ALIGNED(
10736 start,
10737 VM_MAP_PAGE_MASK(dst_map))) {
10738 /* no longer map-aligned */
10739 tmp_entry->map_aligned = FALSE;
10740 }
10741 vm_map_clip_end(dst_map, tmp_entry, start);
10742 tmp_entry = tmp_entry->vme_next;
10743 } else {
10744 /* Must do lookup of tmp_entry */
10745
10746 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10747 vm_map_unlock(dst_map);
10748 return KERN_INVALID_ADDRESS;
10749 }
10750 if (tmp_entry->map_aligned &&
10751 !VM_MAP_PAGE_ALIGNED(
10752 start,
10753 VM_MAP_PAGE_MASK(dst_map))) {
10754 /* no longer map-aligned */
10755 tmp_entry->map_aligned = FALSE;
10756 }
10757 vm_map_clip_start(dst_map, tmp_entry, start);
10758 }
10759 }
10760 }/* while */
10761
10762 return KERN_SUCCESS;
10763 }/* vm_map_copy_overwrite_aligned */
10764
10765 /*
10766 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10767 *
10768 * Description:
10769 * Copy in data to a kernel buffer from space in the
10770 * source map. The original space may be optionally
10771 * deallocated.
10772 *
10773 * If successful, returns a new copy object.
10774 */
10775 static kern_return_t
vm_map_copyin_kernel_buffer(vm_map_t src_map,vm_map_offset_t src_addr,vm_map_size_t len,boolean_t src_destroy,vm_map_copy_t * copy_result)10776 vm_map_copyin_kernel_buffer(
10777 vm_map_t src_map,
10778 vm_map_offset_t src_addr,
10779 vm_map_size_t len,
10780 boolean_t src_destroy,
10781 vm_map_copy_t *copy_result)
10782 {
10783 kern_return_t kr;
10784 vm_map_copy_t copy;
10785
10786 if (len > msg_ool_size_small) {
10787 return KERN_INVALID_ARGUMENT;
10788 }
10789
10790 copy = zalloc_id(ZONE_ID_VM_MAP_COPY, Z_WAITOK | Z_ZERO | Z_NOFAIL);
10791 copy->cpy_kdata = kalloc_data(len, Z_WAITOK);
10792 if (copy->cpy_kdata == NULL) {
10793 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
10794 return KERN_RESOURCE_SHORTAGE;
10795 }
10796
10797 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10798 copy->size = len;
10799 copy->offset = 0;
10800
10801 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10802 if (kr != KERN_SUCCESS) {
10803 kfree_data(copy->cpy_kdata, len);
10804 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
10805 return kr;
10806 }
10807
10808 if (src_destroy) {
10809 vmr_flags_t flags = VM_MAP_REMOVE_INTERRUPTIBLE;
10810
10811 if (src_map == kernel_map) {
10812 flags |= VM_MAP_REMOVE_KUNWIRE;
10813 }
10814
10815 (void)vm_map_remove_guard(src_map,
10816 vm_map_trunc_page(src_addr, VM_MAP_PAGE_MASK(src_map)),
10817 vm_map_round_page(src_addr + len, VM_MAP_PAGE_MASK(src_map)),
10818 flags, KMEM_GUARD_NONE);
10819 }
10820
10821 *copy_result = copy;
10822 return KERN_SUCCESS;
10823 }
10824
10825 /*
10826 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10827 *
10828 * Description:
10829 * Copy out data from a kernel buffer into space in the
10830 * destination map. The space may be otpionally dynamically
10831 * allocated.
10832 *
10833 * If successful, consumes the copy object.
10834 * Otherwise, the caller is responsible for it.
10835 *
10836 * Callers of this function must call vm_map_copy_require on
10837 * previously created vm_map_copy_t or pass a newly created
10838 * one to ensure that it hasn't been forged.
10839 */
10840 static int vm_map_copyout_kernel_buffer_failures = 0;
10841 static kern_return_t
vm_map_copyout_kernel_buffer(vm_map_t map,vm_map_address_t * addr,vm_map_copy_t copy,vm_map_size_t copy_size,boolean_t overwrite,boolean_t consume_on_success)10842 vm_map_copyout_kernel_buffer(
10843 vm_map_t map,
10844 vm_map_address_t *addr, /* IN/OUT */
10845 vm_map_copy_t copy,
10846 vm_map_size_t copy_size,
10847 boolean_t overwrite,
10848 boolean_t consume_on_success)
10849 {
10850 kern_return_t kr = KERN_SUCCESS;
10851 thread_t thread = current_thread();
10852
10853 assert(copy->size == copy_size);
10854
10855 /*
10856 * check for corrupted vm_map_copy structure
10857 */
10858 if (copy_size > msg_ool_size_small || copy->offset) {
10859 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10860 (long long)copy->size, (long long)copy->offset);
10861 }
10862
10863 if (!overwrite) {
10864 /*
10865 * Allocate space in the target map for the data
10866 */
10867 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
10868
10869 if (map == kernel_map) {
10870 vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
10871 }
10872 *addr = 0;
10873 kr = vm_map_enter(map,
10874 addr,
10875 vm_map_round_page(copy_size,
10876 VM_MAP_PAGE_MASK(map)),
10877 (vm_map_offset_t) 0,
10878 VM_FLAGS_ANYWHERE,
10879 vmk_flags,
10880 VM_KERN_MEMORY_NONE,
10881 VM_OBJECT_NULL,
10882 (vm_object_offset_t) 0,
10883 FALSE,
10884 VM_PROT_DEFAULT,
10885 VM_PROT_ALL,
10886 VM_INHERIT_DEFAULT);
10887 if (kr != KERN_SUCCESS) {
10888 return kr;
10889 }
10890 #if KASAN
10891 if (map->pmap == kernel_pmap) {
10892 kasan_notify_address(*addr, copy->size);
10893 }
10894 #endif
10895 }
10896
10897 /*
10898 * Copyout the data from the kernel buffer to the target map.
10899 */
10900 if (thread->map == map) {
10901 /*
10902 * If the target map is the current map, just do
10903 * the copy.
10904 */
10905 assert((vm_size_t)copy_size == copy_size);
10906 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10907 kr = KERN_INVALID_ADDRESS;
10908 }
10909 } else {
10910 vm_map_t oldmap;
10911
10912 /*
10913 * If the target map is another map, assume the
10914 * target's address space identity for the duration
10915 * of the copy.
10916 */
10917 vm_map_reference(map);
10918 oldmap = vm_map_switch(map);
10919
10920 assert((vm_size_t)copy_size == copy_size);
10921 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10922 vm_map_copyout_kernel_buffer_failures++;
10923 kr = KERN_INVALID_ADDRESS;
10924 }
10925
10926 (void) vm_map_switch(oldmap);
10927 vm_map_deallocate(map);
10928 }
10929
10930 if (kr != KERN_SUCCESS) {
10931 /* the copy failed, clean up */
10932 if (!overwrite) {
10933 /*
10934 * Deallocate the space we allocated in the target map.
10935 */
10936 (void) vm_map_remove(map,
10937 vm_map_trunc_page(*addr,
10938 VM_MAP_PAGE_MASK(map)),
10939 vm_map_round_page((*addr +
10940 vm_map_round_page(copy_size,
10941 VM_MAP_PAGE_MASK(map))),
10942 VM_MAP_PAGE_MASK(map)));
10943 *addr = 0;
10944 }
10945 } else {
10946 /* copy was successful, dicard the copy structure */
10947 if (consume_on_success) {
10948 kfree_data(copy->cpy_kdata, copy_size);
10949 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
10950 }
10951 }
10952
10953 return kr;
10954 }
10955
10956 /*
10957 * Routine: vm_map_copy_insert [internal use only]
10958 *
10959 * Description:
10960 * Link a copy chain ("copy") into a map at the
10961 * specified location (after "where").
10962 *
10963 * Callers of this function must call vm_map_copy_require on
10964 * previously created vm_map_copy_t or pass a newly created
10965 * one to ensure that it hasn't been forged.
10966 * Side effects:
10967 * The copy chain is destroyed.
10968 */
10969 static void
vm_map_copy_insert(vm_map_t map,vm_map_entry_t after_where,vm_map_copy_t copy)10970 vm_map_copy_insert(
10971 vm_map_t map,
10972 vm_map_entry_t after_where,
10973 vm_map_copy_t copy)
10974 {
10975 vm_map_entry_t entry;
10976
10977 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10978 entry = vm_map_copy_first_entry(copy);
10979 vm_map_copy_entry_unlink(copy, entry);
10980 vm_map_store_entry_link(map, after_where, entry,
10981 VM_MAP_KERNEL_FLAGS_NONE);
10982 after_where = entry;
10983 }
10984 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
10985 }
10986
10987 /*
10988 * Callers of this function must call vm_map_copy_require on
10989 * previously created vm_map_copy_t or pass a newly created
10990 * one to ensure that it hasn't been forged.
10991 */
10992 void
vm_map_copy_remap(vm_map_t map,vm_map_entry_t where,vm_map_copy_t copy,vm_map_offset_t adjustment,vm_prot_t cur_prot,vm_prot_t max_prot,vm_inherit_t inheritance)10993 vm_map_copy_remap(
10994 vm_map_t map,
10995 vm_map_entry_t where,
10996 vm_map_copy_t copy,
10997 vm_map_offset_t adjustment,
10998 vm_prot_t cur_prot,
10999 vm_prot_t max_prot,
11000 vm_inherit_t inheritance)
11001 {
11002 vm_map_entry_t copy_entry, new_entry;
11003
11004 for (copy_entry = vm_map_copy_first_entry(copy);
11005 copy_entry != vm_map_copy_to_entry(copy);
11006 copy_entry = copy_entry->vme_next) {
11007 /* get a new VM map entry for the map */
11008 new_entry = vm_map_entry_create(map);
11009 /* copy the "copy entry" to the new entry */
11010 vm_map_entry_copy(map, new_entry, copy_entry);
11011 /* adjust "start" and "end" */
11012 new_entry->vme_start += adjustment;
11013 new_entry->vme_end += adjustment;
11014 /* clear some attributes */
11015 new_entry->inheritance = inheritance;
11016 new_entry->protection = cur_prot;
11017 new_entry->max_protection = max_prot;
11018 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
11019 /* take an extra reference on the entry's "object" */
11020 if (new_entry->is_sub_map) {
11021 assert(!new_entry->use_pmap); /* not nested */
11022 vm_map_reference(VME_SUBMAP(new_entry));
11023 } else {
11024 vm_object_reference(VME_OBJECT(new_entry));
11025 }
11026 /* insert the new entry in the map */
11027 vm_map_store_entry_link(map, where, new_entry,
11028 VM_MAP_KERNEL_FLAGS_NONE);
11029 /* continue inserting the "copy entries" after the new entry */
11030 where = new_entry;
11031 }
11032 }
11033
11034
11035 /*
11036 * Returns true if *size matches (or is in the range of) copy->size.
11037 * Upon returning true, the *size field is updated with the actual size of the
11038 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
11039 */
11040 boolean_t
vm_map_copy_validate_size(vm_map_t dst_map,vm_map_copy_t copy,vm_map_size_t * size)11041 vm_map_copy_validate_size(
11042 vm_map_t dst_map,
11043 vm_map_copy_t copy,
11044 vm_map_size_t *size)
11045 {
11046 if (copy == VM_MAP_COPY_NULL) {
11047 return FALSE;
11048 }
11049
11050 /*
11051 * Assert that the vm_map_copy is coming from the right
11052 * zone and hasn't been forged
11053 */
11054 vm_map_copy_require(copy);
11055
11056 vm_map_size_t copy_sz = copy->size;
11057 vm_map_size_t sz = *size;
11058 switch (copy->type) {
11059 case VM_MAP_COPY_OBJECT:
11060 case VM_MAP_COPY_KERNEL_BUFFER:
11061 if (sz == copy_sz) {
11062 return TRUE;
11063 }
11064 break;
11065 case VM_MAP_COPY_ENTRY_LIST:
11066 /*
11067 * potential page-size rounding prevents us from exactly
11068 * validating this flavor of vm_map_copy, but we can at least
11069 * assert that it's within a range.
11070 */
11071 if (copy_sz >= sz &&
11072 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
11073 *size = copy_sz;
11074 return TRUE;
11075 }
11076 break;
11077 default:
11078 break;
11079 }
11080 return FALSE;
11081 }
11082
11083 /*
11084 * Routine: vm_map_copyout_size
11085 *
11086 * Description:
11087 * Copy out a copy chain ("copy") into newly-allocated
11088 * space in the destination map. Uses a prevalidated
11089 * size for the copy object (vm_map_copy_validate_size).
11090 *
11091 * If successful, consumes the copy object.
11092 * Otherwise, the caller is responsible for it.
11093 */
11094 kern_return_t
vm_map_copyout_size(vm_map_t dst_map,vm_map_address_t * dst_addr,vm_map_copy_t copy,vm_map_size_t copy_size)11095 vm_map_copyout_size(
11096 vm_map_t dst_map,
11097 vm_map_address_t *dst_addr, /* OUT */
11098 vm_map_copy_t copy,
11099 vm_map_size_t copy_size)
11100 {
11101 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
11102 TRUE, /* consume_on_success */
11103 VM_PROT_DEFAULT,
11104 VM_PROT_ALL,
11105 VM_INHERIT_DEFAULT);
11106 }
11107
11108 /*
11109 * Routine: vm_map_copyout
11110 *
11111 * Description:
11112 * Copy out a copy chain ("copy") into newly-allocated
11113 * space in the destination map.
11114 *
11115 * If successful, consumes the copy object.
11116 * Otherwise, the caller is responsible for it.
11117 */
11118 kern_return_t
vm_map_copyout(vm_map_t dst_map,vm_map_address_t * dst_addr,vm_map_copy_t copy)11119 vm_map_copyout(
11120 vm_map_t dst_map,
11121 vm_map_address_t *dst_addr, /* OUT */
11122 vm_map_copy_t copy)
11123 {
11124 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
11125 TRUE, /* consume_on_success */
11126 VM_PROT_DEFAULT,
11127 VM_PROT_ALL,
11128 VM_INHERIT_DEFAULT);
11129 }
11130
/*
 * Core copy-out implementation shared by vm_map_copyout() and
 * vm_map_copyout_size(): materialize a vm_map_copy_t ("copy") into
 * newly-allocated space in "dst_map" and return its address in
 * *dst_addr.
 *
 * "copy_size" must equal copy->size (callers prevalidate).
 * If "consume_on_success" is TRUE, the copy object is consumed on
 * success; otherwise its entries are duplicated (vm_map_copy_remap)
 * and the caller keeps ownership of "copy".
 * "cur_protection"/"max_protection"/"inheritance" are only applied on
 * the non-consuming (remap) path; the consuming path resets entries
 * to VM_PROT_DEFAULT/VM_PROT_ALL/VM_INHERIT_DEFAULT below.
 */
kern_return_t
vm_map_copyout_internal(
	vm_map_t                dst_map,
	vm_map_address_t        *dst_addr,      /* OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size,
	boolean_t               consume_on_success,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_size_t           size;
	vm_map_size_t           adjustment;
	vm_map_offset_t         start;
	vm_object_offset_t      vm_copy_start;
	vm_map_entry_t          last;
	vm_map_entry_t          entry;
	vm_map_copy_t           original_copy;
	kern_return_t           kr;
	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

	/*
	 * Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		*dst_addr = 0;
		return KERN_SUCCESS;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	/* caller's prevalidated size must agree with the copy object */
	if (copy->size != copy_size) {
		*dst_addr = 0;
		return KERN_FAILURE;
	}

	/*
	 * Check for special copy object, created
	 * by vm_map_copyin_object.
	 */

	if (copy->type == VM_MAP_COPY_OBJECT) {
		vm_object_t             object = copy->cpy_object;
		vm_object_offset_t      offset;

		/* map the whole page range covering the (possibly
		 * unaligned) copy, then offset the returned address */
		offset = vm_object_trunc_page(copy->offset);
		size = vm_map_round_page((copy_size +
		    (vm_map_size_t)(copy->offset -
		    offset)),
		    VM_MAP_PAGE_MASK(dst_map));
		*dst_addr = 0;
		kr = vm_map_enter(dst_map, dst_addr, size,
		    (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
		    VM_MAP_KERNEL_FLAGS_NONE,
		    VM_KERN_MEMORY_NONE,
		    object, offset, FALSE,
		    VM_PROT_DEFAULT, VM_PROT_ALL,
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		/* Account for non-pagealigned copy object */
		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
		if (consume_on_success) {
			zfree_id(ZONE_ID_VM_MAP_COPY, copy);
		}
		return KERN_SUCCESS;
	}

	/*
	 * Check for special kernel buffer allocated
	 * by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
		           copy, copy_size, FALSE,
		           consume_on_success);
	}

	/* entry-list copy from here on */
	original_copy = copy;
	if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
		/*
		 * Page-size mismatch between the copy and the destination
		 * map: build an adjusted copy matching the destination's
		 * page size before proceeding.
		 */
		vm_map_copy_t target_copy;
		vm_map_offset_t overmap_start, overmap_end, trimmed_start;

		target_copy = VM_MAP_COPY_NULL;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy,
			0, /* offset */
			copy->size, /* size */
			dst_map,
			TRUE, /* copy */
			&target_copy,
			&overmap_start,
			&overmap_end,
			&trimmed_start);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("adjust failed 0x%x\n", kr);
			return kr;
		}
		DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
		if (target_copy != copy) {
			/* work on the adjusted copy; original is discarded
			 * or restored at the end (see original_copy) */
			copy = target_copy;
		}
		copy_size = copy->size;
	}

	/*
	 * Find space for the data
	 */

	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
	    VM_MAP_COPY_PAGE_MASK(copy));
	size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
	    VM_MAP_COPY_PAGE_MASK(copy))
	    - vm_copy_start;

	/* kernel_map allocations are placed in the data range */
	if (dst_map == kernel_map) {
		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
	}

	/* dst_map stays locked until the entries are linked in below */
	vm_map_lock(dst_map);
	kr = vm_map_locate_space(dst_map, size, 0, vmk_flags,
	    &start, &last);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock(dst_map);
		return kr;
	}

	/* delta from the copy's page-truncated offset to the hole found */
	adjustment = start - vm_copy_start;
	if (!consume_on_success) {
		/*
		 * We're not allowed to consume "copy", so we'll have to
		 * copy its map entries into the destination map below.
		 * No need to re-allocate map entries from the correct
		 * (pageable or not) zone, since we'll get new map entries
		 * during the transfer.
		 * We'll also adjust the map entries's "start" and "end"
		 * during the transfer, to keep "copy"'s entries consistent
		 * with its "offset".
		 */
		goto after_adjustments;
	}

	/*
	 * Since we're going to just drop the map
	 * entries from the copy into the destination
	 * map, they must come from the same pool.
	 */

	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
		/*
		 * Mismatches occur when dealing with the default
		 * pager.
		 */
		vm_map_entry_t  next, new;

		/*
		 * Find the zone that the copies were allocated from
		 */

		entry = vm_map_copy_first_entry(copy);

		/*
		 * Reinitialize the copy so that vm_map_copy_entry_link
		 * will work.
		 */
		vm_map_store_copy_reset(copy, entry);
		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;

		/*
		 * Copy each entry.
		 */
		while (entry != vm_map_copy_to_entry(copy)) {
			new = vm_map_copy_entry_create(copy);
			vm_map_entry_copy_full(new, entry);
			new->vme_no_copy_on_read = FALSE;
			assert(!new->iokit_acct);
			if (new->is_sub_map) {
				/* clr address space specifics */
				new->use_pmap = FALSE;
			}
			vm_map_copy_entry_link(copy,
			    vm_map_copy_last_entry(copy),
			    new);
			next = entry->vme_next;
			/* dispose of the old (wrong-zone) entry */
			vm_map_entry_dispose(entry);
			entry = next;
		}
	}

	/*
	 * Adjust the addresses in the copy chain, and
	 * reset the region attributes.
	 */

	for (entry = vm_map_copy_first_entry(copy);
	    entry != vm_map_copy_to_entry(copy);
	    entry = entry->vme_next) {
		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
			/*
			 * We're injecting this copy entry into a map that
			 * has the standard page alignment, so clear
			 * "map_aligned" (which might have been inherited
			 * from the original map entry).
			 */
			entry->map_aligned = FALSE;
		}

		entry->vme_start += adjustment;
		entry->vme_end += adjustment;

		if (entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
			    VM_MAP_PAGE_MASK(dst_map)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
			    VM_MAP_PAGE_MASK(dst_map)));
		}

		/* consuming path always installs default attributes */
		entry->inheritance = VM_INHERIT_DEFAULT;
		entry->protection = VM_PROT_DEFAULT;
		entry->max_protection = VM_PROT_ALL;
		entry->behavior = VM_BEHAVIOR_DEFAULT;

		/*
		 * If the entry is now wired,
		 * map the pages into the destination map.
		 */
		if (entry->wired_count != 0) {
			vm_map_offset_t va;
			vm_object_offset_t offset;
			vm_object_t object;
			vm_prot_t prot;
			int type_of_fault;

			/* TODO4K would need to use actual page size */
			assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);

			object = VME_OBJECT(entry);
			offset = VME_OFFSET(entry);
			va = entry->vme_start;

			pmap_pageable(dst_map->pmap,
			    entry->vme_start,
			    entry->vme_end,
			    TRUE);

			/* pre-enter every wired page into the pmap */
			while (va < entry->vme_end) {
				vm_page_t       m;
				struct vm_object_fault_info fault_info = {};

				/*
				 * Look up the page in the object.
				 * Assert that the page will be found in the
				 * top object:
				 * either
				 *	the object was newly created by
				 *	vm_object_copy_slowly, and has
				 *	copies of all of the pages from
				 *	the source object
				 * or
				 *	the object was moved from the old
				 *	map entry; because the old map
				 *	entry was wired, all of the pages
				 *	were in the top-level object.
				 *	(XXX not true if we wire pages for
				 *	 reading)
				 */
				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
				    m->vmp_absent) {
					panic("vm_map_copyout: wiring %p", m);
				}

				prot = entry->protection;

				if (override_nx(dst_map, VME_ALIAS(entry)) &&
				    prot) {
					prot |= VM_PROT_EXECUTE;
				}

				type_of_fault = DBG_CACHE_HIT_FAULT;

				fault_info.user_tag = VME_ALIAS(entry);
				fault_info.pmap_options = 0;
				if (entry->iokit_acct ||
				    (!entry->is_sub_map && !entry->use_pmap)) {
					fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
				}

				vm_fault_enter(m,
				    dst_map->pmap,
				    va,
				    PAGE_SIZE, 0,
				    prot,
				    prot,
				    VM_PAGE_WIRED(m),
				    FALSE, /* change_wiring */
				    VM_KERN_MEMORY_NONE, /* tag - not wiring */
				    &fault_info,
				    NULL, /* need_retry */
				    &type_of_fault);

				vm_object_unlock(object);

				offset += PAGE_SIZE_64;
				va += PAGE_SIZE;
			}
		}
	}

after_adjustments:

	/*
	 * Correct the page alignment for the result
	 */

	*dst_addr = start + (copy->offset - vm_copy_start);

#if KASAN
	kasan_notify_address(*dst_addr, size);
#endif

	/*
	 * Update the hints and the map size
	 */

	if (consume_on_success) {
		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
	} else {
		SAVE_HINT_MAP_WRITE(dst_map, last);
	}

	dst_map->size += size;

	/*
	 * Link in the copy
	 */

	if (consume_on_success) {
		vm_map_copy_insert(dst_map, last, copy);
		if (copy != original_copy) {
			/* the page-size-adjusted copy was consumed;
			 * the caller's original is no longer needed */
			vm_map_copy_discard(original_copy);
			original_copy = VM_MAP_COPY_NULL;
		}
	} else {
		vm_map_copy_remap(dst_map, last, copy, adjustment,
		    cur_protection, max_protection,
		    inheritance);
		if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
			/* drop the temporary adjusted copy, hand the
			 * original back to the caller untouched */
			vm_map_copy_discard(copy);
			copy = original_copy;
		}
	}


	vm_map_unlock(dst_map);

	/*
	 * XXX	If wiring_required, call vm_map_pageable
	 */

	return KERN_SUCCESS;
}
11504
11505 /*
11506 * Routine: vm_map_copyin
11507 *
11508 * Description:
11509 * see vm_map_copyin_common. Exported via Unsupported.exports.
11510 *
11511 */
11512
11513 #undef vm_map_copyin
11514
11515 kern_return_t
vm_map_copyin(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t src_destroy,vm_map_copy_t * copy_result)11516 vm_map_copyin(
11517 vm_map_t src_map,
11518 vm_map_address_t src_addr,
11519 vm_map_size_t len,
11520 boolean_t src_destroy,
11521 vm_map_copy_t *copy_result) /* OUT */
11522 {
11523 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11524 FALSE, copy_result, FALSE);
11525 }
11526
11527 /*
11528 * Routine: vm_map_copyin_common
11529 *
11530 * Description:
11531 * Copy the specified region (src_addr, len) from the
11532 * source address space (src_map), possibly removing
11533 * the region from the source address space (src_destroy).
11534 *
11535 * Returns:
11536 * A vm_map_copy_t object (copy_result), suitable for
11537 * insertion into another address space (using vm_map_copyout),
11538 * copying over another address space region (using
11539 * vm_map_copy_overwrite). If the copy is unused, it
11540 * should be destroyed (using vm_map_copy_discard).
11541 *
11542 * In/out conditions:
11543 * The source map should not be locked on entry.
11544 */
11545
/*
 * One node per parent map on the path down through nested submaps
 * during a copy-in traversal (see vm_map_copyin_internal): records
 * the parent map and the range being copied at that level so the
 * traversal can pop back up.
 */
typedef struct submap_map {
	vm_map_t        parent_map;     /* map one level up */
	vm_map_offset_t base_start;     /* start of range in parent_map */
	vm_map_offset_t base_end;       /* end of range in parent_map */
	vm_map_size_t   base_len;       /* length covered at this level */
	struct submap_map *next;        /* next (shallower) level */
} submap_map_t;
11553
11554 kern_return_t
vm_map_copyin_common(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t src_destroy,__unused boolean_t src_volatile,vm_map_copy_t * copy_result,boolean_t use_maxprot)11555 vm_map_copyin_common(
11556 vm_map_t src_map,
11557 vm_map_address_t src_addr,
11558 vm_map_size_t len,
11559 boolean_t src_destroy,
11560 __unused boolean_t src_volatile,
11561 vm_map_copy_t *copy_result, /* OUT */
11562 boolean_t use_maxprot)
11563 {
11564 int flags;
11565
11566 flags = 0;
11567 if (src_destroy) {
11568 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11569 }
11570 if (use_maxprot) {
11571 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11572 }
11573 return vm_map_copyin_internal(src_map,
11574 src_addr,
11575 len,
11576 flags,
11577 copy_result);
11578 }
11579 kern_return_t
vm_map_copyin_internal(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,int flags,vm_map_copy_t * copy_result)11580 vm_map_copyin_internal(
11581 vm_map_t src_map,
11582 vm_map_address_t src_addr,
11583 vm_map_size_t len,
11584 int flags,
11585 vm_map_copy_t *copy_result) /* OUT */
11586 {
11587 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11588 * in multi-level lookup, this
11589 * entry contains the actual
11590 * vm_object/offset.
11591 */
11592 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11593
11594 vm_map_offset_t src_start; /* Start of current entry --
11595 * where copy is taking place now
11596 */
11597 vm_map_offset_t src_end; /* End of entire region to be
11598 * copied */
11599 vm_map_offset_t src_base;
11600 vm_map_t base_map = src_map;
11601 boolean_t map_share = FALSE;
11602 submap_map_t *parent_maps = NULL;
11603
11604 vm_map_copy_t copy; /* Resulting copy */
11605 vm_map_address_t copy_addr;
11606 vm_map_size_t copy_size;
11607 boolean_t src_destroy;
11608 boolean_t use_maxprot;
11609 boolean_t preserve_purgeable;
11610 boolean_t entry_was_shared;
11611 vm_map_entry_t saved_src_entry;
11612
11613 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11614 return KERN_INVALID_ARGUMENT;
11615 }
11616
11617 #if CONFIG_KERNEL_TBI
11618 if (src_map->pmap == kernel_pmap) {
11619 src_addr = VM_KERNEL_TBI_FILL(src_addr);
11620 }
11621 #endif /* CONFIG_KERNEL_TBI && KASAN_TBI */
11622
11623 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11624 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11625 preserve_purgeable =
11626 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11627
11628 /*
11629 * Check for copies of zero bytes.
11630 */
11631
11632 if (len == 0) {
11633 *copy_result = VM_MAP_COPY_NULL;
11634 return KERN_SUCCESS;
11635 }
11636
11637 /*
11638 * Check that the end address doesn't overflow
11639 */
11640 src_end = src_addr + len;
11641 if (src_end < src_addr) {
11642 return KERN_INVALID_ADDRESS;
11643 }
11644
11645 /*
11646 * Compute (page aligned) start and end of region
11647 */
11648 src_start = vm_map_trunc_page(src_addr,
11649 VM_MAP_PAGE_MASK(src_map));
11650 src_end = vm_map_round_page(src_end,
11651 VM_MAP_PAGE_MASK(src_map));
11652
11653 /*
11654 * If the copy is sufficiently small, use a kernel buffer instead
11655 * of making a virtual copy. The theory being that the cost of
11656 * setting up VM (and taking C-O-W faults) dominates the copy costs
11657 * for small regions.
11658 */
11659 if ((len <= msg_ool_size_small) &&
11660 !use_maxprot &&
11661 !preserve_purgeable &&
11662 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11663 /*
11664 * Since the "msg_ool_size_small" threshold was increased and
11665 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11666 * address space limits, we revert to doing a virtual copy if the
11667 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11668 * of the commpage would now fail when it used to work.
11669 */
11670 (src_start >= vm_map_min(src_map) &&
11671 src_start < vm_map_max(src_map) &&
11672 src_end >= vm_map_min(src_map) &&
11673 src_end < vm_map_max(src_map))) {
11674 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11675 src_destroy, copy_result);
11676 }
11677
11678 /*
11679 * Allocate a header element for the list.
11680 *
11681 * Use the start and end in the header to
11682 * remember the endpoints prior to rounding.
11683 */
11684
11685 copy = vm_map_copy_allocate();
11686 copy->type = VM_MAP_COPY_ENTRY_LIST;
11687 copy->cpy_hdr.entries_pageable = TRUE;
11688 copy->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(src_map);
11689
11690 vm_map_store_init( &(copy->cpy_hdr));
11691
11692 copy->offset = src_addr;
11693 copy->size = len;
11694
11695 new_entry = vm_map_copy_entry_create(copy);
11696
11697 #define RETURN(x) \
11698 MACRO_BEGIN \
11699 vm_map_unlock(src_map); \
11700 if(src_map != base_map) \
11701 vm_map_deallocate(src_map); \
11702 if (new_entry != VM_MAP_ENTRY_NULL) \
11703 vm_map_copy_entry_dispose(new_entry); \
11704 vm_map_copy_discard(copy); \
11705 { \
11706 submap_map_t *_ptr; \
11707 \
11708 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11709 parent_maps=parent_maps->next; \
11710 if (_ptr->parent_map != base_map) \
11711 vm_map_deallocate(_ptr->parent_map); \
11712 kfree_type(submap_map_t, _ptr); \
11713 } \
11714 } \
11715 MACRO_RETURN(x); \
11716 MACRO_END
11717
11718 /*
11719 * Find the beginning of the region.
11720 */
11721
11722 vm_map_lock(src_map);
11723
11724 /*
11725 * Lookup the original "src_addr" rather than the truncated
11726 * "src_start", in case "src_start" falls in a non-map-aligned
11727 * map entry *before* the map entry that contains "src_addr"...
11728 */
11729 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11730 RETURN(KERN_INVALID_ADDRESS);
11731 }
11732 if (!tmp_entry->is_sub_map) {
11733 /*
11734 * ... but clip to the map-rounded "src_start" rather than
11735 * "src_addr" to preserve map-alignment. We'll adjust the
11736 * first copy entry at the end, if needed.
11737 */
11738 vm_map_clip_start(src_map, tmp_entry, src_start);
11739 }
11740 if (src_start < tmp_entry->vme_start) {
11741 /*
11742 * Move "src_start" up to the start of the
11743 * first map entry to copy.
11744 */
11745 src_start = tmp_entry->vme_start;
11746 }
11747 /* set for later submap fix-up */
11748 copy_addr = src_start;
11749
11750 /*
11751 * Go through entries until we get to the end.
11752 */
11753
11754 while (TRUE) {
11755 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11756 vm_map_size_t src_size; /* Size of source
11757 * map entry (in both
11758 * maps)
11759 */
11760
11761 vm_object_t src_object; /* Object to copy */
11762 vm_object_offset_t src_offset;
11763
11764 vm_object_t new_copy_object;/* vm_object_copy_* result */
11765
11766 boolean_t src_needs_copy; /* Should source map
11767 * be made read-only
11768 * for copy-on-write?
11769 */
11770
11771 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11772
11773 boolean_t was_wired; /* Was source wired? */
11774 boolean_t saved_used_for_jit; /* Saved used_for_jit. */
11775 vm_map_version_t version; /* Version before locks
11776 * dropped to make copy
11777 */
11778 kern_return_t result; /* Return value from
11779 * copy_strategically.
11780 */
11781 while (tmp_entry->is_sub_map) {
11782 vm_map_size_t submap_len;
11783 submap_map_t *ptr;
11784
11785 ptr = kalloc_type(submap_map_t, Z_WAITOK);
11786 ptr->next = parent_maps;
11787 parent_maps = ptr;
11788 ptr->parent_map = src_map;
11789 ptr->base_start = src_start;
11790 ptr->base_end = src_end;
11791 submap_len = tmp_entry->vme_end - src_start;
11792 if (submap_len > (src_end - src_start)) {
11793 submap_len = src_end - src_start;
11794 }
11795 ptr->base_len = submap_len;
11796
11797 src_start -= tmp_entry->vme_start;
11798 src_start += VME_OFFSET(tmp_entry);
11799 src_end = src_start + submap_len;
11800 src_map = VME_SUBMAP(tmp_entry);
11801 vm_map_lock(src_map);
11802 /* keep an outstanding reference for all maps in */
11803 /* the parents tree except the base map */
11804 vm_map_reference(src_map);
11805 vm_map_unlock(ptr->parent_map);
11806 if (!vm_map_lookup_entry(
11807 src_map, src_start, &tmp_entry)) {
11808 RETURN(KERN_INVALID_ADDRESS);
11809 }
11810 map_share = TRUE;
11811 if (!tmp_entry->is_sub_map) {
11812 vm_map_clip_start(src_map, tmp_entry, src_start);
11813 }
11814 src_entry = tmp_entry;
11815 }
11816 /* we are now in the lowest level submap... */
11817
11818 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11819 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11820 /* This is not supported for now. In future */
11821 /* we will need to detect the phys_contig */
11822 /* condition and then upgrade copy_slowly */
11823 /* to do physical copy from the device mem */
11824 /* based object. We can piggy-back off of */
11825 /* the was wired boolean to set-up the */
11826 /* proper handling */
11827 RETURN(KERN_PROTECTION_FAILURE);
11828 }
11829 /*
11830 * Create a new address map entry to hold the result.
11831 * Fill in the fields from the appropriate source entries.
11832 * We must unlock the source map to do this if we need
11833 * to allocate a map entry.
11834 */
11835 if (new_entry == VM_MAP_ENTRY_NULL) {
11836 version.main_timestamp = src_map->timestamp;
11837 vm_map_unlock(src_map);
11838
11839 new_entry = vm_map_copy_entry_create(copy);
11840
11841 vm_map_lock(src_map);
11842 if ((version.main_timestamp + 1) != src_map->timestamp) {
11843 if (!vm_map_lookup_entry(src_map, src_start,
11844 &tmp_entry)) {
11845 RETURN(KERN_INVALID_ADDRESS);
11846 }
11847 if (!tmp_entry->is_sub_map) {
11848 vm_map_clip_start(src_map, tmp_entry, src_start);
11849 }
11850 continue; /* restart w/ new tmp_entry */
11851 }
11852 }
11853
11854 /*
11855 * Verify that the region can be read.
11856 */
11857 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11858 !use_maxprot) ||
11859 (src_entry->max_protection & VM_PROT_READ) == 0) {
11860 RETURN(KERN_PROTECTION_FAILURE);
11861 }
11862
11863 /*
11864 * Clip against the endpoints of the entire region.
11865 */
11866
11867 vm_map_clip_end(src_map, src_entry, src_end);
11868
11869 src_size = src_entry->vme_end - src_start;
11870 src_object = VME_OBJECT(src_entry);
11871 src_offset = VME_OFFSET(src_entry);
11872 was_wired = (src_entry->wired_count != 0);
11873
11874 vm_map_entry_copy(src_map, new_entry, src_entry);
11875 if (new_entry->is_sub_map) {
11876 /* clr address space specifics */
11877 new_entry->use_pmap = FALSE;
11878 } else {
11879 /*
11880 * We're dealing with a copy-on-write operation,
11881 * so the resulting mapping should not inherit the
11882 * original mapping's accounting settings.
11883 * "iokit_acct" should have been cleared in
11884 * vm_map_entry_copy().
11885 * "use_pmap" should be reset to its default (TRUE)
11886 * so that the new mapping gets accounted for in
11887 * the task's memory footprint.
11888 */
11889 assert(!new_entry->iokit_acct);
11890 new_entry->use_pmap = TRUE;
11891 }
11892
11893 /*
11894 * Attempt non-blocking copy-on-write optimizations.
11895 */
11896
11897 /*
11898 * If we are destroying the source, and the object
11899 * is internal, we could move the object reference
11900 * from the source to the copy. The copy is
11901 * copy-on-write only if the source is.
11902 * We make another reference to the object, because
11903 * destroying the source entry will deallocate it.
11904 *
11905 * This memory transfer has to be atomic, (to prevent
11906 * the VM object from being shared or copied while
11907 * it's being moved here), so we could only do this
11908 * if we won't have to unlock the VM map until the
11909 * original mapping has been fully removed.
11910 */
11911
11912 RestartCopy:
11913 if ((src_object == VM_OBJECT_NULL ||
11914 (!was_wired && !map_share && !tmp_entry->is_shared
11915 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11916 vm_object_copy_quickly(
11917 VME_OBJECT(new_entry),
11918 src_offset,
11919 src_size,
11920 &src_needs_copy,
11921 &new_entry_needs_copy)) {
11922 new_entry->needs_copy = new_entry_needs_copy;
11923
11924 /*
11925 * Handle copy-on-write obligations
11926 */
11927
11928 if (src_needs_copy && !tmp_entry->needs_copy) {
11929 vm_prot_t prot;
11930
11931 prot = src_entry->protection & ~VM_PROT_WRITE;
11932
11933 if (override_nx(src_map, VME_ALIAS(src_entry))
11934 && prot) {
11935 prot |= VM_PROT_EXECUTE;
11936 }
11937
11938 vm_object_pmap_protect(
11939 src_object,
11940 src_offset,
11941 src_size,
11942 (src_entry->is_shared ?
11943 PMAP_NULL
11944 : src_map->pmap),
11945 VM_MAP_PAGE_SIZE(src_map),
11946 src_entry->vme_start,
11947 prot);
11948
11949 assert(tmp_entry->wired_count == 0);
11950 tmp_entry->needs_copy = TRUE;
11951 }
11952
11953 /*
11954 * The map has never been unlocked, so it's safe
11955 * to move to the next entry rather than doing
11956 * another lookup.
11957 */
11958
11959 goto CopySuccessful;
11960 }
11961
11962 entry_was_shared = tmp_entry->is_shared;
11963
11964 /*
11965 * Take an object reference, so that we may
11966 * release the map lock(s).
11967 */
11968
11969 assert(src_object != VM_OBJECT_NULL);
11970 vm_object_reference(src_object);
11971
11972 /*
11973 * Record the timestamp for later verification.
11974 * Unlock the map.
11975 */
11976
11977 version.main_timestamp = src_map->timestamp;
11978 vm_map_unlock(src_map); /* Increments timestamp once! */
11979 saved_src_entry = src_entry;
11980 tmp_entry = VM_MAP_ENTRY_NULL;
11981 src_entry = VM_MAP_ENTRY_NULL;
11982
11983 /*
11984 * Perform the copy
11985 */
11986
11987 if (was_wired ||
11988 (debug4k_no_cow_copyin &&
11989 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11990 CopySlowly:
11991 vm_object_lock(src_object);
11992 result = vm_object_copy_slowly(
11993 src_object,
11994 src_offset,
11995 src_size,
11996 THREAD_UNINT,
11997 &new_copy_object);
11998 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
11999 saved_used_for_jit = new_entry->used_for_jit;
12000 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
12001 new_entry->used_for_jit = saved_used_for_jit;
12002 VME_OFFSET_SET(new_entry,
12003 src_offset - vm_object_trunc_page(src_offset));
12004 new_entry->needs_copy = FALSE;
12005 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12006 (entry_was_shared || map_share)) {
12007 vm_object_t new_object;
12008
12009 vm_object_lock_shared(src_object);
12010 new_object = vm_object_copy_delayed(
12011 src_object,
12012 src_offset,
12013 src_size,
12014 TRUE);
12015 if (new_object == VM_OBJECT_NULL) {
12016 goto CopySlowly;
12017 }
12018
12019 VME_OBJECT_SET(new_entry, new_object, false, 0);
12020 assert(new_entry->wired_count == 0);
12021 new_entry->needs_copy = TRUE;
12022 assert(!new_entry->iokit_acct);
12023 assert(new_object->purgable == VM_PURGABLE_DENY);
12024 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
12025 result = KERN_SUCCESS;
12026 } else {
12027 vm_object_offset_t new_offset;
12028 new_offset = VME_OFFSET(new_entry);
12029 result = vm_object_copy_strategically(src_object,
12030 src_offset,
12031 src_size,
12032 &new_copy_object,
12033 &new_offset,
12034 &new_entry_needs_copy);
12035 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
12036 saved_used_for_jit = new_entry->used_for_jit;
12037 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
12038 new_entry->used_for_jit = saved_used_for_jit;
12039 if (new_offset != VME_OFFSET(new_entry)) {
12040 VME_OFFSET_SET(new_entry, new_offset);
12041 }
12042
12043 new_entry->needs_copy = new_entry_needs_copy;
12044 }
12045
12046 if (result == KERN_SUCCESS &&
12047 ((preserve_purgeable &&
12048 src_object->purgable != VM_PURGABLE_DENY) ||
12049 new_entry->used_for_jit)) {
12050 /*
12051 * Purgeable objects should be COPY_NONE, true share;
12052 * this should be propagated to the copy.
12053 *
12054 * Also force mappings the pmap specially protects to
12055 * be COPY_NONE; trying to COW these mappings would
12056 * change the effective protections, which could have
12057 * side effects if the pmap layer relies on the
12058 * specified protections.
12059 */
12060
12061 vm_object_t new_object;
12062
12063 new_object = VME_OBJECT(new_entry);
12064 assert(new_object != src_object);
12065 vm_object_lock(new_object);
12066 assert(new_object->ref_count == 1);
12067 assert(new_object->shadow == VM_OBJECT_NULL);
12068 assert(new_object->copy == VM_OBJECT_NULL);
12069 assert(new_object->vo_owner == NULL);
12070
12071 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
12072
12073 if (preserve_purgeable &&
12074 src_object->purgable != VM_PURGABLE_DENY) {
12075 new_object->true_share = TRUE;
12076
12077 /* start as non-volatile with no owner... */
12078 new_object->purgable = VM_PURGABLE_NONVOLATILE;
12079 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
12080 /* ... and move to src_object's purgeable state */
12081 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
12082 int state;
12083 state = src_object->purgable;
12084 vm_object_purgable_control(
12085 new_object,
12086 VM_PURGABLE_SET_STATE_FROM_KERNEL,
12087 &state);
12088 }
12089 /* no pmap accounting for purgeable objects */
12090 new_entry->use_pmap = FALSE;
12091 }
12092
12093 vm_object_unlock(new_object);
12094 new_object = VM_OBJECT_NULL;
12095 }
12096
12097 if (result != KERN_SUCCESS &&
12098 result != KERN_MEMORY_RESTART_COPY) {
12099 vm_map_lock(src_map);
12100 RETURN(result);
12101 }
12102
12103 /*
12104 * Throw away the extra reference
12105 */
12106
12107 vm_object_deallocate(src_object);
12108
12109 /*
12110 * Verify that the map has not substantially
12111 * changed while the copy was being made.
12112 */
12113
12114 vm_map_lock(src_map);
12115
12116 if ((version.main_timestamp + 1) == src_map->timestamp) {
12117 /* src_map hasn't changed: src_entry is still valid */
12118 src_entry = saved_src_entry;
12119 goto VerificationSuccessful;
12120 }
12121
12122 /*
12123 * Simple version comparison failed.
12124 *
12125 * Retry the lookup and verify that the
12126 * same object/offset are still present.
12127 *
12128 * [Note: a memory manager that colludes with
12129 * the calling task can detect that we have
12130 * cheated. While the map was unlocked, the
12131 * mapping could have been changed and restored.]
12132 */
12133
12134 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12135 if (result != KERN_MEMORY_RESTART_COPY) {
12136 vm_object_deallocate(VME_OBJECT(new_entry));
12137 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL, false, 0);
12138 /* reset accounting state */
12139 new_entry->iokit_acct = FALSE;
12140 new_entry->use_pmap = TRUE;
12141 }
12142 RETURN(KERN_INVALID_ADDRESS);
12143 }
12144
12145 src_entry = tmp_entry;
12146 vm_map_clip_start(src_map, src_entry, src_start);
12147
12148 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12149 !use_maxprot) ||
12150 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12151 goto VerificationFailed;
12152 }
12153
12154 if (src_entry->vme_end < new_entry->vme_end) {
12155 /*
12156 * This entry might have been shortened
12157 * (vm_map_clip_end) or been replaced with
12158 * an entry that ends closer to "src_start"
12159 * than before.
12160 * Adjust "new_entry" accordingly; copying
12161 * less memory would be correct but we also
12162 * redo the copy (see below) if the new entry
12163 * no longer points at the same object/offset.
12164 */
12165 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12166 VM_MAP_COPY_PAGE_MASK(copy)));
12167 new_entry->vme_end = src_entry->vme_end;
12168 src_size = new_entry->vme_end - src_start;
12169 } else if (src_entry->vme_end > new_entry->vme_end) {
12170 /*
12171 * This entry might have been extended
12172 * (vm_map_entry_simplify() or coalesce)
12173 * or been replaced with an entry that ends farther
12174 * from "src_start" than before.
12175 *
12176 * We've called vm_object_copy_*() only on
12177 * the previous <start:end> range, so we can't
12178 * just extend new_entry. We have to re-do
12179 * the copy based on the new entry as if it was
12180 * pointing at a different object/offset (see
12181 * "Verification failed" below).
12182 */
12183 }
12184
12185 if ((VME_OBJECT(src_entry) != src_object) ||
12186 (VME_OFFSET(src_entry) != src_offset) ||
12187 (src_entry->vme_end > new_entry->vme_end)) {
12188 /*
12189 * Verification failed.
12190 *
12191 * Start over with this top-level entry.
12192 */
12193
12194 VerificationFailed: ;
12195
12196 vm_object_deallocate(VME_OBJECT(new_entry));
12197 tmp_entry = src_entry;
12198 continue;
12199 }
12200
12201 /*
12202 * Verification succeeded.
12203 */
12204
12205 VerificationSuccessful:;
12206
12207 if (result == KERN_MEMORY_RESTART_COPY) {
12208 goto RestartCopy;
12209 }
12210
12211 /*
12212 * Copy succeeded.
12213 */
12214
12215 CopySuccessful: ;
12216
12217 /*
12218 * Link in the new copy entry.
12219 */
12220
12221 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12222 new_entry);
12223
12224 /*
12225 * Determine whether the entire region
12226 * has been copied.
12227 */
12228 src_base = src_start;
12229 src_start = new_entry->vme_end;
12230 new_entry = VM_MAP_ENTRY_NULL;
12231 while ((src_start >= src_end) && (src_end != 0)) {
12232 submap_map_t *ptr;
12233
12234 if (src_map == base_map) {
12235 /* back to the top */
12236 break;
12237 }
12238
12239 ptr = parent_maps;
12240 assert(ptr != NULL);
12241 parent_maps = parent_maps->next;
12242
12243 /* fix up the damage we did in that submap */
12244 vm_map_simplify_range(src_map,
12245 src_base,
12246 src_end);
12247
12248 vm_map_unlock(src_map);
12249 vm_map_deallocate(src_map);
12250 vm_map_lock(ptr->parent_map);
12251 src_map = ptr->parent_map;
12252 src_base = ptr->base_start;
12253 src_start = ptr->base_start + ptr->base_len;
12254 src_end = ptr->base_end;
12255 if (!vm_map_lookup_entry(src_map,
12256 src_start,
12257 &tmp_entry) &&
12258 (src_end > src_start)) {
12259 RETURN(KERN_INVALID_ADDRESS);
12260 }
12261 kfree_type(submap_map_t, ptr);
12262 if (parent_maps == NULL) {
12263 map_share = FALSE;
12264 }
12265 src_entry = tmp_entry->vme_prev;
12266 }
12267
12268 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12269 (src_start >= src_addr + len) &&
12270 (src_addr + len != 0)) {
12271 /*
12272 * Stop copying now, even though we haven't reached
12273 * "src_end". We'll adjust the end of the last copy
12274 * entry at the end, if needed.
12275 *
12276 * If src_map's alignment is different from the
12277 * system's page-alignment, there could be
12278 * extra non-map-aligned map entries between
12279 * the original (non-rounded) "src_addr + len"
12280 * and the rounded "src_end".
12281 * We do not want to copy those map entries since
12282 * they're not part of the copied range.
12283 */
12284 break;
12285 }
12286
12287 if ((src_start >= src_end) && (src_end != 0)) {
12288 break;
12289 }
12290
12291 /*
12292 * Verify that there are no gaps in the region
12293 */
12294
12295 tmp_entry = src_entry->vme_next;
12296 if ((tmp_entry->vme_start != src_start) ||
12297 (tmp_entry == vm_map_to_entry(src_map))) {
12298 RETURN(KERN_INVALID_ADDRESS);
12299 }
12300 }
12301
12302 /*
12303 * If the source should be destroyed, do it now, since the
12304 * copy was successful.
12305 */
12306 if (src_destroy) {
12307 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_FLAGS;
12308
12309 if (src_map == kernel_map) {
12310 remove_flags |= VM_MAP_REMOVE_KUNWIRE;
12311 }
12312 (void)vm_map_remove_and_unlock(src_map,
12313 vm_map_trunc_page(src_addr, VM_MAP_PAGE_MASK(src_map)),
12314 src_end,
12315 remove_flags,
12316 KMEM_GUARD_NONE);
12317 } else {
12318 /* fix up the damage we did in the base map */
12319 vm_map_simplify_range(
12320 src_map,
12321 vm_map_trunc_page(src_addr,
12322 VM_MAP_PAGE_MASK(src_map)),
12323 vm_map_round_page(src_end,
12324 VM_MAP_PAGE_MASK(src_map)));
12325 vm_map_unlock(src_map);
12326 }
12327
12328 tmp_entry = VM_MAP_ENTRY_NULL;
12329
12330 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12331 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12332 vm_map_offset_t original_start, original_offset, original_end;
12333
12334 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12335
12336 /* adjust alignment of first copy_entry's "vme_start" */
12337 tmp_entry = vm_map_copy_first_entry(copy);
12338 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12339 vm_map_offset_t adjustment;
12340
12341 original_start = tmp_entry->vme_start;
12342 original_offset = VME_OFFSET(tmp_entry);
12343
12344 /* map-align the start of the first copy entry... */
12345 adjustment = (tmp_entry->vme_start -
12346 vm_map_trunc_page(
12347 tmp_entry->vme_start,
12348 VM_MAP_PAGE_MASK(src_map)));
12349 tmp_entry->vme_start -= adjustment;
12350 VME_OFFSET_SET(tmp_entry,
12351 VME_OFFSET(tmp_entry) - adjustment);
12352 copy_addr -= adjustment;
12353 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12354 /* ... adjust for mis-aligned start of copy range */
12355 adjustment =
12356 (vm_map_trunc_page(copy->offset,
12357 PAGE_MASK) -
12358 vm_map_trunc_page(copy->offset,
12359 VM_MAP_PAGE_MASK(src_map)));
12360 if (adjustment) {
12361 assert(page_aligned(adjustment));
12362 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12363 tmp_entry->vme_start += adjustment;
12364 VME_OFFSET_SET(tmp_entry,
12365 (VME_OFFSET(tmp_entry) +
12366 adjustment));
12367 copy_addr += adjustment;
12368 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12369 }
12370
12371 /*
12372 * Assert that the adjustments haven't exposed
12373 * more than was originally copied...
12374 */
12375 assert(tmp_entry->vme_start >= original_start);
12376 assert(VME_OFFSET(tmp_entry) >= original_offset);
12377 /*
12378 * ... and that it did not adjust outside of a
12379 * a single 16K page.
12380 */
12381 assert(vm_map_trunc_page(tmp_entry->vme_start,
12382 VM_MAP_PAGE_MASK(src_map)) ==
12383 vm_map_trunc_page(original_start,
12384 VM_MAP_PAGE_MASK(src_map)));
12385 }
12386
12387 /* adjust alignment of last copy_entry's "vme_end" */
12388 tmp_entry = vm_map_copy_last_entry(copy);
12389 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12390 vm_map_offset_t adjustment;
12391
12392 original_end = tmp_entry->vme_end;
12393
12394 /* map-align the end of the last copy entry... */
12395 tmp_entry->vme_end =
12396 vm_map_round_page(tmp_entry->vme_end,
12397 VM_MAP_PAGE_MASK(src_map));
12398 /* ... adjust for mis-aligned end of copy range */
12399 adjustment =
12400 (vm_map_round_page((copy->offset +
12401 copy->size),
12402 VM_MAP_PAGE_MASK(src_map)) -
12403 vm_map_round_page((copy->offset +
12404 copy->size),
12405 PAGE_MASK));
12406 if (adjustment) {
12407 assert(page_aligned(adjustment));
12408 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12409 tmp_entry->vme_end -= adjustment;
12410 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12411 }
12412
12413 /*
12414 * Assert that the adjustments haven't exposed
12415 * more than was originally copied...
12416 */
12417 assert(tmp_entry->vme_end <= original_end);
12418 /*
12419 * ... and that it did not adjust outside of a
12420 * a single 16K page.
12421 */
12422 assert(vm_map_round_page(tmp_entry->vme_end,
12423 VM_MAP_PAGE_MASK(src_map)) ==
12424 vm_map_round_page(original_end,
12425 VM_MAP_PAGE_MASK(src_map)));
12426 }
12427 }
12428
12429 /* Fix-up start and end points in copy. This is necessary */
12430 /* when the various entries in the copy object were picked */
12431 /* up from different sub-maps */
12432
12433 tmp_entry = vm_map_copy_first_entry(copy);
12434 copy_size = 0; /* compute actual size */
12435 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12436 assert(VM_MAP_PAGE_ALIGNED(
12437 copy_addr + (tmp_entry->vme_end -
12438 tmp_entry->vme_start),
12439 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12440 assert(VM_MAP_PAGE_ALIGNED(
12441 copy_addr,
12442 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12443
12444 /*
12445 * The copy_entries will be injected directly into the
12446 * destination map and might not be "map aligned" there...
12447 */
12448 tmp_entry->map_aligned = FALSE;
12449
12450 tmp_entry->vme_end = copy_addr +
12451 (tmp_entry->vme_end - tmp_entry->vme_start);
12452 tmp_entry->vme_start = copy_addr;
12453 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12454 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12455 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12456 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12457 }
12458
12459 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12460 copy_size < copy->size) {
12461 /*
12462 * The actual size of the VM map copy is smaller than what
12463 * was requested by the caller. This must be because some
12464 * PAGE_SIZE-sized pages are missing at the end of the last
12465 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12466 * The caller might not have been aware of those missing
12467 * pages and might not want to be aware of it, which is
12468 * fine as long as they don't try to access (and crash on)
12469 * those missing pages.
12470 * Let's adjust the size of the "copy", to avoid failing
12471 * in vm_map_copyout() or vm_map_copy_overwrite().
12472 */
12473 assert(vm_map_round_page(copy_size,
12474 VM_MAP_PAGE_MASK(src_map)) ==
12475 vm_map_round_page(copy->size,
12476 VM_MAP_PAGE_MASK(src_map)));
12477 copy->size = copy_size;
12478 }
12479
12480 *copy_result = copy;
12481 return KERN_SUCCESS;
12482
12483 #undef RETURN
12484 }
12485
12486 kern_return_t
vm_map_copy_extract(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t do_copy,vm_map_copy_t * copy_result,vm_prot_t * cur_prot,vm_prot_t * max_prot,vm_inherit_t inheritance,vm_map_kernel_flags_t vmk_flags)12487 vm_map_copy_extract(
12488 vm_map_t src_map,
12489 vm_map_address_t src_addr,
12490 vm_map_size_t len,
12491 boolean_t do_copy,
12492 vm_map_copy_t *copy_result, /* OUT */
12493 vm_prot_t *cur_prot, /* IN/OUT */
12494 vm_prot_t *max_prot, /* IN/OUT */
12495 vm_inherit_t inheritance,
12496 vm_map_kernel_flags_t vmk_flags)
12497 {
12498 vm_map_copy_t copy;
12499 kern_return_t kr;
12500 vm_prot_t required_cur_prot, required_max_prot;
12501
12502 /*
12503 * Check for copies of zero bytes.
12504 */
12505
12506 if (len == 0) {
12507 *copy_result = VM_MAP_COPY_NULL;
12508 return KERN_SUCCESS;
12509 }
12510
12511 /*
12512 * Check that the end address doesn't overflow
12513 */
12514 if (src_addr + len < src_addr) {
12515 return KERN_INVALID_ADDRESS;
12516 }
12517
12518 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12519 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12520 }
12521
12522 required_cur_prot = *cur_prot;
12523 required_max_prot = *max_prot;
12524
12525 /*
12526 * Allocate a header element for the list.
12527 *
12528 * Use the start and end in the header to
12529 * remember the endpoints prior to rounding.
12530 */
12531
12532 copy = vm_map_copy_allocate();
12533 copy->type = VM_MAP_COPY_ENTRY_LIST;
12534 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12535
12536 vm_map_store_init(©->cpy_hdr);
12537
12538 copy->offset = 0;
12539 copy->size = len;
12540
12541 kr = vm_map_remap_extract(src_map,
12542 src_addr,
12543 len,
12544 do_copy, /* copy */
12545 ©->cpy_hdr,
12546 cur_prot, /* IN/OUT */
12547 max_prot, /* IN/OUT */
12548 inheritance,
12549 vmk_flags);
12550 if (kr != KERN_SUCCESS) {
12551 vm_map_copy_discard(copy);
12552 return kr;
12553 }
12554 if (required_cur_prot != VM_PROT_NONE) {
12555 assert((*cur_prot & required_cur_prot) == required_cur_prot);
12556 assert((*max_prot & required_max_prot) == required_max_prot);
12557 }
12558
12559 *copy_result = copy;
12560 return KERN_SUCCESS;
12561 }
12562
12563 /*
12564 * vm_map_copyin_object:
12565 *
12566 * Create a copy object from an object.
12567 * Our caller donates an object reference.
12568 */
12569
12570 kern_return_t
vm_map_copyin_object(vm_object_t object,vm_object_offset_t offset,vm_object_size_t size,vm_map_copy_t * copy_result)12571 vm_map_copyin_object(
12572 vm_object_t object,
12573 vm_object_offset_t offset, /* offset of region in object */
12574 vm_object_size_t size, /* size of region in object */
12575 vm_map_copy_t *copy_result) /* OUT */
12576 {
12577 vm_map_copy_t copy; /* Resulting copy */
12578
12579 /*
12580 * We drop the object into a special copy object
12581 * that contains the object directly.
12582 */
12583
12584 copy = vm_map_copy_allocate();
12585 copy->type = VM_MAP_COPY_OBJECT;
12586 copy->cpy_object = object;
12587 copy->offset = offset;
12588 copy->size = size;
12589
12590 *copy_result = copy;
12591 return KERN_SUCCESS;
12592 }
12593
/*
 *	vm_map_fork_share:
 *
 *	Helper for map forking: clone "old_entry" from "old_map" into
 *	"new_map" as a *shared* mapping, so that parent and child see the
 *	same memory.  Picks (or creates) the proper backing VM object,
 *	shadowing it first when a deferred symmetric copy must be
 *	completed or protected, switches symmetric-copy objects to the
 *	delayed-copy strategy, links the cloned entry at the end of
 *	"new_map", and lets the pmap layer pre-copy translations.
 *
 *	NOTE(review): both maps appear to be locked by the caller (entries
 *	and pmaps are accessed without taking the map locks here) --
 *	confirm against the vm_map_fork() call site.
 */
static void
vm_map_fork_share(
	vm_map_t        old_map,
	vm_map_entry_t  old_entry,
	vm_map_t        new_map)
{
	vm_object_t     object;
	vm_map_entry_t  new_entry;

	/*
	 *	New sharing code.  New map entry
	 *	references original object.  Internal
	 *	objects use asynchronous copy algorithm for
	 *	future copies.  First make sure we have
	 *	the right object.  If we need a shadow,
	 *	or someone else already has one, then
	 *	make a new shadow and share it.
	 */

	/*
	 * "object" is only initialized for non-submap entries; the
	 * submap branch below never reads it.
	 */
	if (!old_entry->is_sub_map) {
		object = VME_OBJECT(old_entry);
	}

	if (old_entry->is_sub_map) {
		assert(old_entry->wired_count == 0);
#ifndef NO_NESTED_PMAP
#if !PMAP_FORK_NEST
		if (old_entry->use_pmap) {
			kern_return_t   result;

			/* share the submap's translations by nesting its pmap */
			result = pmap_nest(new_map->pmap,
			    (VME_SUBMAP(old_entry))->pmap,
			    (addr64_t)old_entry->vme_start,
			    (uint64_t)(old_entry->vme_end - old_entry->vme_start));
			if (result) {
				panic("vm_map_fork_share: pmap_nest failed!");
			}
		}
#endif /* !PMAP_FORK_NEST */
#endif /* NO_NESTED_PMAP */
	} else if (object == VM_OBJECT_NULL) {
		/* entry has no object yet: give it one covering exactly this range */
		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
		    old_entry->vme_start));
		VME_OFFSET_SET(old_entry, 0);
		VME_OBJECT_SET(old_entry, object, false, 0);
		old_entry->use_pmap = TRUE;
//		assert(!old_entry->needs_copy);
	} else if (object->copy_strategy !=
	    MEMORY_OBJECT_COPY_SYMMETRIC) {
		/*
		 *	We are already using an asymmetric
		 *	copy, and therefore we already have
		 *	the right object.
		 */

		assert(!old_entry->needs_copy);
	} else if (old_entry->needs_copy ||     /* case 1 */
	    object->shadowed ||                 /* case 2 */
	    (!object->true_share &&             /* case 3 */
	    !old_entry->is_shared &&
	    (object->vo_size >
	    (vm_map_size_t)(old_entry->vme_end -
	    old_entry->vme_start)))) {
		/*
		 *	We need to create a shadow.
		 *	There are three cases here.
		 *	In the first case, we need to
		 *	complete a deferred symmetrical
		 *	copy that we participated in.
		 *	In the second and third cases,
		 *	we need to create the shadow so
		 *	that changes that we make to the
		 *	object do not interfere with
		 *	any symmetrical copies which
		 *	have occurred (case 2) or which
		 *	might occur (case 3).
		 *
		 *	The first case is when we had
		 *	deferred shadow object creation
		 *	via the entry->needs_copy mechanism.
		 *	This mechanism only works when
		 *	only one entry points to the source
		 *	object, and we are about to create
		 *	a second entry pointing to the
		 *	same object. The problem is that
		 *	there is no way of mapping from
		 *	an object to the entries pointing
		 *	to it. (Deferred shadow creation
		 *	works with one entry because it occurs
		 *	at fault time, and we walk from the
		 *	entry to the object when handling
		 *	the fault.)
		 *
		 *	The second case is when the object
		 *	to be shared has already been copied
		 *	with a symmetric copy, but we point
		 *	directly to the object without
		 *	needs_copy set in our entry. (This
		 *	can happen because different ranges
		 *	of an object can be pointed to by
		 *	different entries. In particular,
		 *	a single entry pointing to an object
		 *	can be split by a call to vm_inherit,
		 *	which, combined with task_create, can
		 *	result in the different entries
		 *	having different needs_copy values.)
		 *	The shadowed flag in the object allows
		 *	us to detect this case. The problem
		 *	with this case is that if this object
		 *	has or will have shadows, then we
		 *	must not perform an asymmetric copy
		 *	of this object, since such a copy
		 *	allows the object to be changed, which
		 *	will break the previous symmetrical
		 *	copies (which rely upon the object
		 *	not changing). In a sense, the shadowed
		 *	flag says "don't change this object".
		 *	We fix this by creating a shadow
		 *	object for this object, and sharing
		 *	that. This works because we are free
		 *	to change the shadow object (and thus
		 *	to use an asymmetric copy strategy);
		 *	this is also semantically correct,
		 *	since this object is temporary, and
		 *	therefore a copy of the object is
		 *	as good as the object itself. (This
		 *	is not true for permanent objects,
		 *	since the pager needs to see changes,
		 *	which won't happen if the changes
		 *	are made to a copy.)
		 *
		 *	The third case is when the object
		 *	to be shared has parts sticking
		 *	outside of the entry we're working
		 *	with, and thus may in the future
		 *	be subject to a symmetrical copy.
		 *	(This is a preemptive version of
		 *	case 2.)
		 */
		VME_OBJECT_SHADOW(old_entry,
		    (vm_map_size_t) (old_entry->vme_end -
		    old_entry->vme_start),
		    vm_map_always_shadow(old_map));

		/*
		 *	If we're making a shadow for other than
		 *	copy on write reasons, then we have
		 *	to remove write permission.
		 */

		if (!old_entry->needs_copy &&
		    (old_entry->protection & VM_PROT_WRITE)) {
			vm_prot_t prot;

			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));

			prot = old_entry->protection & ~VM_PROT_WRITE;

			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));

			if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
				prot |= VM_PROT_EXECUTE;
			}


			if (old_map->mapped_in_other_pmaps) {
				/*
				 * The map may be mapped elsewhere too:
				 * protect at the object level so every
				 * pmap mapping these pages is downgraded.
				 */
				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					old_entry->vme_start),
					PMAP_NULL,
					PAGE_SIZE,
					old_entry->vme_start,
					prot);
			} else {
				pmap_protect(old_map->pmap,
				    old_entry->vme_start,
				    old_entry->vme_end,
				    prot);
			}
		}

		old_entry->needs_copy = FALSE;
		object = VME_OBJECT(old_entry);
	}


	/*
	 *	If object was using a symmetric copy strategy,
	 *	change its copy strategy to the default
	 *	asymmetric copy strategy, which is copy_delay
	 *	in the non-norma case and copy_call in the
	 *	norma case. Bump the reference count for the
	 *	new entry.
	 */

	if (old_entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(old_entry));
	} else {
		vm_object_lock(object);
		vm_object_reference_locked(object);
		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		}
		vm_object_unlock(object);
	}

	/*
	 *	Clone the entry, using object ref from above.
	 *	Mark both entries as shared.
	 */

	new_entry = vm_map_entry_create(new_map); /* Never the kernel map or descendants */
	vm_map_entry_copy(old_map, new_entry, old_entry);
	old_entry->is_shared = TRUE;
	new_entry->is_shared = TRUE;

	/*
	 *	We're dealing with a shared mapping, so the resulting mapping
	 *	should inherit some of the original mapping's accounting settings.
	 *	"iokit_acct" should have been cleared in vm_map_entry_copy().
	 *	"use_pmap" should stay the same as before (if it hasn't been reset
	 *	to TRUE when we cleared "iokit_acct").
	 */
	assert(!new_entry->iokit_acct);

	/*
	 *	If old entry's inheritance is VM_INHERIT_NONE,
	 *	the new entry is for corpse fork, remove the
	 *	write permission from the new entry.
	 */
	if (old_entry->inheritance == VM_INHERIT_NONE) {
		new_entry->protection &= ~VM_PROT_WRITE;
		new_entry->max_protection &= ~VM_PROT_WRITE;
	}

	/*
	 *	Insert the entry into the new map -- we
	 *	know we're inserting at the end of the new
	 *	map.
	 */

	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
	    VM_MAP_KERNEL_FLAGS_NONE);

	/*
	 *	Update the physical map
	 */

	if (old_entry->is_sub_map) {
		/* Bill Angell pmap support goes here */
	} else {
		/* pre-copy existing translations so the child avoids soft faults */
		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
		    old_entry->vme_end - old_entry->vme_start,
		    old_entry->vme_start);
	}
}
12852
/*
 * vm_map_fork_copy:
 *
 * Slow path for VM_INHERIT_COPY entries during vm_map_fork(): copy the
 * address range covered by *old_entry_p out of "old_map" with
 * vm_map_copyin_internal() and insert the result into "new_map".
 *
 * Called with "old_map" locked; the lock is dropped for the copyin and
 * re-taken before returning, so the map may have changed in between.
 * On return, *old_entry_p points at the entry where the caller should
 * resume its traversal of "old_map".
 *
 * Returns TRUE if the range was copied (caller accounts its size),
 * FALSE if the copy failed and the range should be skipped.
 */
static boolean_t
vm_map_fork_copy(
	vm_map_t old_map,
	vm_map_entry_t *old_entry_p,
	vm_map_t new_map,
	int vm_map_copyin_flags)
{
	vm_map_entry_t old_entry = *old_entry_p;
	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
	vm_map_offset_t start = old_entry->vme_start;
	vm_map_copy_t copy;
	/* remember the insertion point in new_map before unlocking old_map */
	vm_map_entry_t last = vm_map_last_entry(new_map);

	vm_map_unlock(old_map);
	/*
	 * Use maxprot version of copyin because we
	 * care about whether this memory can ever
	 * be accessed, not just whether it's accessible
	 * right now.
	 */
	vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
	if (vm_map_copyin_internal(old_map, start, entry_size,
	    vm_map_copyin_flags, &copy)
	    != KERN_SUCCESS) {
		/*
		 * The map might have changed while it
		 * was unlocked, check it again. Skip
		 * any blank space or permanently
		 * unreadable region.
		 */
		vm_map_lock(old_map);
		if (!vm_map_lookup_entry(old_map, start, &last) ||
		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
			last = last->vme_next;
		}
		*old_entry_p = last;

		/*
		 * XXX For some error returns, want to
		 * XXX skip to the next element. Note
		 * that INVALID_ADDRESS and
		 * PROTECTION_FAILURE are handled above.
		 */

		return FALSE;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	/*
	 * Insert the copy into the new map
	 */
	vm_map_copy_insert(new_map, last, copy);

	/*
	 * Pick up the traversal at the end of
	 * the copied region.
	 */

	vm_map_lock(old_map);
	start += entry_size;
	if (!vm_map_lookup_entry(old_map, start, &last)) {
		/* no entry at "start": resume at the next entry after the hole */
		last = last->vme_next;
	} else {
		if (last->vme_start == start) {
			/*
			 * No need to clip here and we don't
			 * want to cause any unnecessary
			 * unnesting...
			 */
		} else {
			vm_map_clip_start(old_map, last, start);
		}
	}
	*old_entry_p = last;

	return TRUE;
}
12935
12936 #if PMAP_FORK_NEST
12937 #define PMAP_FORK_NEST_DEBUG 0
12938 static inline void
vm_map_fork_unnest(pmap_t new_pmap,vm_map_offset_t pre_nested_start,vm_map_offset_t pre_nested_end,vm_map_offset_t start,vm_map_offset_t end)12939 vm_map_fork_unnest(
12940 pmap_t new_pmap,
12941 vm_map_offset_t pre_nested_start,
12942 vm_map_offset_t pre_nested_end,
12943 vm_map_offset_t start,
12944 vm_map_offset_t end)
12945 {
12946 kern_return_t kr;
12947 vm_map_offset_t nesting_mask, start_unnest, end_unnest;
12948
12949 assertf(pre_nested_start <= pre_nested_end,
12950 "pre_nested start 0x%llx end 0x%llx",
12951 (uint64_t)pre_nested_start, (uint64_t)pre_nested_end);
12952 assertf(start <= end,
12953 "start 0x%llx end 0x%llx",
12954 (uint64_t) start, (uint64_t)end);
12955
12956 if (pre_nested_start == pre_nested_end) {
12957 /* nothing was pre-nested: done */
12958 return;
12959 }
12960 if (end <= pre_nested_start) {
12961 /* fully before pre-nested range: done */
12962 return;
12963 }
12964 if (start >= pre_nested_end) {
12965 /* fully after pre-nested range: done */
12966 return;
12967 }
12968 /* ignore parts of range outside of pre_nested range */
12969 if (start < pre_nested_start) {
12970 start = pre_nested_start;
12971 }
12972 if (end > pre_nested_end) {
12973 end = pre_nested_end;
12974 }
12975 nesting_mask = pmap_shared_region_size_min(new_pmap) - 1;
12976 start_unnest = start & ~nesting_mask;
12977 end_unnest = (end + nesting_mask) & ~nesting_mask;
12978 kr = pmap_unnest(new_pmap,
12979 (addr64_t)start_unnest,
12980 (uint64_t)(end_unnest - start_unnest));
12981 #if PMAP_FORK_NEST_DEBUG
12982 printf("PMAP_FORK_NEST %s:%d new_pmap %p 0x%llx:0x%llx -> pmap_unnest 0x%llx:0x%llx kr 0x%x\n", __FUNCTION__, __LINE__, new_pmap, (uint64_t)start, (uint64_t)end, (uint64_t)start_unnest, (uint64_t)end_unnest, kr);
12983 #endif /* PMAP_FORK_NEST_DEBUG */
12984 assertf(kr == KERN_SUCCESS,
12985 "0x%llx 0x%llx pmap_unnest(%p, 0x%llx, 0x%llx) -> 0x%x",
12986 (uint64_t)start, (uint64_t)end, new_pmap,
12987 (uint64_t)start_unnest, (uint64_t)(end_unnest - start_unnest),
12988 kr);
12989 }
12990 #endif /* PMAP_FORK_NEST */
12991
12992 /*
12993 * vm_map_fork:
12994 *
12995 * Create and return a new map based on the old
12996 * map, according to the inheritance values on the
12997 * regions in that map and the options.
12998 *
12999 * The source map must not be locked.
13000 */
vm_map_t
vm_map_fork(
	ledger_t ledger,
	vm_map_t old_map,
	int options)
{
	pmap_t new_pmap;
	vm_map_t new_map;
	vm_map_entry_t old_entry;
	vm_map_size_t new_size = 0, entry_size;
	vm_map_entry_t new_entry;
	boolean_t src_needs_copy;
	boolean_t new_entry_needs_copy;
	boolean_t pmap_is64bit;
	int vm_map_copyin_flags;
	vm_inherit_t old_entry_inheritance;
	int map_create_options;
	/* only meaningful when VM_MAP_FORK_CORPSE_FOOTPRINT is set */
	kern_return_t footprint_collect_kr;

	/* reject any option bits we don't understand */
	if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_PRESERVE_PURGEABLE |
	    VM_MAP_FORK_CORPSE_FOOTPRINT)) {
		/* unsupported option */
		return VM_MAP_NULL;
	}

	/* the child pmap inherits the parent's address-space width */
	pmap_is64bit =
#if defined(__i386__) || defined(__x86_64__)
	    old_map->pmap->pm_task_map != TASK_MAP_32BIT;
#elif defined(__arm64__)
	    old_map->pmap->is_64bit;
#else
#error Unknown architecture.
#endif

	/* build pmap creation flags from the parent pmap's attributes */
	unsigned int pmap_flags = 0;
	pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
#if defined(HAS_APPLE_PAC)
	pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
#endif
#if CONFIG_ROSETTA
	pmap_flags |= old_map->pmap->is_rosetta ? PMAP_CREATE_ROSETTA : 0;
#endif
#if PMAP_CREATE_FORCE_4K_PAGES
	if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
	    PAGE_SIZE != FOURK_PAGE_SIZE) {
		pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
	}
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
	new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
	if (new_pmap == NULL) {
		return VM_MAP_NULL;
	}

	/* hold a reference on old_map while we walk it; released on exit */
	vm_map_reference(old_map);
	vm_map_lock(old_map);

	map_create_options = 0;
	if (old_map->hdr.entries_pageable) {
		map_create_options |= VM_MAP_CREATE_PAGEABLE;
	}
	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
		footprint_collect_kr = KERN_SUCCESS;
	}
	new_map = vm_map_create_options(new_pmap,
	    old_map->min_offset,
	    old_map->max_offset,
	    map_create_options);
	/* inherit cs_enforcement */
	vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
	vm_map_lock(new_map);
	vm_commit_pagezero_status(new_map);
	/* inherit the parent map's page size */
	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));

#if CONFIG_MAP_RANGES
	/* inherit the parent map's VM ranges */
	vm_map_range_fork(new_map, old_map);
#endif
	/* ensure PMAP_CS structures are prepared for the fork */
	pmap_cs_fork_prepare(old_map->pmap, new_pmap);

#if PMAP_FORK_NEST
	/*
	 * Pre-nest the shared region's pmap.
	 */
	vm_map_offset_t pre_nested_start = 0, pre_nested_end = 0;
	pmap_fork_nest(old_map->pmap, new_pmap,
	    &pre_nested_start, &pre_nested_end);
#if PMAP_FORK_NEST_DEBUG
	printf("PMAP_FORK_NEST %s:%d old %p new %p pre_nested start 0x%llx end 0x%llx\n", __FUNCTION__, __LINE__, old_map->pmap, new_pmap, (uint64_t)pre_nested_start, (uint64_t)pre_nested_end);
#endif /* PMAP_FORK_NEST_DEBUG */
#endif /* PMAP_FORK_NEST */

	/*
	 * Walk the parent's entries; advancement happens at the bottom of
	 * the loop (or via "continue" after vm_map_fork_copy, which
	 * updates old_entry itself).
	 */
	for (old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map);) {
		/*
		 * Abort any corpse collection if the system is shutting down.
		 */
		if ((options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
		    get_system_inshutdown()) {
#if PMAP_FORK_NEST
			/* undo pre-nesting for the uncovered tail of new_map */
			new_entry = vm_map_last_entry(new_map);
			if (new_entry == vm_map_to_entry(new_map)) {
				/* unnest all that was pre-nested */
				vm_map_fork_unnest(new_pmap,
				    pre_nested_start, pre_nested_end,
				    vm_map_min(new_map), vm_map_max(new_map));
			} else if (new_entry->vme_end < vm_map_max(new_map)) {
				/* unnest hole at the end, if pre-nested */
				vm_map_fork_unnest(new_pmap,
				    pre_nested_start, pre_nested_end,
				    new_entry->vme_end, vm_map_max(new_map));
			}
#endif /* PMAP_FORK_NEST */
			vm_map_corpse_footprint_collect_done(new_map);
			vm_map_unlock(new_map);
			vm_map_unlock(old_map);
			vm_map_deallocate(new_map);
			vm_map_deallocate(old_map);
			printf("Aborting corpse map due to system shutdown\n");
			return VM_MAP_NULL;
		}

		entry_size = old_entry->vme_end - old_entry->vme_start;

#if PMAP_FORK_NEST
		/*
		 * Undo any unnecessary pre-nesting.
		 */
		vm_map_offset_t prev_end;
		if (old_entry == vm_map_first_entry(old_map)) {
			prev_end = vm_map_min(old_map);
		} else {
			prev_end = old_entry->vme_prev->vme_end;
		}
		if (prev_end < old_entry->vme_start) {
			/* unnest hole before this entry, if pre-nested */
			vm_map_fork_unnest(new_pmap,
			    pre_nested_start, pre_nested_end,
			    prev_end, old_entry->vme_start);
		}
		if (old_entry->is_sub_map && old_entry->use_pmap) {
			/* keep this entry nested in the child */
#if PMAP_FORK_NEST_DEBUG
			printf("PMAP_FORK_NEST %s:%d new_pmap %p keeping 0x%llx:0x%llx nested\n", __FUNCTION__, __LINE__, new_pmap, (uint64_t)old_entry->vme_start, (uint64_t)old_entry->vme_end);
#endif /* PMAP_FORK_NEST_DEBUG */
		} else {
			/* undo nesting for this entry, if pre-nested */
			vm_map_fork_unnest(new_pmap,
			    pre_nested_start, pre_nested_end,
			    old_entry->vme_start, old_entry->vme_end);
		}
#endif /* PMAP_FORK_NEST */

		old_entry_inheritance = old_entry->inheritance;
		/*
		 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
		 * share VM_INHERIT_NONE entries that are not backed by a
		 * device pager.
		 */
		if (old_entry_inheritance == VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
		    (old_entry->protection & VM_PROT_READ) &&
		    !(!old_entry->is_sub_map &&
		    VME_OBJECT(old_entry) != NULL &&
		    VME_OBJECT(old_entry)->pager != NULL &&
		    is_device_pager_ops(
			    VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
			old_entry_inheritance = VM_INHERIT_SHARE;
		}

		if (old_entry_inheritance != VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
		    footprint_collect_kr == KERN_SUCCESS) {
			/*
			 * The corpse won't have old_map->pmap to query
			 * footprint information, so collect that data now
			 * and store it in new_map->vmmap_corpse_footprint
			 * for later autopsy.
			 */
			footprint_collect_kr =
			    vm_map_corpse_footprint_collect(old_map,
			    old_entry,
			    new_map);
		}

		switch (old_entry_inheritance) {
		case VM_INHERIT_NONE:
			break;

		case VM_INHERIT_SHARE:
			vm_map_fork_share(old_map, old_entry, new_map);
			new_size += entry_size;
			break;

		case VM_INHERIT_COPY:

			/*
			 * Inline the copy_quickly case;
			 * upon failure, fall back on call
			 * to vm_map_fork_copy.
			 */

			if (old_entry->is_sub_map) {
				break;
			}
			if ((old_entry->wired_count != 0) ||
			    ((VME_OBJECT(old_entry) != NULL) &&
			    (VME_OBJECT(old_entry)->true_share))) {
				goto slow_vm_map_fork_copy;
			}

			new_entry = vm_map_entry_create(new_map); /* never the kernel map or descendants */
			vm_map_entry_copy(old_map, new_entry, old_entry);
			if (old_entry->vme_permanent) {
				/* inherit "permanent" on fork() */
				new_entry->vme_permanent = TRUE;
			}

			if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
				new_map->jit_entry_exists = TRUE;
			}

			if (new_entry->is_sub_map) {
				/* clear address space specifics */
				new_entry->use_pmap = FALSE;
			} else {
				/*
				 * We're dealing with a copy-on-write operation,
				 * so the resulting mapping should not inherit
				 * the original mapping's accounting settings.
				 * "iokit_acct" should have been cleared in
				 * vm_map_entry_copy().
				 * "use_pmap" should be reset to its default
				 * (TRUE) so that the new mapping gets
				 * accounted for in the task's memory footprint.
				 */
				assert(!new_entry->iokit_acct);
				new_entry->use_pmap = TRUE;
			}

			if (!vm_object_copy_quickly(
				    VME_OBJECT(new_entry),
				    VME_OFFSET(old_entry),
				    (old_entry->vme_end -
				    old_entry->vme_start),
				    &src_needs_copy,
				    &new_entry_needs_copy)) {
				vm_map_entry_dispose(new_entry);
				goto slow_vm_map_fork_copy;
			}

			/*
			 * Handle copy-on-write obligations
			 */

			if (src_needs_copy && !old_entry->needs_copy) {
				vm_prot_t prot;

				assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));

				/* write-protect the parent's mapping for COW */
				prot = old_entry->protection & ~VM_PROT_WRITE;

				if (override_nx(old_map, VME_ALIAS(old_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));

				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					old_entry->vme_start),
					((old_entry->is_shared
					|| old_map->mapped_in_other_pmaps)
					? PMAP_NULL :
					old_map->pmap),
					VM_MAP_PAGE_SIZE(old_map),
					old_entry->vme_start,
					prot);

				assert(old_entry->wired_count == 0);
				old_entry->needs_copy = TRUE;
			}
			new_entry->needs_copy = new_entry_needs_copy;

			/*
			 * Insert the entry at the end
			 * of the map.
			 */

			vm_map_store_entry_link(new_map,
			    vm_map_last_entry(new_map),
			    new_entry,
			    VM_MAP_KERNEL_FLAGS_NONE);
			new_size += entry_size;
			break;

slow_vm_map_fork_copy:
			vm_map_copyin_flags = 0;
			if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
				vm_map_copyin_flags |=
				    VM_MAP_COPYIN_PRESERVE_PURGEABLE;
			}
			/* vm_map_fork_copy() advances old_entry itself */
			if (vm_map_fork_copy(old_map,
			    &old_entry,
			    new_map,
			    vm_map_copyin_flags)) {
				new_size += entry_size;
			}
			continue;
		}
		old_entry = old_entry->vme_next;
	}

#if PMAP_FORK_NEST
	/* undo pre-nesting beyond the last entry copied into the child */
	new_entry = vm_map_last_entry(new_map);
	if (new_entry == vm_map_to_entry(new_map)) {
		/* unnest all that was pre-nested */
		vm_map_fork_unnest(new_pmap,
		    pre_nested_start, pre_nested_end,
		    vm_map_min(new_map), vm_map_max(new_map));
	} else if (new_entry->vme_end < vm_map_max(new_map)) {
		/* unnest hole at the end, if pre-nested */
		vm_map_fork_unnest(new_pmap,
		    pre_nested_start, pre_nested_end,
		    new_entry->vme_end, vm_map_max(new_map));
	}
#endif /* PMAP_FORK_NEST */

#if defined(__arm64__)
	pmap_insert_sharedpage(new_map->pmap);
#endif /* __arm64__ */

	new_map->size = new_size;

	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		vm_map_corpse_footprint_collect_done(new_map);
	}

	/* Propagate JIT entitlement for the pmap layer. */
	if (pmap_get_jit_entitled(old_map->pmap)) {
		/* Tell the pmap that it supports JIT. */
		pmap_set_jit_entitled(new_map->pmap);
	}

	vm_map_unlock(new_map);
	vm_map_unlock(old_map);
	/* drop the reference taken at the top; new_map's ref goes to caller */
	vm_map_deallocate(old_map);

	return new_map;
}
13356
13357 /*
13358 * vm_map_exec:
13359 *
13360 * Setup the "new_map" with the proper execution environment according
13361 * to the type of executable (platform, 64bit, chroot environment).
13362 * Map the comm page and shared region, etc...
13363 */
kern_return_t
vm_map_exec(
	vm_map_t new_map,
	task_t task,
	boolean_t is64bit,
	void *fsroot,
	cpu_type_t cpu,
	cpu_subtype_t cpu_subtype,
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version)
{
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
		(void *)VM_KERNEL_ADDRPERM(current_task()),
		(void *)VM_KERNEL_ADDRPERM(new_map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu,
		cpu_subtype));
	/* map the commpage; result deliberately ignored */
	(void) vm_commpage_enter(new_map, task, is64bit);

	/* map the shared region appropriate for this executable */
	(void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide, is_driverkit, rsr_version);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
		(void *)VM_KERNEL_ADDRPERM(current_task()),
		(void *)VM_KERNEL_ADDRPERM(new_map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu,
		cpu_subtype));

	/*
	 * Some devices have region(s) of memory that shouldn't get allocated by
	 * user processes. The following code creates dummy vm_map_entry_t's for each
	 * of the regions that needs to be reserved to prevent any allocations in
	 * those regions.
	 */
	kern_return_t kr = KERN_FAILURE;
	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	/* reserved entries may not be deleted and may lie beyond max_offset */
	vmk_flags.vmkf_permanent = TRUE;
	vmk_flags.vmkf_beyond_max = TRUE;

	struct vm_reserved_region *regions = NULL;
	size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
	assert((num_regions == 0) || (num_regions > 0 && regions != NULL));

	for (size_t i = 0; i < num_regions; ++i) {
		/* VM_PROT_NONE entry: blocks allocation without mapping memory */
		kr = vm_map_enter(
			new_map,
			&regions[i].vmrr_addr,
			regions[i].vmrr_size,
			(vm_map_offset_t)0,
			VM_FLAGS_FIXED,
			vmk_flags,
			VM_KERN_MEMORY_NONE,
			VM_OBJECT_NULL,
			(vm_object_offset_t)0,
			FALSE,
			VM_PROT_NONE,
			VM_PROT_NONE,
			VM_INHERIT_COPY);

		if (kr != KERN_SUCCESS) {
			/* failing to reserve is fatal: the region must stay off-limits */
			panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
		}
	}

	new_map->reserved_regions = (num_regions ? TRUE : FALSE);

	return KERN_SUCCESS;
}
13437
/*
 * Telemetry counters for vm_map_lookup_and_lock_object()'s object-copy
 * paths ("slowly", "strategically", shadow): invocation counts, cumulative
 * and maximum sizes, restarts and errors.  NOTE(review): the update sites
 * are in the function body below; no locking is visible here — presumably
 * best-effort statistics only.
 */
uint64_t vm_map_lookup_and_lock_object_copy_slowly_count = 0;
uint64_t vm_map_lookup_and_lock_object_copy_slowly_size = 0;
uint64_t vm_map_lookup_and_lock_object_copy_slowly_max = 0;
uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart = 0;
uint64_t vm_map_lookup_and_lock_object_copy_slowly_error = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_count = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_size = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_max = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_error = 0;
uint64_t vm_map_lookup_and_lock_object_copy_shadow_count = 0;
uint64_t vm_map_lookup_and_lock_object_copy_shadow_size = 0;
uint64_t vm_map_lookup_and_lock_object_copy_shadow_max = 0;
13451 /*
13452 * vm_map_lookup_and_lock_object:
13453 *
13454 * Finds the VM object, offset, and
13455 * protection for a given virtual address in the
13456 * specified map, assuming a page fault of the
13457 * type specified.
13458 *
13459 * Returns the (object, offset, protection) for
13460 * this address, whether it is wired down, and whether
13461 * this map has the only reference to the data in question.
13462 * In order to later verify this lookup, a "version"
13463 * is returned.
13464 * If contended != NULL, *contended will be set to
13465 * true iff the thread had to spin or block to acquire
13466 * an exclusive lock.
13467 *
13468 * The map MUST be locked by the caller and WILL be
13469 * locked on exit. In order to guarantee the
13470 * existence of the returned object, it is returned
13471 * locked.
13472 *
13473 * If a lookup is requested with "write protection"
13474 * specified, the map may be changed to perform virtual
13475 * copying operations, although the data referenced will
13476 * remain the same.
13477 */
13478 kern_return_t
vm_map_lookup_and_lock_object(vm_map_t * var_map,vm_map_offset_t vaddr,vm_prot_t fault_type,int object_lock_type,vm_map_version_t * out_version,vm_object_t * object,vm_object_offset_t * offset,vm_prot_t * out_prot,boolean_t * wired,vm_object_fault_info_t fault_info,vm_map_t * real_map,bool * contended)13479 vm_map_lookup_and_lock_object(
13480 vm_map_t *var_map, /* IN/OUT */
13481 vm_map_offset_t vaddr,
13482 vm_prot_t fault_type,
13483 int object_lock_type,
13484 vm_map_version_t *out_version, /* OUT */
13485 vm_object_t *object, /* OUT */
13486 vm_object_offset_t *offset, /* OUT */
13487 vm_prot_t *out_prot, /* OUT */
13488 boolean_t *wired, /* OUT */
13489 vm_object_fault_info_t fault_info, /* OUT */
13490 vm_map_t *real_map, /* OUT */
13491 bool *contended) /* OUT */
13492 {
13493 vm_map_entry_t entry;
13494 vm_map_t map = *var_map;
13495 vm_map_t old_map = *var_map;
13496 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13497 vm_map_offset_t cow_parent_vaddr = 0;
13498 vm_map_offset_t old_start = 0;
13499 vm_map_offset_t old_end = 0;
13500 vm_prot_t prot;
13501 boolean_t mask_protections;
13502 boolean_t force_copy;
13503 boolean_t no_force_copy_if_executable;
13504 boolean_t submap_needed_copy;
13505 vm_prot_t original_fault_type;
13506 vm_map_size_t fault_page_mask;
13507
13508 /*
13509 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13510 * as a mask against the mapping's actual protections, not as an
13511 * absolute value.
13512 */
13513 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13514 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13515 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13516 fault_type &= VM_PROT_ALL;
13517 original_fault_type = fault_type;
13518 if (contended) {
13519 *contended = false;
13520 }
13521
13522 *real_map = map;
13523
13524 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13525 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13526
13527 RetryLookup:
13528 fault_type = original_fault_type;
13529
13530 /*
13531 * If the map has an interesting hint, try it before calling
13532 * full blown lookup routine.
13533 */
13534 entry = map->hint;
13535
13536 if ((entry == vm_map_to_entry(map)) ||
13537 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13538 vm_map_entry_t tmp_entry;
13539
13540 /*
13541 * Entry was either not a valid hint, or the vaddr
13542 * was not contained in the entry, so do a full lookup.
13543 */
13544 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13545 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13546 vm_map_unlock(cow_sub_map_parent);
13547 }
13548 if ((*real_map != map)
13549 && (*real_map != cow_sub_map_parent)) {
13550 vm_map_unlock(*real_map);
13551 }
13552 return KERN_INVALID_ADDRESS;
13553 }
13554
13555 entry = tmp_entry;
13556 }
13557 if (map == old_map) {
13558 old_start = entry->vme_start;
13559 old_end = entry->vme_end;
13560 }
13561
13562 /*
13563 * Handle submaps. Drop lock on upper map, submap is
13564 * returned locked.
13565 */
13566
13567 submap_needed_copy = FALSE;
13568 submap_recurse:
13569 if (entry->is_sub_map) {
13570 vm_map_offset_t local_vaddr;
13571 vm_map_offset_t end_delta;
13572 vm_map_offset_t start_delta;
13573 vm_map_entry_t submap_entry, saved_submap_entry;
13574 vm_object_offset_t submap_entry_offset;
13575 vm_object_size_t submap_entry_size;
13576 vm_prot_t subentry_protection;
13577 vm_prot_t subentry_max_protection;
13578 boolean_t subentry_no_copy_on_read;
13579 boolean_t subentry_permanent;
13580 boolean_t subentry_pmap_cs_associated;
13581 boolean_t mapped_needs_copy = FALSE;
13582 vm_map_version_t version;
13583
13584 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13585 "map %p (%d) entry %p submap %p (%d)\n",
13586 map, VM_MAP_PAGE_SHIFT(map), entry,
13587 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13588
13589 local_vaddr = vaddr;
13590
13591 if ((entry->use_pmap &&
13592 !((fault_type & VM_PROT_WRITE) ||
13593 force_copy))) {
13594 /* if real_map equals map we unlock below */
13595 if ((*real_map != map) &&
13596 (*real_map != cow_sub_map_parent)) {
13597 vm_map_unlock(*real_map);
13598 }
13599 *real_map = VME_SUBMAP(entry);
13600 }
13601
13602 if (entry->needs_copy &&
13603 ((fault_type & VM_PROT_WRITE) ||
13604 force_copy)) {
13605 if (!mapped_needs_copy) {
13606 if (vm_map_lock_read_to_write(map)) {
13607 vm_map_lock_read(map);
13608 *real_map = map;
13609 goto RetryLookup;
13610 }
13611 vm_map_lock_read(VME_SUBMAP(entry));
13612 *var_map = VME_SUBMAP(entry);
13613 cow_sub_map_parent = map;
13614 /* reset base to map before cow object */
13615 /* this is the map which will accept */
13616 /* the new cow object */
13617 old_start = entry->vme_start;
13618 old_end = entry->vme_end;
13619 cow_parent_vaddr = vaddr;
13620 mapped_needs_copy = TRUE;
13621 } else {
13622 vm_map_lock_read(VME_SUBMAP(entry));
13623 *var_map = VME_SUBMAP(entry);
13624 if ((cow_sub_map_parent != map) &&
13625 (*real_map != map)) {
13626 vm_map_unlock(map);
13627 }
13628 }
13629 } else {
13630 if (entry->needs_copy) {
13631 submap_needed_copy = TRUE;
13632 }
13633 vm_map_lock_read(VME_SUBMAP(entry));
13634 *var_map = VME_SUBMAP(entry);
13635 /* leave map locked if it is a target */
13636 /* cow sub_map above otherwise, just */
13637 /* follow the maps down to the object */
13638 /* here we unlock knowing we are not */
13639 /* revisiting the map. */
13640 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13641 vm_map_unlock_read(map);
13642 }
13643 }
13644
13645 map = *var_map;
13646
13647 /* calculate the offset in the submap for vaddr */
13648 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13649 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13650 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13651 (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13652
13653 RetrySubMap:
13654 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13655 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13656 vm_map_unlock(cow_sub_map_parent);
13657 }
13658 if ((*real_map != map)
13659 && (*real_map != cow_sub_map_parent)) {
13660 vm_map_unlock(*real_map);
13661 }
13662 *real_map = map;
13663 return KERN_INVALID_ADDRESS;
13664 }
13665
13666 /* find the attenuated shadow of the underlying object */
13667 /* on our target map */
13668
13669 /* in english the submap object may extend beyond the */
13670 /* region mapped by the entry or, may only fill a portion */
13671 /* of it. For our purposes, we only care if the object */
13672 /* doesn't fill. In this case the area which will */
13673 /* ultimately be clipped in the top map will only need */
13674 /* to be as big as the portion of the underlying entry */
13675 /* which is mapped */
13676 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13677 submap_entry->vme_start - VME_OFFSET(entry) : 0;
13678
13679 end_delta =
13680 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13681 submap_entry->vme_end ?
13682 0 : (VME_OFFSET(entry) +
13683 (old_end - old_start))
13684 - submap_entry->vme_end;
13685
13686 old_start += start_delta;
13687 old_end -= end_delta;
13688
13689 if (submap_entry->is_sub_map) {
13690 entry = submap_entry;
13691 vaddr = local_vaddr;
13692 goto submap_recurse;
13693 }
13694
13695 if (((fault_type & VM_PROT_WRITE) ||
13696 force_copy)
13697 && cow_sub_map_parent) {
13698 vm_object_t sub_object, copy_object;
13699 vm_object_offset_t copy_offset;
13700 vm_map_offset_t local_start;
13701 vm_map_offset_t local_end;
13702 boolean_t object_copied = FALSE;
13703 vm_object_offset_t object_copied_offset = 0;
13704 boolean_t object_copied_needs_copy = FALSE;
13705 kern_return_t kr = KERN_SUCCESS;
13706
13707 if (vm_map_lock_read_to_write(map)) {
13708 vm_map_lock_read(map);
13709 old_start -= start_delta;
13710 old_end += end_delta;
13711 goto RetrySubMap;
13712 }
13713
13714
13715 sub_object = VME_OBJECT(submap_entry);
13716 if (sub_object == VM_OBJECT_NULL) {
13717 sub_object =
13718 vm_object_allocate(
13719 (vm_map_size_t)
13720 (submap_entry->vme_end -
13721 submap_entry->vme_start));
13722 VME_OBJECT_SET(submap_entry, sub_object, false, 0);
13723 VME_OFFSET_SET(submap_entry, 0);
13724 assert(!submap_entry->is_sub_map);
13725 assert(submap_entry->use_pmap);
13726 }
13727 local_start = local_vaddr -
13728 (cow_parent_vaddr - old_start);
13729 local_end = local_vaddr +
13730 (old_end - cow_parent_vaddr);
13731 vm_map_clip_start(map, submap_entry, local_start);
13732 vm_map_clip_end(map, submap_entry, local_end);
13733 if (submap_entry->is_sub_map) {
13734 /* unnesting was done when clipping */
13735 assert(!submap_entry->use_pmap);
13736 }
13737
13738 /* This is the COW case, lets connect */
13739 /* an entry in our space to the underlying */
13740 /* object in the submap, bypassing the */
13741 /* submap. */
13742 submap_entry_offset = VME_OFFSET(submap_entry);
13743 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13744
13745 if ((submap_entry->wired_count != 0 ||
13746 sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
13747 (submap_entry->protection & VM_PROT_EXECUTE) &&
13748 no_force_copy_if_executable) {
13749 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13750 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13751 vm_map_unlock(cow_sub_map_parent);
13752 }
13753 if ((*real_map != map)
13754 && (*real_map != cow_sub_map_parent)) {
13755 vm_map_unlock(*real_map);
13756 }
13757 *real_map = map;
13758 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_NO_COW_ON_EXECUTABLE), 0 /* arg */);
13759 vm_map_lock_write_to_read(map);
13760 kr = KERN_PROTECTION_FAILURE;
13761 DTRACE_VM4(submap_no_copy_executable,
13762 vm_map_t, map,
13763 vm_object_offset_t, submap_entry_offset,
13764 vm_object_size_t, submap_entry_size,
13765 int, kr);
13766 return kr;
13767 }
13768
13769 if (submap_entry->wired_count != 0) {
13770 vm_object_reference(sub_object);
13771
13772 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13773 "submap_entry %p offset 0x%llx\n",
13774 submap_entry, VME_OFFSET(submap_entry));
13775
13776 DTRACE_VM6(submap_copy_slowly,
13777 vm_map_t, cow_sub_map_parent,
13778 vm_map_offset_t, vaddr,
13779 vm_map_t, map,
13780 vm_object_size_t, submap_entry_size,
13781 int, submap_entry->wired_count,
13782 int, sub_object->copy_strategy);
13783
13784 saved_submap_entry = submap_entry;
13785 version.main_timestamp = map->timestamp;
13786 vm_map_unlock(map); /* Increments timestamp by 1 */
13787 submap_entry = VM_MAP_ENTRY_NULL;
13788
13789 vm_object_lock(sub_object);
13790 kr = vm_object_copy_slowly(sub_object,
13791 submap_entry_offset,
13792 submap_entry_size,
13793 FALSE,
13794 			    &copy_object);
13795 object_copied = TRUE;
13796 object_copied_offset = 0;
13797 /* 4k: account for extra offset in physical page */
13798 object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13799 object_copied_needs_copy = FALSE;
13800 vm_object_deallocate(sub_object);
13801
13802 vm_map_lock(map);
13803
13804 if (kr != KERN_SUCCESS &&
13805 kr != KERN_MEMORY_RESTART_COPY) {
13806 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13807 vm_map_unlock(cow_sub_map_parent);
13808 }
13809 if ((*real_map != map)
13810 && (*real_map != cow_sub_map_parent)) {
13811 vm_map_unlock(*real_map);
13812 }
13813 *real_map = map;
13814 vm_object_deallocate(copy_object);
13815 copy_object = VM_OBJECT_NULL;
13816 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_SLOWLY_FAILED), 0 /* arg */);
13817 vm_map_lock_write_to_read(map);
13818 DTRACE_VM4(submap_copy_error_slowly,
13819 vm_object_t, sub_object,
13820 vm_object_offset_t, submap_entry_offset,
13821 vm_object_size_t, submap_entry_size,
13822 int, kr);
13823 vm_map_lookup_and_lock_object_copy_slowly_error++;
13824 return kr;
13825 }
13826
13827 if ((kr == KERN_SUCCESS) &&
13828 (version.main_timestamp + 1) == map->timestamp) {
13829 submap_entry = saved_submap_entry;
13830 } else {
13831 saved_submap_entry = NULL;
13832 old_start -= start_delta;
13833 old_end += end_delta;
13834 vm_object_deallocate(copy_object);
13835 copy_object = VM_OBJECT_NULL;
13836 vm_map_lock_write_to_read(map);
13837 vm_map_lookup_and_lock_object_copy_slowly_restart++;
13838 goto RetrySubMap;
13839 }
13840 vm_map_lookup_and_lock_object_copy_slowly_count++;
13841 vm_map_lookup_and_lock_object_copy_slowly_size += submap_entry_size;
13842 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_slowly_max) {
13843 vm_map_lookup_and_lock_object_copy_slowly_max = submap_entry_size;
13844 }
13845 } else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13846 submap_entry_offset = VME_OFFSET(submap_entry);
13847 copy_object = VM_OBJECT_NULL;
13848 object_copied_offset = submap_entry_offset;
13849 object_copied_needs_copy = FALSE;
13850 DTRACE_VM6(submap_copy_strategically,
13851 vm_map_t, cow_sub_map_parent,
13852 vm_map_offset_t, vaddr,
13853 vm_map_t, map,
13854 vm_object_size_t, submap_entry_size,
13855 int, submap_entry->wired_count,
13856 int, sub_object->copy_strategy);
13857 kr = vm_object_copy_strategically(
13858 sub_object,
13859 submap_entry_offset,
13860 submap_entry->vme_end - submap_entry->vme_start,
13861 				&copy_object,
13862 &object_copied_offset,
13863 &object_copied_needs_copy);
13864 if (kr == KERN_MEMORY_RESTART_COPY) {
13865 old_start -= start_delta;
13866 old_end += end_delta;
13867 vm_object_deallocate(copy_object);
13868 copy_object = VM_OBJECT_NULL;
13869 vm_map_lock_write_to_read(map);
13870 vm_map_lookup_and_lock_object_copy_strategically_restart++;
13871 goto RetrySubMap;
13872 }
13873 if (kr != KERN_SUCCESS) {
13874 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13875 vm_map_unlock(cow_sub_map_parent);
13876 }
13877 if ((*real_map != map)
13878 && (*real_map != cow_sub_map_parent)) {
13879 vm_map_unlock(*real_map);
13880 }
13881 *real_map = map;
13882 vm_object_deallocate(copy_object);
13883 copy_object = VM_OBJECT_NULL;
13884 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_STRAT_FAILED), 0 /* arg */);
13885 vm_map_lock_write_to_read(map);
13886 DTRACE_VM4(submap_copy_error_strategically,
13887 vm_object_t, sub_object,
13888 vm_object_offset_t, submap_entry_offset,
13889 vm_object_size_t, submap_entry_size,
13890 int, kr);
13891 vm_map_lookup_and_lock_object_copy_strategically_error++;
13892 return kr;
13893 }
13894 assert(copy_object != VM_OBJECT_NULL);
13895 assert(copy_object != sub_object);
13896 object_copied = TRUE;
13897 vm_map_lookup_and_lock_object_copy_strategically_count++;
13898 vm_map_lookup_and_lock_object_copy_strategically_size += submap_entry_size;
13899 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_strategically_max) {
13900 vm_map_lookup_and_lock_object_copy_strategically_max = submap_entry_size;
13901 }
13902 } else {
13903 /* set up shadow object */
13904 object_copied = FALSE;
13905 copy_object = sub_object;
13906 vm_object_lock(sub_object);
13907 vm_object_reference_locked(sub_object);
13908 sub_object->shadowed = TRUE;
13909 vm_object_unlock(sub_object);
13910
13911 assert(submap_entry->wired_count == 0);
13912 submap_entry->needs_copy = TRUE;
13913
13914 prot = submap_entry->protection;
13915 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13916 prot = prot & ~VM_PROT_WRITE;
13917 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13918
13919 if (override_nx(old_map,
13920 VME_ALIAS(submap_entry))
13921 && prot) {
13922 prot |= VM_PROT_EXECUTE;
13923 }
13924
13925 vm_object_pmap_protect(
13926 sub_object,
13927 VME_OFFSET(submap_entry),
13928 submap_entry->vme_end -
13929 submap_entry->vme_start,
13930 (submap_entry->is_shared
13931 || map->mapped_in_other_pmaps) ?
13932 PMAP_NULL : map->pmap,
13933 VM_MAP_PAGE_SIZE(map),
13934 submap_entry->vme_start,
13935 prot);
13936 vm_map_lookup_and_lock_object_copy_shadow_count++;
13937 vm_map_lookup_and_lock_object_copy_shadow_size += submap_entry_size;
13938 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_shadow_max) {
13939 vm_map_lookup_and_lock_object_copy_shadow_max = submap_entry_size;
13940 }
13941 }
13942
13943 /*
13944 * Adjust the fault offset to the submap entry.
13945 */
13946 copy_offset = (local_vaddr -
13947 submap_entry->vme_start +
13948 VME_OFFSET(submap_entry));
13949
13950 			/* This works differently than the */
13951 /* normal submap case. We go back */
13952 /* to the parent of the cow map and*/
13953 /* clip out the target portion of */
13954 /* the sub_map, substituting the */
13955 /* new copy object, */
13956
13957 subentry_protection = submap_entry->protection;
13958 subentry_max_protection = submap_entry->max_protection;
13959 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13960 subentry_permanent = submap_entry->vme_permanent;
13961 subentry_pmap_cs_associated = submap_entry->pmap_cs_associated;
13962
13963 vm_map_unlock(map);
13964 submap_entry = NULL; /* not valid after map unlock */
13965
13966 local_start = old_start;
13967 local_end = old_end;
13968 map = cow_sub_map_parent;
13969 *var_map = cow_sub_map_parent;
13970 vaddr = cow_parent_vaddr;
13971 cow_sub_map_parent = NULL;
13972
13973 if (!vm_map_lookup_entry(map,
13974 vaddr, &entry)) {
13975 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13976 vm_map_unlock(cow_sub_map_parent);
13977 }
13978 if ((*real_map != map)
13979 && (*real_map != cow_sub_map_parent)) {
13980 vm_map_unlock(*real_map);
13981 }
13982 *real_map = map;
13983 vm_object_deallocate(
13984 copy_object);
13985 copy_object = VM_OBJECT_NULL;
13986 vm_map_lock_write_to_read(map);
13987 DTRACE_VM4(submap_lookup_post_unlock,
13988 uint64_t, (uint64_t)entry->vme_start,
13989 uint64_t, (uint64_t)entry->vme_end,
13990 vm_map_offset_t, vaddr,
13991 int, object_copied);
13992 return KERN_INVALID_ADDRESS;
13993 }
13994
13995 /* clip out the portion of space */
13996 /* mapped by the sub map which */
13997 /* corresponds to the underlying */
13998 /* object */
13999
14000 /*
14001 * Clip (and unnest) the smallest nested chunk
14002 * possible around the faulting address...
14003 */
14004 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
14005 local_end = local_start + pmap_shared_region_size_min(map->pmap);
14006 /*
14007 * ... but don't go beyond the "old_start" to "old_end"
14008 * range, to avoid spanning over another VM region
14009 * with a possibly different VM object and/or offset.
14010 */
14011 if (local_start < old_start) {
14012 local_start = old_start;
14013 }
14014 if (local_end > old_end) {
14015 local_end = old_end;
14016 }
14017 /*
14018 * Adjust copy_offset to the start of the range.
14019 */
14020 copy_offset -= (vaddr - local_start);
14021
14022 vm_map_clip_start(map, entry, local_start);
14023 vm_map_clip_end(map, entry, local_end);
14024 if (entry->is_sub_map) {
14025 /* unnesting was done when clipping */
14026 assert(!entry->use_pmap);
14027 }
14028
14029 /* substitute copy object for */
14030 /* shared map entry */
14031 vm_map_deallocate(VME_SUBMAP(entry));
14032 assert(!entry->iokit_acct);
14033 entry->use_pmap = TRUE;
14034 VME_OBJECT_SET(entry, copy_object, false, 0);
14035
14036 /* propagate the submap entry's protections */
14037 if (entry->protection != VM_PROT_READ) {
14038 /*
14039 * Someone has already altered the top entry's
14040 * protections via vm_protect(VM_PROT_COPY).
14041 * Respect these new values and ignore the
14042 * submap entry's protections.
14043 */
14044 } else {
14045 /*
14046 * Regular copy-on-write: propagate the submap
14047 * entry's protections to the top map entry.
14048 */
14049 entry->protection |= subentry_protection;
14050 }
14051 entry->max_protection |= subentry_max_protection;
14052 /* propagate some attributes from subentry */
14053 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
14054 entry->vme_permanent = subentry_permanent;
14055 entry->pmap_cs_associated = subentry_pmap_cs_associated;
14056
14057 if ((entry->protection & VM_PROT_WRITE) &&
14058 (entry->protection & VM_PROT_EXECUTE) &&
14059 #if XNU_TARGET_OS_OSX
14060 map->pmap != kernel_pmap &&
14061 (vm_map_cs_enforcement(map)
14062 #if __arm64__
14063 || !VM_MAP_IS_EXOTIC(map)
14064 #endif /* __arm64__ */
14065 ) &&
14066 #endif /* XNU_TARGET_OS_OSX */
14067 !(entry->used_for_jit) &&
14068 VM_MAP_POLICY_WX_STRIP_X(map)) {
14069 DTRACE_VM3(cs_wx,
14070 uint64_t, (uint64_t)entry->vme_start,
14071 uint64_t, (uint64_t)entry->vme_end,
14072 vm_prot_t, entry->protection);
14073 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
14074 proc_selfpid(),
14075 (get_bsdtask_info(current_task())
14076 ? proc_name_address(get_bsdtask_info(current_task()))
14077 : "?"),
14078 __FUNCTION__);
14079 entry->protection &= ~VM_PROT_EXECUTE;
14080 }
14081
14082 if (object_copied) {
14083 VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
14084 entry->needs_copy = object_copied_needs_copy;
14085 entry->is_shared = FALSE;
14086 } else {
14087 assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
14088 assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
14089 assert(entry->wired_count == 0);
14090 VME_OFFSET_SET(entry, copy_offset);
14091 entry->needs_copy = TRUE;
14092 if (map != old_map) {
14093 entry->is_shared = TRUE;
14094 }
14095 }
14096 if (entry->inheritance == VM_INHERIT_SHARE) {
14097 entry->inheritance = VM_INHERIT_COPY;
14098 }
14099
14100 vm_map_lock_write_to_read(map);
14101 } else {
14102 if ((cow_sub_map_parent)
14103 && (cow_sub_map_parent != *real_map)
14104 && (cow_sub_map_parent != map)) {
14105 vm_map_unlock(cow_sub_map_parent);
14106 }
14107 entry = submap_entry;
14108 vaddr = local_vaddr;
14109 }
14110 }
14111
14112 /*
14113 * Check whether this task is allowed to have
14114 * this page.
14115 */
14116
14117 prot = entry->protection;
14118
14119 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
14120 /*
14121 * HACK -- if not a stack, then allow execution
14122 */
14123 prot |= VM_PROT_EXECUTE;
14124 }
14125
14126 if (mask_protections) {
14127 fault_type &= prot;
14128 if (fault_type == VM_PROT_NONE) {
14129 goto protection_failure;
14130 }
14131 }
14132 if (((fault_type & prot) != fault_type)
14133 #if __arm64__
14134 /* prefetch abort in execute-only page */
14135 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
14136 #elif defined(__x86_64__)
14137 /* Consider the UEXEC bit when handling an EXECUTE fault */
14138 && !((fault_type & VM_PROT_EXECUTE) && !(prot & VM_PROT_EXECUTE) && (prot & VM_PROT_UEXEC))
14139 #endif
14140 ) {
14141 protection_failure:
14142 if (*real_map != map) {
14143 vm_map_unlock(*real_map);
14144 }
14145 *real_map = map;
14146
14147 if ((fault_type & VM_PROT_EXECUTE) && prot) {
14148 log_stack_execution_failure((addr64_t)vaddr, prot);
14149 }
14150
14151 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
14152 DTRACE_VM3(prot_fault_detailed, vm_prot_t, fault_type, vm_prot_t, prot, void *, vaddr);
14153 /*
14154 * Noisy (esp. internally) and can be inferred from CrashReports. So OFF for now.
14155 *
14156 * ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_PROTECTION_FAILURE), 0);
14157 */
14158 return KERN_PROTECTION_FAILURE;
14159 }
14160
14161 /*
14162 * If this page is not pageable, we have to get
14163 * it for all possible accesses.
14164 */
14165
14166 *wired = (entry->wired_count != 0);
14167 if (*wired) {
14168 fault_type = prot;
14169 }
14170
14171 /*
14172 * If the entry was copy-on-write, we either ...
14173 */
14174
14175 if (entry->needs_copy) {
14176 /*
14177 * If we want to write the page, we may as well
14178 * handle that now since we've got the map locked.
14179 *
14180 * If we don't need to write the page, we just
14181 * demote the permissions allowed.
14182 */
14183
14184 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
14185 /*
14186 * Make a new object, and place it in the
14187 * object chain. Note that no new references
14188 * have appeared -- one just moved from the
14189 * map to the new object.
14190 */
14191
14192 if (vm_map_lock_read_to_write(map)) {
14193 vm_map_lock_read(map);
14194 goto RetryLookup;
14195 }
14196
14197 if (VME_OBJECT(entry)->shadowed == FALSE) {
14198 vm_object_lock(VME_OBJECT(entry));
14199 VME_OBJECT(entry)->shadowed = TRUE;
14200 vm_object_unlock(VME_OBJECT(entry));
14201 }
14202 VME_OBJECT_SHADOW(entry,
14203 (vm_map_size_t) (entry->vme_end -
14204 entry->vme_start),
14205 vm_map_always_shadow(map));
14206 entry->needs_copy = FALSE;
14207
14208 vm_map_lock_write_to_read(map);
14209 }
14210 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
14211 /*
14212 * We're attempting to read a copy-on-write
14213 * page -- don't allow writes.
14214 */
14215
14216 prot &= (~VM_PROT_WRITE);
14217 }
14218 }
14219
14220 if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
14221 /*
14222 * We went through a "needs_copy" submap without triggering
14223 * a copy, so granting write access to the page would bypass
14224 * that submap's "needs_copy".
14225 */
14226 assert(!(fault_type & VM_PROT_WRITE));
14227 assert(!*wired);
14228 assert(!force_copy);
14229 // printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
14230 prot &= ~VM_PROT_WRITE;
14231 }
14232
14233 /*
14234 * Create an object if necessary.
14235 */
14236 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
14237 if (vm_map_lock_read_to_write(map)) {
14238 vm_map_lock_read(map);
14239 goto RetryLookup;
14240 }
14241
14242 VME_OBJECT_SET(entry,
14243 vm_object_allocate(
14244 (vm_map_size_t)(entry->vme_end -
14245 entry->vme_start)), false, 0);
14246 VME_OFFSET_SET(entry, 0);
14247 assert(entry->use_pmap);
14248 vm_map_lock_write_to_read(map);
14249 }
14250
14251 /*
14252 * Return the object/offset from this entry. If the entry
14253 * was copy-on-write or empty, it has been fixed up. Also
14254 * return the protection.
14255 */
14256
14257 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
14258 *object = VME_OBJECT(entry);
14259 *out_prot = prot;
14260 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
14261
14262 if (fault_info) {
14263 fault_info->interruptible = THREAD_UNINT; /* for now... */
14264 /* ... the caller will change "interruptible" if needed */
14265 fault_info->cluster_size = 0;
14266 fault_info->user_tag = VME_ALIAS(entry);
14267 fault_info->pmap_options = 0;
14268 if (entry->iokit_acct ||
14269 (!entry->is_sub_map && !entry->use_pmap)) {
14270 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14271 }
14272 fault_info->behavior = entry->behavior;
14273 fault_info->lo_offset = VME_OFFSET(entry);
14274 fault_info->hi_offset =
14275 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
14276 fault_info->no_cache = entry->no_cache;
14277 fault_info->stealth = FALSE;
14278 fault_info->io_sync = FALSE;
14279 if (entry->used_for_jit ||
14280 entry->vme_resilient_codesign) {
14281 fault_info->cs_bypass = TRUE;
14282 } else {
14283 fault_info->cs_bypass = FALSE;
14284 }
14285 fault_info->pmap_cs_associated = FALSE;
14286 #if CONFIG_PMAP_CS
14287 if (entry->pmap_cs_associated) {
14288 /*
14289 * The pmap layer will validate this page
14290 * before allowing it to be executed from.
14291 */
14292 fault_info->pmap_cs_associated = TRUE;
14293 }
14294 #endif /* CONFIG_PMAP_CS */
14295 fault_info->mark_zf_absent = FALSE;
14296 fault_info->batch_pmap_op = FALSE;
14297 fault_info->resilient_media = entry->vme_resilient_media;
14298 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
14299 if (entry->translated_allow_execute) {
14300 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14301 }
14302 }
14303
14304 /*
14305 * Lock the object to prevent it from disappearing
14306 */
14307 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14308 if (contended == NULL) {
14309 vm_object_lock(*object);
14310 } else {
14311 *contended = vm_object_lock_check_contended(*object);
14312 }
14313 } else {
14314 vm_object_lock_shared(*object);
14315 }
14316
14317 /*
14318 * Save the version number
14319 */
14320
14321 out_version->main_timestamp = map->timestamp;
14322
14323 return KERN_SUCCESS;
14324 }
14325
14326
14327 /*
14328 * vm_map_verify:
14329 *
14330 * Verifies that the map in question has not changed
14331 * since the given version. The map has to be locked
14332 * ("shared" mode is fine) before calling this function
14333 * and it will be returned locked too.
14334 */
14335 boolean_t
vm_map_verify(vm_map_t map,vm_map_version_t * version)14336 vm_map_verify(
14337 vm_map_t map,
14338 vm_map_version_t *version) /* REF */
14339 {
14340 boolean_t result;
14341
14342 vm_map_lock_assert_held(map);
14343 result = (map->timestamp == version->main_timestamp);
14344
14345 return result;
14346 }
14347
14348 /*
14349 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14350 * Goes away after regular vm_region_recurse function migrates to
14351 * 64 bits
14352 * vm_region_recurse: A form of vm_region which follows the
14353 * submaps in a target map
14354 *
14355 */
14356
14357 kern_return_t
vm_map_region_recurse_64(vm_map_t map,vm_map_offset_t * address,vm_map_size_t * size,natural_t * nesting_depth,vm_region_submap_info_64_t submap_info,mach_msg_type_number_t * count)14358 vm_map_region_recurse_64(
14359 vm_map_t map,
14360 vm_map_offset_t *address, /* IN/OUT */
14361 vm_map_size_t *size, /* OUT */
14362 natural_t *nesting_depth, /* IN/OUT */
14363 vm_region_submap_info_64_t submap_info, /* IN/OUT */
14364 mach_msg_type_number_t *count) /* IN/OUT */
14365 {
14366 mach_msg_type_number_t original_count;
14367 vm_region_extended_info_data_t extended;
14368 vm_map_entry_t tmp_entry;
14369 vm_map_offset_t user_address;
14370 unsigned int user_max_depth;
14371
14372 /*
14373 * "curr_entry" is the VM map entry preceding or including the
14374 * address we're looking for.
14375 * "curr_map" is the map or sub-map containing "curr_entry".
14376 * "curr_address" is the equivalent of the top map's "user_address"
14377 * in the current map.
14378 * "curr_offset" is the cumulated offset of "curr_map" in the
14379 * target task's address space.
14380 * "curr_depth" is the depth of "curr_map" in the chain of
14381 * sub-maps.
14382 *
14383 * "curr_max_below" and "curr_max_above" limit the range (around
14384 * "curr_address") we should take into account in the current (sub)map.
14385 * They limit the range to what's visible through the map entries
14386 * we've traversed from the top map to the current map.
14387 *
14388 */
14389 vm_map_entry_t curr_entry;
14390 vm_map_address_t curr_address;
14391 vm_map_offset_t curr_offset;
14392 vm_map_t curr_map;
14393 unsigned int curr_depth;
14394 vm_map_offset_t curr_max_below, curr_max_above;
14395 vm_map_offset_t curr_skip;
14396
14397 /*
14398 * "next_" is the same as "curr_" but for the VM region immediately
14399 * after the address we're looking for. We need to keep track of this
14400 * too because we want to return info about that region if the
14401 * address we're looking for is not mapped.
14402 */
14403 vm_map_entry_t next_entry;
14404 vm_map_offset_t next_offset;
14405 vm_map_offset_t next_address;
14406 vm_map_t next_map;
14407 unsigned int next_depth;
14408 vm_map_offset_t next_max_below, next_max_above;
14409 vm_map_offset_t next_skip;
14410
14411 boolean_t look_for_pages;
14412 vm_region_submap_short_info_64_t short_info;
14413 boolean_t do_region_footprint;
14414 int effective_page_size, effective_page_shift;
14415 boolean_t submap_needed_copy;
14416
14417 if (map == VM_MAP_NULL) {
14418 /* no address space to work on */
14419 return KERN_INVALID_ARGUMENT;
14420 }
14421
14422 effective_page_shift = vm_self_region_page_shift(map);
14423 effective_page_size = (1 << effective_page_shift);
14424
14425 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14426 /*
14427 * "info" structure is not big enough and
14428 * would overflow
14429 */
14430 return KERN_INVALID_ARGUMENT;
14431 }
14432
14433 do_region_footprint = task_self_region_footprint();
14434 original_count = *count;
14435
14436 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14437 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14438 look_for_pages = FALSE;
14439 short_info = (vm_region_submap_short_info_64_t) submap_info;
14440 submap_info = NULL;
14441 } else {
14442 look_for_pages = TRUE;
14443 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14444 short_info = NULL;
14445
14446 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14447 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14448 }
14449 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14450 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14451 }
14452 }
14453
14454 user_address = *address;
14455 user_max_depth = *nesting_depth;
14456 submap_needed_copy = FALSE;
14457
14458 if (not_in_kdp) {
14459 vm_map_lock_read(map);
14460 }
14461
14462 recurse_again:
14463 curr_entry = NULL;
14464 curr_map = map;
14465 curr_address = user_address;
14466 curr_offset = 0;
14467 curr_skip = 0;
14468 curr_depth = 0;
14469 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14470 curr_max_below = curr_address;
14471
14472 next_entry = NULL;
14473 next_map = NULL;
14474 next_address = 0;
14475 next_offset = 0;
14476 next_skip = 0;
14477 next_depth = 0;
14478 next_max_above = (vm_map_offset_t) -1;
14479 next_max_below = (vm_map_offset_t) -1;
14480
14481 for (;;) {
14482 if (vm_map_lookup_entry(curr_map,
14483 curr_address,
14484 &tmp_entry)) {
14485 /* tmp_entry contains the address we're looking for */
14486 curr_entry = tmp_entry;
14487 } else {
14488 vm_map_offset_t skip;
14489 /*
14490 * The address is not mapped. "tmp_entry" is the
14491 * map entry preceding the address. We want the next
14492 * one, if it exists.
14493 */
14494 curr_entry = tmp_entry->vme_next;
14495
14496 if (curr_entry == vm_map_to_entry(curr_map) ||
14497 (curr_entry->vme_start >=
14498 curr_address + curr_max_above)) {
14499 /* no next entry at this level: stop looking */
14500 if (not_in_kdp) {
14501 vm_map_unlock_read(curr_map);
14502 }
14503 curr_entry = NULL;
14504 curr_map = NULL;
14505 curr_skip = 0;
14506 curr_offset = 0;
14507 curr_depth = 0;
14508 curr_max_above = 0;
14509 curr_max_below = 0;
14510 break;
14511 }
14512
14513 /* adjust current address and offset */
14514 skip = curr_entry->vme_start - curr_address;
14515 curr_address = curr_entry->vme_start;
14516 curr_skip += skip;
14517 curr_offset += skip;
14518 curr_max_above -= skip;
14519 curr_max_below = 0;
14520 }
14521
14522 /*
14523 * Is the next entry at this level closer to the address (or
14524 * deeper in the submap chain) than the one we had
14525 * so far ?
14526 */
14527 tmp_entry = curr_entry->vme_next;
14528 if (tmp_entry == vm_map_to_entry(curr_map)) {
14529 /* no next entry at this level */
14530 } else if (tmp_entry->vme_start >=
14531 curr_address + curr_max_above) {
14532 /*
14533 * tmp_entry is beyond the scope of what we mapped of
14534 * this submap in the upper level: ignore it.
14535 */
14536 } else if ((next_entry == NULL) ||
14537 (tmp_entry->vme_start + curr_offset <=
14538 next_entry->vme_start + next_offset)) {
14539 /*
14540 * We didn't have a "next_entry" or this one is
14541 * closer to the address we're looking for:
14542 * use this "tmp_entry" as the new "next_entry".
14543 */
14544 if (next_entry != NULL) {
14545 /* unlock the last "next_map" */
14546 if (next_map != curr_map && not_in_kdp) {
14547 vm_map_unlock_read(next_map);
14548 }
14549 }
14550 next_entry = tmp_entry;
14551 next_map = curr_map;
14552 next_depth = curr_depth;
14553 next_address = next_entry->vme_start;
14554 next_skip = curr_skip;
14555 next_skip += (next_address - curr_address);
14556 next_offset = curr_offset;
14557 next_offset += (next_address - curr_address);
14558 next_max_above = MIN(next_max_above, curr_max_above);
14559 next_max_above = MIN(next_max_above,
14560 next_entry->vme_end - next_address);
14561 next_max_below = MIN(next_max_below, curr_max_below);
14562 next_max_below = MIN(next_max_below,
14563 next_address - next_entry->vme_start);
14564 }
14565
14566 /*
14567 * "curr_max_{above,below}" allow us to keep track of the
14568 * portion of the submap that is actually mapped at this level:
14569 * the rest of that submap is irrelevant to us, since it's not
14570 * mapped here.
14571 * The relevant portion of the map starts at
14572 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14573 */
14574 curr_max_above = MIN(curr_max_above,
14575 curr_entry->vme_end - curr_address);
14576 curr_max_below = MIN(curr_max_below,
14577 curr_address - curr_entry->vme_start);
14578
14579 if (!curr_entry->is_sub_map ||
14580 curr_depth >= user_max_depth) {
14581 /*
14582 * We hit a leaf map or we reached the maximum depth
14583 * we could, so stop looking. Keep the current map
14584 * locked.
14585 */
14586 break;
14587 }
14588
14589 /*
14590 * Get down to the next submap level.
14591 */
14592
14593 if (curr_entry->needs_copy) {
14594 /* everything below this is effectively copy-on-write */
14595 submap_needed_copy = TRUE;
14596 }
14597
14598 /*
14599 * Lock the next level and unlock the current level,
14600 * unless we need to keep it locked to access the "next_entry"
14601 * later.
14602 */
14603 if (not_in_kdp) {
14604 vm_map_lock_read(VME_SUBMAP(curr_entry));
14605 }
14606 if (curr_map == next_map) {
14607 /* keep "next_map" locked in case we need it */
14608 } else {
14609 /* release this map */
14610 if (not_in_kdp) {
14611 vm_map_unlock_read(curr_map);
14612 }
14613 }
14614
14615 /*
14616 * Adjust the offset. "curr_entry" maps the submap
14617 * at relative address "curr_entry->vme_start" in the
14618 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14619 * bytes of the submap.
14620 * "curr_offset" always represents the offset of a virtual
14621 * address in the curr_map relative to the absolute address
14622 * space (i.e. the top-level VM map).
14623 */
14624 curr_offset +=
14625 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14626 curr_address = user_address + curr_offset;
14627 /* switch to the submap */
14628 curr_map = VME_SUBMAP(curr_entry);
14629 curr_depth++;
14630 curr_entry = NULL;
14631 }
14632
14633 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14634 // so probably should be a real 32b ID vs. ptr.
14635 // Current users just check for equality
14636
14637 if (curr_entry == NULL) {
14638 /* no VM region contains the address... */
14639
14640 if (do_region_footprint && /* we want footprint numbers */
14641 next_entry == NULL && /* & there are no more regions */
14642 /* & we haven't already provided our fake region: */
14643 user_address <= vm_map_last_entry(map)->vme_end) {
14644 ledger_amount_t ledger_resident, ledger_compressed;
14645
14646 /*
14647 * Add a fake memory region to account for
14648 * purgeable and/or ledger-tagged memory that
14649 * counts towards this task's memory footprint,
14650 * i.e. the resident/compressed pages of non-volatile
14651 * objects owned by that task.
14652 */
14653 task_ledgers_footprint(map->pmap->ledger,
14654 &ledger_resident,
14655 &ledger_compressed);
14656 if (ledger_resident + ledger_compressed == 0) {
14657 /* no purgeable memory usage to report */
14658 return KERN_INVALID_ADDRESS;
14659 }
14660 /* fake region to show nonvolatile footprint */
14661 if (look_for_pages) {
14662 submap_info->protection = VM_PROT_DEFAULT;
14663 submap_info->max_protection = VM_PROT_DEFAULT;
14664 submap_info->inheritance = VM_INHERIT_DEFAULT;
14665 submap_info->offset = 0;
14666 submap_info->user_tag = -1;
14667 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14668 submap_info->pages_shared_now_private = 0;
14669 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14670 submap_info->pages_dirtied = submap_info->pages_resident;
14671 submap_info->ref_count = 1;
14672 submap_info->shadow_depth = 0;
14673 submap_info->external_pager = 0;
14674 submap_info->share_mode = SM_PRIVATE;
14675 if (submap_needed_copy) {
14676 submap_info->share_mode = SM_COW;
14677 }
14678 submap_info->is_submap = 0;
14679 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14680 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14681 submap_info->user_wired_count = 0;
14682 submap_info->pages_reusable = 0;
14683 } else {
14684 short_info->user_tag = -1;
14685 short_info->offset = 0;
14686 short_info->protection = VM_PROT_DEFAULT;
14687 short_info->inheritance = VM_INHERIT_DEFAULT;
14688 short_info->max_protection = VM_PROT_DEFAULT;
14689 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14690 short_info->user_wired_count = 0;
14691 short_info->is_submap = 0;
14692 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14693 short_info->external_pager = 0;
14694 short_info->shadow_depth = 0;
14695 short_info->share_mode = SM_PRIVATE;
14696 if (submap_needed_copy) {
14697 short_info->share_mode = SM_COW;
14698 }
14699 short_info->ref_count = 1;
14700 }
14701 *nesting_depth = 0;
14702 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14703 // *address = user_address;
14704 *address = vm_map_last_entry(map)->vme_end;
14705 return KERN_SUCCESS;
14706 }
14707
14708 if (next_entry == NULL) {
14709 /* ... and no VM region follows it either */
14710 return KERN_INVALID_ADDRESS;
14711 }
14712 /* ... gather info about the next VM region */
14713 curr_entry = next_entry;
14714 curr_map = next_map; /* still locked ... */
14715 curr_address = next_address;
14716 curr_skip = next_skip;
14717 curr_offset = next_offset;
14718 curr_depth = next_depth;
14719 curr_max_above = next_max_above;
14720 curr_max_below = next_max_below;
14721 } else {
14722 /* we won't need "next_entry" after all */
14723 if (next_entry != NULL) {
14724 /* release "next_map" */
14725 if (next_map != curr_map && not_in_kdp) {
14726 vm_map_unlock_read(next_map);
14727 }
14728 }
14729 }
14730 next_entry = NULL;
14731 next_map = NULL;
14732 next_offset = 0;
14733 next_skip = 0;
14734 next_depth = 0;
14735 next_max_below = -1;
14736 next_max_above = -1;
14737
14738 if (curr_entry->is_sub_map &&
14739 curr_depth < user_max_depth) {
14740 /*
14741 * We're not as deep as we could be: we must have
14742 * gone back up after not finding anything mapped
14743 * below the original top-level map entry's.
14744 * Let's move "curr_address" forward and recurse again.
14745 */
14746 user_address = curr_address;
14747 goto recurse_again;
14748 }
14749
14750 *nesting_depth = curr_depth;
14751 *size = curr_max_above + curr_max_below;
14752 *address = user_address + curr_skip - curr_max_below;
14753
14754 if (look_for_pages) {
14755 submap_info->user_tag = VME_ALIAS(curr_entry);
14756 submap_info->offset = VME_OFFSET(curr_entry);
14757 submap_info->protection = curr_entry->protection;
14758 submap_info->inheritance = curr_entry->inheritance;
14759 submap_info->max_protection = curr_entry->max_protection;
14760 submap_info->behavior = curr_entry->behavior;
14761 submap_info->user_wired_count = curr_entry->user_wired_count;
14762 submap_info->is_submap = curr_entry->is_sub_map;
14763 if (curr_entry->is_sub_map) {
14764 submap_info->object_id = VM_OBJECT_ID(VME_SUBMAP(curr_entry));
14765 } else {
14766 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14767 }
14768 } else {
14769 short_info->user_tag = VME_ALIAS(curr_entry);
14770 short_info->offset = VME_OFFSET(curr_entry);
14771 short_info->protection = curr_entry->protection;
14772 short_info->inheritance = curr_entry->inheritance;
14773 short_info->max_protection = curr_entry->max_protection;
14774 short_info->behavior = curr_entry->behavior;
14775 short_info->user_wired_count = curr_entry->user_wired_count;
14776 short_info->is_submap = curr_entry->is_sub_map;
14777 if (curr_entry->is_sub_map) {
14778 short_info->object_id = VM_OBJECT_ID(VME_SUBMAP(curr_entry));
14779 } else {
14780 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14781 }
14782 }
14783
14784 extended.pages_resident = 0;
14785 extended.pages_swapped_out = 0;
14786 extended.pages_shared_now_private = 0;
14787 extended.pages_dirtied = 0;
14788 extended.pages_reusable = 0;
14789 extended.external_pager = 0;
14790 extended.shadow_depth = 0;
14791 extended.share_mode = SM_EMPTY;
14792 extended.ref_count = 0;
14793
14794 if (not_in_kdp) {
14795 if (!curr_entry->is_sub_map) {
14796 vm_map_offset_t range_start, range_end;
14797 range_start = MAX((curr_address - curr_max_below),
14798 curr_entry->vme_start);
14799 range_end = MIN((curr_address + curr_max_above),
14800 curr_entry->vme_end);
14801 vm_map_region_walk(curr_map,
14802 range_start,
14803 curr_entry,
14804 (VME_OFFSET(curr_entry) +
14805 (range_start -
14806 curr_entry->vme_start)),
14807 range_end - range_start,
14808 &extended,
14809 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14810 if (extended.external_pager &&
14811 extended.ref_count == 2 &&
14812 extended.share_mode == SM_SHARED) {
14813 extended.share_mode = SM_PRIVATE;
14814 }
14815 if (submap_needed_copy) {
14816 extended.share_mode = SM_COW;
14817 }
14818 } else {
14819 if (curr_entry->use_pmap) {
14820 extended.share_mode = SM_TRUESHARED;
14821 } else {
14822 extended.share_mode = SM_PRIVATE;
14823 }
14824 extended.ref_count = os_ref_get_count_raw(&VME_SUBMAP(curr_entry)->map_refcnt);
14825 }
14826 }
14827
14828 if (look_for_pages) {
14829 submap_info->pages_resident = extended.pages_resident;
14830 submap_info->pages_swapped_out = extended.pages_swapped_out;
14831 submap_info->pages_shared_now_private =
14832 extended.pages_shared_now_private;
14833 submap_info->pages_dirtied = extended.pages_dirtied;
14834 submap_info->external_pager = extended.external_pager;
14835 submap_info->shadow_depth = extended.shadow_depth;
14836 submap_info->share_mode = extended.share_mode;
14837 submap_info->ref_count = extended.ref_count;
14838
14839 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14840 submap_info->pages_reusable = extended.pages_reusable;
14841 }
14842 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14843 if (curr_entry->is_sub_map) {
14844 submap_info->object_id_full = (vm_object_id_t)VM_KERNEL_ADDRPERM(VME_SUBMAP(curr_entry));
14845 } else if (VME_OBJECT(curr_entry)) {
14846 submap_info->object_id_full = (vm_object_id_t)VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry));
14847 } else {
14848 submap_info->object_id_full = 0ull;
14849 }
14850 }
14851 } else {
14852 short_info->external_pager = extended.external_pager;
14853 short_info->shadow_depth = extended.shadow_depth;
14854 short_info->share_mode = extended.share_mode;
14855 short_info->ref_count = extended.ref_count;
14856 }
14857
14858 if (not_in_kdp) {
14859 vm_map_unlock_read(curr_map);
14860 }
14861
14862 return KERN_SUCCESS;
14863 }
14864
14865 /*
14866 * vm_region:
14867 *
14868 * User call to obtain information about a region in
14869 * a task's address map. Currently, only one flavor is
14870 * supported.
14871 *
14872 * XXX The reserved and behavior fields cannot be filled
14873 * in until the vm merge from the IK is completed, and
14874 * vm_reserve is implemented.
14875 */
14876
kern_return_t
vm_map_region(
	vm_map_t map,
	vm_map_offset_t *address,               /* IN/OUT */
	vm_map_size_t *size,                    /* OUT */
	vm_region_flavor_t flavor,              /* IN */
	vm_region_info_t info,                  /* OUT */
	mach_msg_type_number_t *count,          /* IN/OUT */
	mach_port_t *object_name)               /* OUT */
{
	vm_map_entry_t tmp_entry;
	vm_map_entry_t entry;
	vm_map_offset_t start;

	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (flavor) {
	case VM_REGION_BASIC_INFO:
		/* legacy for old 32-bit objects info */
	{
		vm_region_basic_info_t basic;

		/* caller must supply at least enough space for this flavor */
		if (*count < VM_REGION_BASIC_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			/*
			 * "start" is in a hole: report the next entry,
			 * or fail if there is none.
			 */
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		/* report the whole entry, not just from "start" */
		start = entry->vme_start;

		/* legacy 32-bit info: offset is deliberately truncated */
		basic->offset = (uint32_t)VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}

	case VM_REGION_BASIC_INFO_64:
	{
		vm_region_basic_info_64_t basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_64_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT_64;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			/* hole: round up to the next entry, if any */
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		/* 64-bit variant: offset is reported in full */
		basic->offset = VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	case VM_REGION_EXTENDED_INFO:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}
		/* the legacy check below is a subset; deliberate fall-through */
		OS_FALLTHROUGH;
	case VM_REGION_EXTENDED_INFO__legacy:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
			return KERN_INVALID_ARGUMENT;
		}

		{
			vm_region_extended_info_t extended;
			mach_msg_type_number_t original_count;
			int effective_page_size, effective_page_shift;

			extended = (vm_region_extended_info_t) info;

			effective_page_shift = vm_self_region_page_shift(map);
			effective_page_size = (1 << effective_page_shift);

			vm_map_lock_read(map);

			start = *address;
			if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
				if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
					vm_map_unlock_read(map);
					return KERN_INVALID_ADDRESS;
				}
			} else {
				entry = tmp_entry;
			}
			start = entry->vme_start;

			extended->protection = entry->protection;
			extended->user_tag = VME_ALIAS(entry);
			extended->pages_resident = 0;
			extended->pages_swapped_out = 0;
			extended->pages_shared_now_private = 0;
			extended->pages_dirtied = 0;
			extended->external_pager = 0;
			extended->shadow_depth = 0;

			original_count = *count;
			if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
				/* legacy struct lacks the pages_reusable field */
				*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
			} else {
				extended->pages_reusable = 0;
				*count = VM_REGION_EXTENDED_INFO_COUNT;
			}

			vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);

			/*
			 * A pager-backed object whose only extra reference is
			 * the pager's is effectively private.
			 */
			if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
				extended->share_mode = SM_PRIVATE;
			}

			if (object_name) {
				*object_name = IP_NULL;
			}
			*address = start;
			*size = (entry->vme_end - start);

			vm_map_unlock_read(map);
			return KERN_SUCCESS;
		}
	case VM_REGION_TOP_INFO:
	{
		vm_region_top_info_t top;

		if (*count < VM_REGION_TOP_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		top = (vm_region_top_info_t) info;
		*count = VM_REGION_TOP_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}
		start = entry->vme_start;

		top->private_pages_resident = 0;
		top->shared_pages_resident = 0;

		vm_map_region_top_walk(entry, top);

		if (object_name) {
			*object_name = IP_NULL;
		}
		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
15099
/*
 * OBJ_RESIDENT_COUNT: number of resident pages "obj" contributes to a
 * mapping of "entry_size" pages, clamped to the mapping's size.  When the
 * entire object is marked reusable, only its wired pages count; otherwise
 * resident pages minus the individually-reusable ones.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size) \
	MIN((entry_size), \
	((obj)->all_reusable ? \
	(obj)->wired_page_count : \
	(obj)->resident_page_count - (obj)->reusable_page_count))
15105
/*
 * vm_map_region_top_walk:
 *
 * Fill in a VM_REGION_TOP_INFO record for "entry": share mode, reference
 * count, (truncated) object id and private/shared resident page counts,
 * derived from the entry's VM object and its shadow chain.
 */
void
vm_map_region_top_walk(
	vm_map_entry_t entry,
	vm_region_top_info_t top)
{
	if (entry->is_sub_map || VME_OBJECT(entry) == 0) {
		/* submaps and object-less entries have nothing to report */
		top->share_mode = SM_EMPTY;
		top->ref_count = 0;
		top->obj_id = 0;
		return;
	}

	{
		struct vm_object *obj, *tmp_obj;
		int ref_count;
		uint32_t entry_size;

		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

		obj = VME_OBJECT(entry);

		vm_object_lock(obj);

		/* an in-progress paging operation isn't a real sharer */
		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}

		assert(obj->reusable_page_count <= obj->resident_page_count);
		if (obj->shadow) {
			/* shadowed top object: this mapping is copy-on-write */
			if (ref_count == 1) {
				top->private_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			}
			top->ref_count = ref_count;
			top->share_mode = SM_COW;

			/*
			 * Accumulate resident pages of all backing objects,
			 * hand-over-hand locking down the shadow chain.
			 */
			while ((tmp_obj = obj->shadow)) {
				vm_object_lock(tmp_obj);
				vm_object_unlock(obj);
				obj = tmp_obj;

				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
					ref_count--;
				}

				assert(obj->reusable_page_count <= obj->resident_page_count);
				top->shared_pages_resident +=
				    OBJ_RESIDENT_COUNT(obj, entry_size);
				/* don't count the shadower's own reference */
				top->ref_count += ref_count - 1;
			}
		} else {
			if (entry->superpage_size) {
				top->share_mode = SM_LARGE_PAGE;
				top->shared_pages_resident = 0;
				top->private_pages_resident = entry_size;
			} else if (entry->needs_copy) {
				top->share_mode = SM_COW;
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				/*
				 * A named object carries one extra reference,
				 * so ref_count == 2 can still be private.
				 */
				if (ref_count == 1 ||
				    (ref_count == 2 && obj->named)) {
					top->share_mode = SM_PRIVATE;
					top->private_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				} else {
					top->share_mode = SM_SHARED;
					top->shared_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				}
			}
			top->ref_count = ref_count;
		}
		/* XXX K64: obj_id will be truncated */
		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

		vm_object_unlock(obj);
	}
}
15190
/*
 * vm_map_region_walk:
 *
 * Gather extended region info for the range ["va", "va" + "range") of
 * "entry": page counts (when "look_for_pages"), shadow depth, pager type,
 * share mode and reference counts.  "offset" is the corresponding offset
 * into the entry's VM object.  The map is locked (read) by the caller.
 */
void
vm_map_region_walk(
	vm_map_t map,
	vm_map_offset_t va,
	vm_map_entry_t entry,
	vm_object_offset_t offset,
	vm_object_size_t range,
	vm_region_extended_info_t extended,
	boolean_t look_for_pages,
	mach_msg_type_number_t count)
{
	struct vm_object *obj, *tmp_obj;
	vm_map_offset_t last_offset;
	int i;
	int ref_count;
	struct vm_object *shadow_object;
	unsigned short shadow_depth;
	boolean_t do_region_footprint;
	int effective_page_size, effective_page_shift;
	vm_map_offset_t effective_page_mask;

	do_region_footprint = task_self_region_footprint();

	if ((entry->is_sub_map) ||
	    (VME_OBJECT(entry) == 0) ||
	    (VME_OBJECT(entry)->phys_contiguous &&
	    !entry->superpage_size)) {
		/* nothing meaningful to walk: report empty */
		extended->share_mode = SM_EMPTY;
		extended->ref_count = 0;
		return;
	}

	if (entry->superpage_size) {
		extended->shadow_depth = 0;
		extended->share_mode = SM_LARGE_PAGE;
		extended->ref_count = 1;
		extended->external_pager = 0;

		/* TODO4K: Superpage in 4k mode? */
		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
		extended->shadow_depth = 0;
		return;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);
	effective_page_mask = effective_page_size - 1;

	offset = vm_map_trunc_page(offset, effective_page_mask);

	obj = VME_OBJECT(entry);

	vm_object_lock(obj);

	/* an in-progress paging operation isn't a real sharer */
	if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
		ref_count--;
	}

	if (look_for_pages) {
		/* count pages one effective page at a time */
		for (last_offset = offset + range;
		    offset < last_offset;
		    offset += effective_page_size, va += effective_page_size) {
			if (do_region_footprint) {
				int disp;

				disp = 0;
				if (map->has_corpse_footprint) {
					/*
					 * Query the page info data we saved
					 * while forking the corpse.
					 */
					vm_map_corpse_footprint_query_page_info(
						map,
						va,
						&disp);
				} else {
					/*
					 * Query the pmap.
					 */
					vm_map_footprint_query_page_info(
						map,
						entry,
						va,
						&disp);
				}
				if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
					extended->pages_resident++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
					extended->pages_reusable++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
					extended->pages_dirtied++;
				}
				if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
					extended->pages_swapped_out++;
				}
				continue;
			}

			vm_map_region_look_for_page(map, va, obj,
			    vm_object_trunc_page(offset), ref_count,
			    0, extended, count);
		}

		if (do_region_footprint) {
			/*
			 * Footprint mode didn't walk the shadow chain;
			 * jump into the object-info path below to do so.
			 */
			goto collect_object_info;
		}
	} else {
collect_object_info:
		/* measure shadow depth and detect an external pager */
		shadow_object = obj->shadow;
		shadow_depth = 0;

		if (!(obj->internal)) {
			extended->external_pager = 1;
		}

		if (shadow_object != VM_OBJECT_NULL) {
			vm_object_lock(shadow_object);
			/* hand-over-hand lock down the shadow chain */
			for (;
			    shadow_object != VM_OBJECT_NULL;
			    shadow_depth++) {
				vm_object_t next_shadow;

				if (!(shadow_object->internal)) {
					extended->external_pager = 1;
				}

				next_shadow = shadow_object->shadow;
				if (next_shadow) {
					vm_object_lock(next_shadow);
				}
				vm_object_unlock(shadow_object);
				shadow_object = next_shadow;
			}
		}
		extended->shadow_depth = shadow_depth;
	}

	/* classify the mapping's share mode */
	if (extended->shadow_depth || entry->needs_copy) {
		extended->share_mode = SM_COW;
	} else {
		if (ref_count == 1) {
			extended->share_mode = SM_PRIVATE;
		} else {
			if (obj->true_share) {
				extended->share_mode = SM_TRUESHARED;
			} else {
				extended->share_mode = SM_SHARED;
			}
		}
	}
	extended->ref_count = ref_count - extended->shadow_depth;

	/* fold in the references held by each backing object */
	for (i = 0; i < extended->shadow_depth; i++) {
		if ((tmp_obj = obj->shadow) == 0) {
			break;
		}
		vm_object_lock(tmp_obj);
		vm_object_unlock(obj);

		if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
			ref_count--;
		}

		extended->ref_count += ref_count;
		obj = tmp_obj;
	}
	vm_object_unlock(obj);

	if (extended->share_mode == SM_SHARED) {
		/*
		 * "Shared" might really be aliased: count how many of this
		 * map's own entries reference the same object.  If they
		 * account for all references, it's private-aliased.
		 */
		vm_map_entry_t cur;
		vm_map_entry_t last;
		int my_refs;

		obj = VME_OBJECT(entry);
		last = vm_map_to_entry(map);
		my_refs = 0;

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}
		for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
			my_refs += vm_map_region_count_obj_refs(cur, obj);
		}

		if (my_refs == ref_count) {
			extended->share_mode = SM_PRIVATE_ALIASED;
		} else if (my_refs > 1) {
			extended->share_mode = SM_SHARED_ALIASED;
		}
	}
}
15384
15385
15386 /* object is locked on entry and locked on return */
15387
15388
/*
 * vm_map_region_look_for_page:
 *
 * Search "object" and its shadow chain for the page at "offset" and
 * update the extended-info counters accordingly (resident, dirtied,
 * reusable, shared-now-private, swapped-out, shadow depth).
 * "object" is locked on entry and still locked on return; any other
 * object visited in the chain is unlocked before returning.
 */
static void
vm_map_region_look_for_page(
	__unused vm_map_t map,
	__unused vm_map_offset_t va,
	vm_object_t object,
	vm_object_offset_t offset,
	int max_refcnt,
	unsigned short depth,
	vm_region_extended_info_t extended,
	mach_msg_type_number_t count)
{
	vm_page_t p;
	vm_object_t shadow;
	int ref_count;
	vm_object_t caller_object;

	shadow = object->shadow;
	caller_object = object;


	while (TRUE) {
		if (!(object->internal)) {
			extended->external_pager = 1;
		}

		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			/*
			 * Found the page.  A COW page whose chain has only
			 * single-referenced objects would become private on
			 * the next write.
			 */
			if (shadow && (max_refcnt == 1)) {
				extended->pages_shared_now_private++;
			}

			if (!p->vmp_fictitious &&
			    (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
				extended->pages_dirtied++;
			} else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
				/* only the non-legacy struct reports reusable pages */
				if (p->vmp_reusable || object->all_reusable) {
					extended->pages_reusable++;
				}
			}

			extended->pages_resident++;

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			return;
		}
		if (object->internal &&
		    object->alive &&
		    !object->terminating &&
		    object->pager_ready) {
			if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
			    == VM_EXTERNAL_STATE_EXISTS) {
				/* the pager has that page */
				extended->pages_swapped_out++;
				if (object != caller_object) {
					vm_object_unlock(object);
				}
				return;
			}
		}

		if (shadow) {
			/* keep looking one level down the shadow chain */
			vm_object_lock(shadow);

			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
				ref_count--;
			}

			if (++depth > extended->shadow_depth) {
				extended->shadow_depth = depth;
			}

			if (ref_count > max_refcnt) {
				max_refcnt = ref_count;
			}

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			offset = offset + object->vo_shadow_offset;
			object = shadow;
			shadow = object->shadow;
			continue;
		}
		/* bottom of the chain: page is neither resident nor paged out */
		if (object != caller_object) {
			vm_object_unlock(object);
		}
		break;
	}
}
15481
15482 static int
vm_map_region_count_obj_refs(vm_map_entry_t entry,vm_object_t object)15483 vm_map_region_count_obj_refs(
15484 vm_map_entry_t entry,
15485 vm_object_t object)
15486 {
15487 int ref_count;
15488 vm_object_t chk_obj;
15489 vm_object_t tmp_obj;
15490
15491 if (entry->is_sub_map || VME_OBJECT(entry) == VM_OBJECT_NULL) {
15492 return 0;
15493 }
15494
15495 ref_count = 0;
15496 chk_obj = VME_OBJECT(entry);
15497 vm_object_lock(chk_obj);
15498
15499 while (chk_obj) {
15500 if (chk_obj == object) {
15501 ref_count++;
15502 }
15503 tmp_obj = chk_obj->shadow;
15504 if (tmp_obj) {
15505 vm_object_lock(tmp_obj);
15506 }
15507 vm_object_unlock(chk_obj);
15508
15509 chk_obj = tmp_obj;
15510 }
15511
15512 return ref_count;
15513 }
15514
15515
15516 /*
15517 * Routine: vm_map_simplify
15518 *
15519 * Description:
15520 * Attempt to simplify the map representation in
15521 * the vicinity of the given starting address.
15522 * Note:
15523 * This routine is intended primarily to keep the
15524 * kernel maps more compact -- they generally don't
15525 * benefit from the "expand a map entry" technology
15526 * at allocation time because the adjacent entry
15527 * is often wired down.
15528 */
/*
 * vm_map_simplify_entry:
 *
 * Coalesce "this_entry" with its predecessor when the two entries are
 * virtually adjacent, reference the same object at contiguous offsets,
 * and agree on every attribute that affects behavior.  On success the
 * predecessor is unlinked and disposed of and "this_entry" is extended
 * to cover both ranges.  The map must be locked for write by the caller.
 */
void
vm_map_simplify_entry(
	vm_map_t map,
	vm_map_entry_t this_entry)
{
	vm_map_entry_t prev_entry;

	prev_entry = this_entry->vme_prev;

	if ((this_entry != vm_map_to_entry(map)) &&
	    (prev_entry != vm_map_to_entry(map)) &&

	    /* virtually adjacent */
	    (prev_entry->vme_end == this_entry->vme_start) &&

	    /* same object/submap, at contiguous offsets */
	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
	    (prev_entry->vme_object_value == this_entry->vme_object_value) &&
	    (prev_entry->vme_kernel_object == this_entry->vme_kernel_object) &&
	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
	    prev_entry->vme_start))
	    == VME_OFFSET(this_entry)) &&

	    /* identical behavioral attributes */
	    (prev_entry->behavior == this_entry->behavior) &&
	    (prev_entry->needs_copy == this_entry->needs_copy) &&
	    (prev_entry->protection == this_entry->protection) &&
	    (prev_entry->max_protection == this_entry->max_protection) &&
	    (prev_entry->inheritance == this_entry->inheritance) &&
	    (prev_entry->use_pmap == this_entry->use_pmap) &&
	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
	    (prev_entry->no_cache == this_entry->no_cache) &&
	    (prev_entry->vme_permanent == this_entry->vme_permanent) &&
	    (prev_entry->map_aligned == this_entry->map_aligned) &&
	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
	    (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
	    (prev_entry->vme_resilient_codesign ==
	    this_entry->vme_resilient_codesign) &&
	    (prev_entry->vme_resilient_media ==
	    this_entry->vme_resilient_media) &&
	    (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&

	    /* matching wiring */
	    (prev_entry->wired_count == this_entry->wired_count) &&
	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&

	    /* neither entry in a transient state */
	    ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
	    (prev_entry->in_transition == FALSE) &&
	    (this_entry->in_transition == FALSE) &&
	    (prev_entry->needs_wakeup == FALSE) &&
	    (this_entry->needs_wakeup == FALSE) &&
	    (prev_entry->is_shared == this_entry->is_shared) &&
	    (prev_entry->superpage_size == FALSE) &&
	    (this_entry->superpage_size == FALSE)
	    ) {
		if (prev_entry->vme_permanent) {
			assert(this_entry->vme_permanent);
			/* clear so disposal below doesn't trip on "permanent" */
			prev_entry->vme_permanent = false;
		}
		vm_map_store_entry_unlink(map, prev_entry, true);
		assert(prev_entry->vme_start < this_entry->vme_end);
		if (prev_entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
			    VM_MAP_PAGE_MASK(map)));
		}
		/* grow "this_entry" backwards over the merged range */
		this_entry->vme_start = prev_entry->vme_start;
		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, this_entry, TRUE);
		}

		/* drop the reference the disposed entry held */
		if (prev_entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(prev_entry));
		} else {
			vm_object_deallocate(VME_OBJECT(prev_entry));
		}
		vm_map_entry_dispose(prev_entry);
		SAVE_HINT_MAP_WRITE(map, this_entry);
	}
}
15608
15609 void
vm_map_simplify(vm_map_t map,vm_map_offset_t start)15610 vm_map_simplify(
15611 vm_map_t map,
15612 vm_map_offset_t start)
15613 {
15614 vm_map_entry_t this_entry;
15615
15616 vm_map_lock(map);
15617 if (vm_map_lookup_entry(map, start, &this_entry)) {
15618 vm_map_simplify_entry(map, this_entry);
15619 vm_map_simplify_entry(map, this_entry->vme_next);
15620 }
15621 vm_map_unlock(map);
15622 }
15623
15624 static void
vm_map_simplify_range(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)15625 vm_map_simplify_range(
15626 vm_map_t map,
15627 vm_map_offset_t start,
15628 vm_map_offset_t end)
15629 {
15630 vm_map_entry_t entry;
15631
15632 /*
15633 * The map should be locked (for "write") by the caller.
15634 */
15635
15636 if (start >= end) {
15637 /* invalid address range */
15638 return;
15639 }
15640
15641 start = vm_map_trunc_page(start,
15642 VM_MAP_PAGE_MASK(map));
15643 end = vm_map_round_page(end,
15644 VM_MAP_PAGE_MASK(map));
15645
15646 if (!vm_map_lookup_entry(map, start, &entry)) {
15647 /* "start" is not mapped and "entry" ends before "start" */
15648 if (entry == vm_map_to_entry(map)) {
15649 /* start with first entry in the map */
15650 entry = vm_map_first_entry(map);
15651 } else {
15652 /* start with next entry */
15653 entry = entry->vme_next;
15654 }
15655 }
15656
15657 while (entry != vm_map_to_entry(map) &&
15658 entry->vme_start <= end) {
15659 /* try and coalesce "entry" with its previous entry */
15660 vm_map_simplify_entry(map, entry);
15661 entry = entry->vme_next;
15662 }
15663 }
15664
15665
15666 /*
15667 * Routine: vm_map_machine_attribute
15668 * Purpose:
15669 * Provide machine-specific attributes to mappings,
15670 * such as cachability etc. for machines that provide
15671 * them. NUMA architectures and machines with big/strange
15672 * caches will use this.
15673 * Note:
15674 * Responsibilities for locking and checking are handled here,
15675 * everything else in the pmap module. If any non-volatile
15676 * information must be kept, the pmap module should handle
15677 * it itself. [This assumes that attributes do not
15678 * need to be inherited, which seems ok to me]
15679 */
kern_return_t
vm_map_machine_attribute(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_machine_attribute_t attribute,
	vm_machine_attribute_val_t* value)              /* IN/OUT */
{
	kern_return_t ret;
	vm_map_size_t sync_size;
	vm_map_entry_t entry;

	if (start < vm_map_min(map) || end > vm_map_max(map)) {
		return KERN_INVALID_ADDRESS;
	}

	/* Figure how much memory we need to flush (in page increments) */
	sync_size = end - start;

	vm_map_lock(map);

	if (attribute != MATTR_CACHE) {
		/* If we don't have to find physical addresses, we */
		/* don't have to do an explicit traversal here. */
		ret = pmap_attribute(map->pmap, start, end - start,
		    attribute, value);
		vm_map_unlock(map);
		return ret;
	}

	/*
	 * MATTR_CACHE requires syncing each physical page, so walk the
	 * range entry by entry, page by page.
	 */
	ret = KERN_SUCCESS;                     /* Assume it all worked */

	while (sync_size) {
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_map_size_t sub_size;
			/* clip this pass to the end of the current entry */
			if ((entry->vme_end - start) > sync_size) {
				sub_size = sync_size;
				sync_size = 0;
			} else {
				sub_size = entry->vme_end - start;
				sync_size -= sub_size;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;

				/* recurse into the submap's address range */
				sub_start = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				sub_end = sub_start + sub_size;
				vm_map_machine_attribute(
					VME_SUBMAP(entry),
					sub_start,
					sub_end,
					attribute, value);
			} else if (VME_OBJECT(entry)) {
				vm_page_t m;
				vm_object_t object;
				vm_object_t base_object;
				vm_object_t last_object;
				vm_object_offset_t offset;
				vm_object_offset_t base_offset;
				vm_map_size_t range;
				range = sub_size;
				offset = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				offset = vm_object_trunc_page(offset);
				base_offset = offset;
				object = VME_OBJECT(entry);
				base_object = object;
				last_object = NULL;

				vm_object_lock(object);

				while (range) {
					m = vm_page_lookup(
						object, offset);

					if (m && !m->vmp_fictitious) {
						/* found a real page: sync its cache */
						ret =
						    pmap_attribute_cache_sync(
							VM_PAGE_GET_PHYS_PAGE(m),
							PAGE_SIZE,
							attribute, value);
					} else if (object->shadow) {
						/*
						 * Not here: follow the shadow
						 * chain for this page, locking
						 * the shadow before unlocking.
						 */
						offset = offset + object->vo_shadow_offset;
						last_object = object;
						object = object->shadow;
						vm_object_lock(last_object->shadow);
						vm_object_unlock(last_object);
						continue;
					}
					if (range < PAGE_SIZE) {
						range = 0;
					} else {
						range -= PAGE_SIZE;
					}

					/* back to the top object for the next page */
					if (base_object != object) {
						vm_object_unlock(object);
						vm_object_lock(base_object);
						object = base_object;
					}
					/* Bump to the next page */
					base_offset += PAGE_SIZE;
					offset = base_offset;
				}
				vm_object_unlock(object);
			}
			start += sub_size;
		} else {
			/* hole in the range: give up */
			vm_map_unlock(map);
			return KERN_FAILURE;
		}
	}

	vm_map_unlock(map);

	return ret;
}
15799
15800 /*
15801 * vm_map_behavior_set:
15802 *
15803 * Sets the paging reference behavior of the specified address
15804 * range in the target map. Paging reference behavior affects
15805 * how pagein operations resulting from faults on the map will be
15806 * clustered.
15807 */
kern_return_t
vm_map_behavior_set(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_behavior_t   new_behavior)
{
	vm_map_entry_t  entry;
	vm_map_entry_t  temp_entry;

	/* Reject a malformed or out-of-bounds range up front. */
	if (start > end ||
	    start < vm_map_min(map) ||
	    end > vm_map_max(map)) {
		return KERN_NO_SPACE;
	}

	switch (new_behavior) {
	/*
	 * This first block of behaviors all set a persistent state on the specified
	 * memory range. All we have to do here is to record the desired behavior
	 * in the vm_map_entry_t's.
	 */

	case VM_BEHAVIOR_DEFAULT:
	case VM_BEHAVIOR_RANDOM:
	case VM_BEHAVIOR_SEQUENTIAL:
	case VM_BEHAVIOR_RSEQNTL:
	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
		/* exclusive lock: we modify the map entries below */
		vm_map_lock(map);

		/*
		 * The entire address range must be valid for the map.
		 * Note that vm_map_range_check() does a
		 * vm_map_lookup_entry() internally and returns the
		 * entry containing the start of the address range if
		 * the entire range is valid.
		 */
		if (vm_map_range_check(map, start, end, &temp_entry)) {
			entry = temp_entry;
			/* clip so the new behavior applies only from "start" */
			vm_map_clip_start(map, entry, start);
		} else {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		/* walk every entry overlapping [start, end), clipping the tail */
		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
			vm_map_clip_end(map, entry, end);
			if (entry->is_sub_map) {
				assert(!entry->use_pmap);
			}

			if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
				/* recorded as a separate sticky flag, not in "behavior" */
				entry->zero_wired_pages = TRUE;
			} else {
				entry->behavior = new_behavior;
			}
			entry = entry->vme_next;
		}

		vm_map_unlock(map);
		break;

	/*
	 * The rest of these are different from the above in that they cause
	 * an immediate action to take place as opposed to setting a behavior that
	 * affects future actions.
	 */

	case VM_BEHAVIOR_WILLNEED:
		return vm_map_willneed(map, start, end);

	case VM_BEHAVIOR_DONTNEED:
		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_FREE:
		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_REUSABLE:
		return vm_map_reusable_pages(map, start, end);

	case VM_BEHAVIOR_REUSE:
		return vm_map_reuse_pages(map, start, end);

	case VM_BEHAVIOR_CAN_REUSE:
		return vm_map_can_reuse(map, start, end);

#if MACH_ASSERT
	/* debug-only: force pageout of the range's internal objects */
	case VM_BEHAVIOR_PAGEOUT:
		return vm_map_pageout(map, start, end);
#endif /* MACH_ASSERT */

	default:
		return KERN_INVALID_ARGUMENT;
	}

	return KERN_SUCCESS;
}
15905
15906
15907 /*
15908 * Internals for madvise(MADV_WILLNEED) system call.
15909 *
 * The implementation:
 * a) read-ahead if the mapping corresponds to a mapped regular file
 * b) or, fault in the pages (zero-fill, decompress, etc.) if it's an anonymous mapping
15913 */
15914
15915
static kern_return_t
vm_map_willneed(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end
	)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	memory_object_t                 pager;
	struct vm_object_fault_info     fault_info = {};
	kern_return_t                   kr;
	vm_object_size_t                len;
	vm_object_offset_t              offset;

	fault_info.interruptible = THREAD_UNINT;        /* ignored value */
	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	/* NOTE(review): "stealth" presumably avoids perturbing page
	 * replacement state during the read-ahead — confirm against
	 * vm_fault's handling of this flag. */
	fault_info.stealth = TRUE;

	/*
	 * The MADV_WILLNEED operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes. Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && start < end;) {
		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found. So adjust the offset to
		 * correspond. After that, the offset will always be zero to
		 * correspond to the beginning of the current vm_map_entry.
		 */
		offset = (start - entry->vme_start) + VME_OFFSET(entry);

		/*
		 * Set the length so we don't go beyond the end of the
		 * map_entry or beyond the end of the range we were given.
		 * This range could span also multiple map entries all of which
		 * map different files, so make sure we only do the right amount
		 * of I/O for each object. Note that it's possible for there
		 * to be multiple map entries all referring to the same object
		 * but with different page permissions, but it's not worth
		 * trying to optimize that case.
		 */
		len = MIN(entry->vme_end - start, end - start);

		if ((vm_size_t) len != len) {
			/* 32-bit overflow: clamp to the largest page-sized value */
			len = (vm_size_t) (0 - PAGE_SIZE);
		}
		fault_info.cluster_size = (vm_size_t) len;
		fault_info.lo_offset = offset;
		fault_info.hi_offset = offset + len;
		fault_info.user_tag = VME_ALIAS(entry);
		fault_info.pmap_options = 0;
		/* entries with alternate (non-pmap) accounting get tagged so the
		 * fault path bills the right ledger */
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}

		/*
		 * If the entry is a submap OR there's no read permission
		 * to this mapping, then just skip it.
		 */
		if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
			/* range_check guaranteed no holes, so the next entry is
			 * contiguous while start < end */
			entry = entry->vme_next;
			start = entry->vme_start;
			continue;
		}

		object = VME_OBJECT(entry);

		if (object == NULL ||
		    (object && object->internal)) {
			/*
			 * Memory range backed by anonymous memory:
			 * fault the pages in directly (zero-fill / decompress).
			 */
			vm_size_t region_size = 0, effective_page_size = 0;
			vm_map_offset_t addr = 0, effective_page_mask = 0;

			region_size = len;
			addr = start;

			effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
			effective_page_size = effective_page_mask + 1;

			/* drop the map lock: faulting can block and will take
			 * the lock itself */
			vm_map_unlock_read(map);

			while (region_size) {
				vm_pre_fault(
					vm_map_trunc_page(addr, effective_page_mask),
					VM_PROT_READ | VM_PROT_WRITE);

				region_size -= effective_page_size;
				addr += effective_page_size;
			}
		} else {
			/*
			 * Find the file object backing this map entry. If there is
			 * none, then we simply ignore the "will need" advice for this
			 * entry and go on to the next one.
			 */
			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
				entry = entry->vme_next;
				start = entry->vme_start;
				continue;
			}

			/* NOTE(review): the unlock below implies find_vnode_object()
			 * returns the object locked — confirm. paging_begin keeps
			 * the pager alive across the unlocked I/O. */
			vm_object_paging_begin(object);
			pager = object->pager;
			vm_object_unlock(object);

			/*
			 * The data_request() could take a long time, so let's
			 * release the map lock to avoid blocking other threads.
			 */
			vm_map_unlock_read(map);

			/*
			 * Get the data from the object asynchronously.
			 *
			 * Note that memory_object_data_request() places limits on the
			 * amount of I/O it will do. Regardless of the len we
			 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
			 * silently truncates the len to that size. This isn't
			 * necessarily bad since madvise shouldn't really be used to
			 * page in unlimited amounts of data. Other Unix variants
			 * limit the willneed case as well. If this turns out to be an
			 * issue for developers, then we can always adjust the policy
			 * here and still be backwards compatible since this is all
			 * just "advice".
			 */
			kr = memory_object_data_request(
				pager,
				vm_object_trunc_page(offset) + object->paging_offset,
				0,      /* ignored */
				VM_PROT_READ,
				(memory_object_fault_info_t)&fault_info);

			vm_object_lock(object);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			/*
			 * If we couldn't do the I/O for some reason, just give up on
			 * the madvise. We still return success to the user since
			 * madvise isn't supposed to fail when the advice can't be
			 * taken.
			 */

			if (kr != KERN_SUCCESS) {
				return KERN_SUCCESS;
			}
		}

		start += len;
		if (start >= end) {
			/* done */
			return KERN_SUCCESS;
		}

		/* retake the lock and look up the next entry: the map may have
		 * changed while we were unlocked */
		vm_map_lock_read(map);
		if (!vm_map_lookup_entry(map, start, &entry)) {
			/*
			 * There's a new hole in the address range.
			 */
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
16106
16107 static boolean_t
vm_map_entry_is_reusable(vm_map_entry_t entry)16108 vm_map_entry_is_reusable(
16109 vm_map_entry_t entry)
16110 {
16111 /* Only user map entries */
16112
16113 vm_object_t object;
16114
16115 if (entry->is_sub_map) {
16116 return FALSE;
16117 }
16118
16119 switch (VME_ALIAS(entry)) {
16120 case VM_MEMORY_MALLOC:
16121 case VM_MEMORY_MALLOC_SMALL:
16122 case VM_MEMORY_MALLOC_LARGE:
16123 case VM_MEMORY_REALLOC:
16124 case VM_MEMORY_MALLOC_TINY:
16125 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
16126 case VM_MEMORY_MALLOC_LARGE_REUSED:
16127 /*
16128 * This is a malloc() memory region: check if it's still
16129 * in its original state and can be re-used for more
16130 * malloc() allocations.
16131 */
16132 break;
16133 default:
16134 /*
16135 * Not a malloc() memory region: let the caller decide if
16136 * it's re-usable.
16137 */
16138 return TRUE;
16139 }
16140
16141 if (/*entry->is_shared ||*/
16142 entry->is_sub_map ||
16143 entry->in_transition ||
16144 entry->protection != VM_PROT_DEFAULT ||
16145 entry->max_protection != VM_PROT_ALL ||
16146 entry->inheritance != VM_INHERIT_DEFAULT ||
16147 entry->no_cache ||
16148 entry->vme_permanent ||
16149 entry->superpage_size != FALSE ||
16150 entry->zero_wired_pages ||
16151 entry->wired_count != 0 ||
16152 entry->user_wired_count != 0) {
16153 return FALSE;
16154 }
16155
16156 object = VME_OBJECT(entry);
16157 if (object == VM_OBJECT_NULL) {
16158 return TRUE;
16159 }
16160 if (
16161 #if 0
16162 /*
16163 * Let's proceed even if the VM object is potentially
16164 * shared.
16165 * We check for this later when processing the actual
16166 * VM pages, so the contents will be safe if shared.
16167 *
16168 * But we can still mark this memory region as "reusable" to
16169 * acknowledge that the caller did let us know that the memory
16170 * could be re-used and should not be penalized for holding
16171 * on to it. This allows its "resident size" to not include
16172 * the reusable range.
16173 */
16174 object->ref_count == 1 &&
16175 #endif
16176 object->wired_page_count == 0 &&
16177 object->copy == VM_OBJECT_NULL &&
16178 object->shadow == VM_OBJECT_NULL &&
16179 object->internal &&
16180 object->purgable == VM_PURGABLE_DENY &&
16181 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
16182 !object->code_signed) {
16183 return TRUE;
16184 }
16185 return FALSE;
16186 }
16187
/*
 * Mark the pages of [start, end) as being in use again after a previous
 * "reusable" advice (madvise MADV_FREE_REUSE path).
 * Returns KERN_INVALID_ADDRESS if the range has holes or contains an
 * entry that fails the reusability sanity check.
 */
static kern_return_t
vm_map_reuse_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;
	vm_object_t     object;
	vm_object_offset_t start_offset, end_offset;

	/*
	 * The MADV_REUSE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a
		 * portion of a native page.
		 */
		return KERN_SUCCESS;
	}

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap); /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes. Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reuse_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reuse_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found. So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
		} else {
			start_offset = 0;
		}
		/* clamp to the intersection of the entry and [start, end) */
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		object = VME_OBJECT(entry);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
			vm_object_reuse_pages(object, start_offset, end_offset,
			    TRUE);
			vm_object_unlock(object);
		}

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reuse_pages_success++;
	return KERN_SUCCESS;
}
16280
16281
/*
 * Mark the pages of [start, end) as "reusable" (madvise MADV_FREE_REUSABLE
 * path): deactivate them so the system may reclaim them cheaply, and
 * discard their contents when it is provably safe to do so.
 */
static kern_return_t
vm_map_reusable_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;
	vm_object_t     object;
	vm_object_offset_t start_offset, end_offset;
	vm_map_offset_t pmap_offset;

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a portion
		 * of a native page.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap); /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes. Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reusable_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/* 1 = safe to discard contents, -1 = possibly shared, keep */
		int kill_pages = 0;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
			/* not writable: can't discard contents */
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_nonwritable++;
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found. So adjust the offset to
		 * correspond.  "pmap_offset" keeps the matching VA for the
		 * pmap-level updates.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
			pmap_offset = start;
		} else {
			start_offset = 0;
			pmap_offset = entry->vme_start;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}


		vm_object_lock(object);
		/*
		 * Contents may be discarded only when no other object can
		 * reach these pages: sole reference, or an asymmetric copy
		 * strategy with no copy object; no shadow chain either.
		 */
		if (((object->ref_count == 1) ||
		    (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
		    object->copy == VM_OBJECT_NULL)) &&
		    object->shadow == VM_OBJECT_NULL &&
		    /*
		     * "iokit_acct" entries are billed for their virtual size
		     * (rather than for their resident pages only), so they
		     * wouldn't benefit from making pages reusable, and it
		     * would be hard to keep track of pages that are both
		     * "iokit_acct" and "reusable" in the pmap stats and
		     * ledgers.
		     */
		    !(entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap))) {
			if (object->ref_count != 1) {
				vm_page_stats_reusable.reusable_shared++;
			}
			kill_pages = 1;
		} else {
			kill_pages = -1;
		}
		if (kill_pages != -1) {
			/* deactivate (and possibly discard) the range's pages */
			vm_object_deactivate_pages(object,
			    start_offset,
			    end_offset - start_offset,
			    kill_pages,
			    TRUE /*reusable_pages*/,
			    map->pmap,
			    pmap_offset);
		} else {
			/* shared object: just account for the skipped range */
			vm_page_stats_reusable.reusable_pages_shared++;
			DTRACE_VM4(vm_map_reusable_pages_shared,
			    unsigned int, VME_ALIAS(entry),
			    vm_map_t, map,
			    vm_map_entry_t, entry,
			    vm_object_t, object);
		}
		vm_object_unlock(object);

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
		    VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reusable_pages_success++;
	return KERN_SUCCESS;
}
16426
16427
16428 static kern_return_t
vm_map_can_reuse(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)16429 vm_map_can_reuse(
16430 vm_map_t map,
16431 vm_map_offset_t start,
16432 vm_map_offset_t end)
16433 {
16434 vm_map_entry_t entry;
16435
16436 /*
16437 * The MADV_REUSABLE operation doesn't require any changes to the
16438 * vm_map_entry_t's, so the read lock is sufficient.
16439 */
16440
16441 vm_map_lock_read(map);
16442 assert(map->pmap != kernel_pmap); /* protect alias access */
16443
16444 /*
16445 * The madvise semantics require that the address range be fully
16446 * allocated with no holes. Otherwise, we're required to return
16447 * an error.
16448 */
16449
16450 if (!vm_map_range_check(map, start, end, &entry)) {
16451 vm_map_unlock_read(map);
16452 vm_page_stats_reusable.can_reuse_failure++;
16453 return KERN_INVALID_ADDRESS;
16454 }
16455
16456 /*
16457 * Examine each vm_map_entry_t in the range.
16458 */
16459 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16460 entry = entry->vme_next) {
16461 /*
16462 * Sanity check on the VM map entry.
16463 */
16464 if (!vm_map_entry_is_reusable(entry)) {
16465 vm_map_unlock_read(map);
16466 vm_page_stats_reusable.can_reuse_failure++;
16467 return KERN_INVALID_ADDRESS;
16468 }
16469 }
16470
16471 vm_map_unlock_read(map);
16472 vm_page_stats_reusable.can_reuse_success++;
16473 return KERN_SUCCESS;
16474 }
16475
16476
16477 #if MACH_ASSERT
/*
 * Debug-only (MACH_ASSERT) helper: force pageout of the internal VM
 * objects backing [start, end), descending one level into submaps.
 */
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	/*
	 * The MADV_PAGEOUT operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes. Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		vm_object_t     object;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (entry->is_sub_map) {
			/* one level of submap: page out its backing objects too */
			vm_map_t submap;
			vm_map_offset_t submap_start;
			vm_map_offset_t submap_end;
			vm_map_entry_t submap_entry;

			submap = VME_SUBMAP(entry);
			submap_start = VME_OFFSET(entry);
			submap_end = submap_start + (entry->vme_end -
			    entry->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
			    submap_start,
			    submap_end,
			    &submap_entry)) {
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			/* don't recurse into nested submaps */
			if (submap_entry->is_sub_map) {
				vm_map_unlock_read(submap);
				continue;
			}

			/* only internal (anonymous) objects can be paged out here */
			object = VME_OBJECT(submap_entry);
			if (object == VM_OBJECT_NULL || !object->internal) {
				vm_map_unlock_read(submap);
				continue;
			}

			vm_object_pageout(object);

			vm_map_unlock_read(submap);
			submap = VM_MAP_NULL;
			submap_entry = VM_MAP_ENTRY_NULL;
			continue;
		}

		/* only internal (anonymous) objects can be paged out here */
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL || !object->internal) {
			continue;
		}

		vm_object_pageout(object);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
16566 #endif /* MACH_ASSERT */
16567
16568
16569 /*
16570 * Routine: vm_map_entry_insert
16571 *
16572 * Description: This routine inserts a new vm_entry in a locked map.
16573 */
/*
 * Allocate a new map entry covering [start, end), initialize it from the
 * given object/submap, offset, protections, and flags, and link it into
 * the (exclusively locked) map after "insp_entry".
 * Returns the newly inserted entry.
 */
static vm_map_entry_t
vm_map_entry_insert(
	vm_map_t                map,
	vm_map_entry_t          insp_entry,     /* entry to insert after */
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_object_t             object,         /* VM object, or a vm_map_t if vmkf_submap */
	vm_object_offset_t      offset,
	vm_map_kernel_flags_t   vmk_flags,
	boolean_t               needs_copy,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance,
	boolean_t               no_cache,
	boolean_t               permanent,
	unsigned int            superpage_size,
	boolean_t               clear_map_aligned,
	int                     alias)
{
	vm_map_entry_t  new_entry;
	boolean_t       map_aligned = FALSE;

	assert(insp_entry != (vm_map_entry_t)0);
	/* caller must hold the map exclusively: we link a new entry below */
	vm_map_lock_assert_exclusive(map);

#if DEVELOPMENT || DEBUG
	/* catch "offset + size" overflow early on debug kernels */
	vm_object_offset_t end_offset = 0;
	assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
#endif /* DEVELOPMENT || DEBUG */

	/* maps with a non-native page size track map-alignment explicitly */
	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		map_aligned = TRUE;
	}
	if (clear_map_aligned &&
	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
		map_aligned = FALSE;
	}
	if (map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
	} else {
		assert(page_aligned(start));
		assert(page_aligned(end));
	}
	assert(start < end);

	new_entry = vm_map_entry_create(map);

	new_entry->vme_start = start;
	new_entry->vme_end = end;

	if (vmk_flags.vmkf_submap) {
		/* "object" actually carries the submap in this case */
		new_entry->vme_atomic = vmk_flags.vmkf_submap_atomic;
		VME_SUBMAP_SET(new_entry, (vm_map_t)object);
	} else {
		VME_OBJECT_SET(new_entry, object, false, 0);
	}
	VME_OFFSET_SET(new_entry, offset);
	VME_ALIAS_SET(new_entry, alias);

	new_entry->map_aligned = map_aligned;
	new_entry->needs_copy = needs_copy;
	new_entry->inheritance = inheritance;
	new_entry->protection = cur_protection;
	new_entry->max_protection = max_protection;
	/*
	 * submap: "use_pmap" means "nested".
	 * default: false.
	 *
	 * object: "use_pmap" means "use pmap accounting" for footprint.
	 * default: true.
	 */
	new_entry->use_pmap = !vmk_flags.vmkf_submap;
	new_entry->no_cache = no_cache;
	new_entry->vme_permanent = permanent;
	new_entry->translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
	new_entry->vme_no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
	new_entry->superpage_size = (superpage_size != 0);

	if (vmk_flags.vmkf_map_jit) {
		/* at most one JIT entry per map, unless policy allows more */
		if (!(map->jit_entry_exists) ||
		    VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
			new_entry->used_for_jit = TRUE;
			map->jit_entry_exists = TRUE;
		}
	}

	/*
	 * Insert the new entry into the list.
	 */

	vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
	map->size += end - start;

	/*
	 * Update the free space hint and the lookup hint.
	 */

	SAVE_HINT_MAP_WRITE(map, new_entry);
	return new_entry;
}
16676
16677 /*
16678 * Routine: vm_map_remap_extract
16679 *
16680 * Description: This routine returns a vm_entry list from a map.
16681 */
16682 static kern_return_t
vm_map_remap_extract(vm_map_t map,vm_map_offset_t addr,vm_map_size_t size,boolean_t copy,struct vm_map_header * map_header,vm_prot_t * cur_protection,vm_prot_t * max_protection,vm_inherit_t inheritance,vm_map_kernel_flags_t vmk_flags)16683 vm_map_remap_extract(
16684 vm_map_t map,
16685 vm_map_offset_t addr,
16686 vm_map_size_t size,
16687 boolean_t copy,
16688 struct vm_map_header *map_header,
16689 vm_prot_t *cur_protection, /* IN/OUT */
16690 vm_prot_t *max_protection, /* IN/OUT */
16691 /* What, no behavior? */
16692 vm_inherit_t inheritance,
16693 vm_map_kernel_flags_t vmk_flags)
16694 {
16695 kern_return_t result;
16696 vm_map_size_t mapped_size;
16697 vm_map_size_t tmp_size;
16698 vm_map_entry_t src_entry; /* result of last map lookup */
16699 vm_map_entry_t new_entry;
16700 vm_object_offset_t offset;
16701 vm_map_offset_t map_address;
16702 vm_map_offset_t src_start; /* start of entry to map */
16703 vm_map_offset_t src_end; /* end of region to be mapped */
16704 vm_object_t object;
16705 vm_map_version_t version;
16706 boolean_t src_needs_copy;
16707 boolean_t new_entry_needs_copy;
16708 vm_map_entry_t saved_src_entry;
16709 boolean_t src_entry_was_wired;
16710 vm_prot_t max_prot_for_prot_copy;
16711 vm_map_offset_t effective_page_mask;
16712 boolean_t pageable, same_map;
16713 boolean_t vm_remap_legacy;
16714 vm_prot_t required_cur_prot, required_max_prot;
16715 vm_object_t new_copy_object; /* vm_object_copy_* result */
16716 boolean_t saved_used_for_jit; /* Saved used_for_jit. */
16717
16718 pageable = vmk_flags.vmkf_copy_pageable;
16719 same_map = vmk_flags.vmkf_copy_same_map;
16720
16721 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16722
16723 assert(map != VM_MAP_NULL);
16724 assert(size != 0);
16725 assert(size == vm_map_round_page(size, effective_page_mask));
16726 assert(inheritance == VM_INHERIT_NONE ||
16727 inheritance == VM_INHERIT_COPY ||
16728 inheritance == VM_INHERIT_SHARE);
16729 assert(!(*cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16730 assert(!(*max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16731 assert((*cur_protection & *max_protection) == *cur_protection);
16732
16733 /*
16734 * Compute start and end of region.
16735 */
16736 src_start = vm_map_trunc_page(addr, effective_page_mask);
16737 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16738
16739 /*
16740 * Initialize map_header.
16741 */
16742 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16743 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16744 map_header->nentries = 0;
16745 map_header->entries_pageable = pageable;
16746 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16747 map_header->page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(map);
16748 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16749
16750 vm_map_store_init( map_header );
16751
16752 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16753 /*
16754 * Special case for vm_map_protect(VM_PROT_COPY):
16755 * we want to set the new mappings' max protection to the
16756 * specified *max_protection...
16757 */
16758 max_prot_for_prot_copy = *max_protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
16759 /* ... but we want to use the vm_remap() legacy mode */
16760 *max_protection = VM_PROT_NONE;
16761 *cur_protection = VM_PROT_NONE;
16762 } else {
16763 max_prot_for_prot_copy = VM_PROT_NONE;
16764 }
16765
16766 if (*cur_protection == VM_PROT_NONE &&
16767 *max_protection == VM_PROT_NONE) {
16768 /*
16769 * vm_remap() legacy mode:
16770 * Extract all memory regions in the specified range and
16771 * collect the strictest set of protections allowed on the
16772 * entire range, so the caller knows what they can do with
16773 * the remapped range.
16774 * We start with VM_PROT_ALL and we'll remove the protections
16775 * missing from each memory region.
16776 */
16777 vm_remap_legacy = TRUE;
16778 *cur_protection = VM_PROT_ALL;
16779 *max_protection = VM_PROT_ALL;
16780 required_cur_prot = VM_PROT_NONE;
16781 required_max_prot = VM_PROT_NONE;
16782 } else {
16783 /*
16784 * vm_remap_new() mode:
16785 * Extract all memory regions in the specified range and
16786 * ensure that they have at least the protections specified
16787 * by the caller via *cur_protection and *max_protection.
16788 * The resulting mapping should have these protections.
16789 */
16790 vm_remap_legacy = FALSE;
16791 if (copy) {
16792 required_cur_prot = VM_PROT_NONE;
16793 required_max_prot = VM_PROT_READ;
16794 } else {
16795 required_cur_prot = *cur_protection;
16796 required_max_prot = *max_protection;
16797 }
16798 }
16799
16800 map_address = 0;
16801 mapped_size = 0;
16802 result = KERN_SUCCESS;
16803
16804 /*
16805 * The specified source virtual space might correspond to
16806 * multiple map entries, need to loop on them.
16807 */
16808 vm_map_lock(map);
16809 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16810 /*
16811 * This address space uses sub-pages so the range might
16812 * not be re-mappable in an address space with larger
16813 * pages. Re-assemble any broken-up VM map entries to
16814 * improve our chances of making it work.
16815 */
16816 vm_map_simplify_range(map, src_start, src_end);
16817 }
16818 while (mapped_size != size) {
16819 vm_map_size_t entry_size;
16820
16821 /*
16822 * Find the beginning of the region.
16823 */
16824 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16825 result = KERN_INVALID_ADDRESS;
16826 break;
16827 }
16828
16829 if (src_start < src_entry->vme_start ||
16830 (mapped_size && src_start != src_entry->vme_start)) {
16831 result = KERN_INVALID_ADDRESS;
16832 break;
16833 }
16834
16835 tmp_size = size - mapped_size;
16836 if (src_end > src_entry->vme_end) {
16837 tmp_size -= (src_end - src_entry->vme_end);
16838 }
16839
16840 entry_size = (vm_map_size_t)(src_entry->vme_end -
16841 src_entry->vme_start);
16842
16843 if (src_entry->is_sub_map &&
16844 vmk_flags.vmkf_copy_single_object) {
16845 vm_map_t submap;
16846 vm_map_offset_t submap_start;
16847 vm_map_size_t submap_size;
16848 boolean_t submap_needs_copy;
16849
16850 /*
16851 * No check for "required protection" on "src_entry"
16852 * because the protections that matter are the ones
16853 * on the submap's VM map entry, which will be checked
16854 * during the call to vm_map_remap_extract() below.
16855 */
16856 submap_size = src_entry->vme_end - src_start;
16857 if (submap_size > size) {
16858 submap_size = size;
16859 }
16860 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16861 submap = VME_SUBMAP(src_entry);
16862 if (copy) {
16863 /*
16864 * The caller wants a copy-on-write re-mapping,
16865 * so let's extract from the submap accordingly.
16866 */
16867 submap_needs_copy = TRUE;
16868 } else if (src_entry->needs_copy) {
16869 /*
16870 * The caller wants a shared re-mapping but the
16871 * submap is mapped with "needs_copy", so its
16872 * contents can't be shared as is. Extract the
16873 * contents of the submap as "copy-on-write".
16874 * The re-mapping won't be shared with the
16875 * original mapping but this is equivalent to
16876 * what happened with the original "remap from
16877 * submap" code.
16878 * The shared region is mapped "needs_copy", for
16879 * example.
16880 */
16881 submap_needs_copy = TRUE;
16882 } else {
16883 /*
16884 * The caller wants a shared re-mapping and
16885 * this mapping can be shared (no "needs_copy"),
16886 * so let's extract from the submap accordingly.
16887 * Kernel submaps are mapped without
16888 * "needs_copy", for example.
16889 */
16890 submap_needs_copy = FALSE;
16891 }
16892 vm_map_reference(submap);
16893 vm_map_unlock(map);
16894 src_entry = NULL;
16895 if (vm_remap_legacy) {
16896 *cur_protection = VM_PROT_NONE;
16897 *max_protection = VM_PROT_NONE;
16898 }
16899
16900 DTRACE_VM7(remap_submap_recurse,
16901 vm_map_t, map,
16902 vm_map_offset_t, addr,
16903 vm_map_size_t, size,
16904 boolean_t, copy,
16905 vm_map_offset_t, submap_start,
16906 vm_map_size_t, submap_size,
16907 boolean_t, submap_needs_copy);
16908
16909 result = vm_map_remap_extract(submap,
16910 submap_start,
16911 submap_size,
16912 submap_needs_copy,
16913 map_header,
16914 cur_protection,
16915 max_protection,
16916 inheritance,
16917 vmk_flags);
16918 vm_map_deallocate(submap);
16919 return result;
16920 }
16921
16922 if (src_entry->is_sub_map) {
16923 /* protections for submap mapping are irrelevant here */
16924 } else if (((src_entry->protection & required_cur_prot) !=
16925 required_cur_prot) ||
16926 ((src_entry->max_protection & required_max_prot) !=
16927 required_max_prot)) {
16928 if (vmk_flags.vmkf_copy_single_object &&
16929 mapped_size != 0) {
16930 /*
16931 * Single object extraction.
16932 * We can't extract more with the required
16933 * protection but we've extracted some, so
16934 * stop there and declare success.
16935 * The caller should check the size of
16936 * the copy entry we've extracted.
16937 */
16938 result = KERN_SUCCESS;
16939 } else {
16940 /*
16941 * VM range extraction.
16942 * Required proctection is not available
16943 * for this part of the range: fail.
16944 */
16945 result = KERN_PROTECTION_FAILURE;
16946 }
16947 break;
16948 }
16949
16950 if (src_entry->is_sub_map) {
16951 vm_map_t submap;
16952 vm_map_offset_t submap_start;
16953 vm_map_size_t submap_size;
16954 vm_map_copy_t submap_copy;
16955 vm_prot_t submap_curprot, submap_maxprot;
16956 boolean_t submap_needs_copy;
16957
16958 /*
16959 * No check for "required protection" on "src_entry"
16960 * because the protections that matter are the ones
16961 * on the submap's VM map entry, which will be checked
16962 * during the call to vm_map_copy_extract() below.
16963 */
16964 object = VM_OBJECT_NULL;
16965 submap_copy = VM_MAP_COPY_NULL;
16966
16967 /* find equivalent range in the submap */
16968 submap = VME_SUBMAP(src_entry);
16969 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16970 submap_size = tmp_size;
16971 if (copy) {
16972 /*
16973 * The caller wants a copy-on-write re-mapping,
16974 * so let's extract from the submap accordingly.
16975 */
16976 submap_needs_copy = TRUE;
16977 } else if (src_entry->needs_copy) {
16978 /*
16979 * The caller wants a shared re-mapping but the
16980 * submap is mapped with "needs_copy", so its
16981 * contents can't be shared as is. Extract the
16982 * contents of the submap as "copy-on-write".
16983 * The re-mapping won't be shared with the
16984 * original mapping but this is equivalent to
16985 * what happened with the original "remap from
16986 * submap" code.
16987 * The shared region is mapped "needs_copy", for
16988 * example.
16989 */
16990 submap_needs_copy = TRUE;
16991 } else {
16992 /*
16993 * The caller wants a shared re-mapping and
16994 * this mapping can be shared (no "needs_copy"),
16995 * so let's extract from the submap accordingly.
16996 * Kernel submaps are mapped without
16997 * "needs_copy", for example.
16998 */
16999 submap_needs_copy = FALSE;
17000 }
17001 /* extra ref to keep submap alive */
17002 vm_map_reference(submap);
17003
17004 DTRACE_VM7(remap_submap_recurse,
17005 vm_map_t, map,
17006 vm_map_offset_t, addr,
17007 vm_map_size_t, size,
17008 boolean_t, copy,
17009 vm_map_offset_t, submap_start,
17010 vm_map_size_t, submap_size,
17011 boolean_t, submap_needs_copy);
17012
17013 /*
17014 * The map can be safely unlocked since we
17015 * already hold a reference on the submap.
17016 *
17017 * No timestamp since we don't care if the map
17018 * gets modified while we're down in the submap.
17019 * We'll resume the extraction at src_start + tmp_size
17020 * anyway.
17021 */
17022 vm_map_unlock(map);
17023 src_entry = NULL; /* not valid once map is unlocked */
17024
17025 if (vm_remap_legacy) {
17026 submap_curprot = VM_PROT_NONE;
17027 submap_maxprot = VM_PROT_NONE;
17028 if (max_prot_for_prot_copy) {
17029 submap_maxprot = max_prot_for_prot_copy;
17030 }
17031 } else {
17032 assert(!max_prot_for_prot_copy);
17033 submap_curprot = *cur_protection;
17034 submap_maxprot = *max_protection;
17035 }
17036 result = vm_map_copy_extract(submap,
17037 submap_start,
17038 submap_size,
17039 submap_needs_copy,
17040 &submap_copy,
17041 &submap_curprot,
17042 &submap_maxprot,
17043 inheritance,
17044 vmk_flags);
17045
17046 /* release extra ref on submap */
17047 vm_map_deallocate(submap);
17048 submap = VM_MAP_NULL;
17049
17050 if (result != KERN_SUCCESS) {
17051 vm_map_lock(map);
17052 break;
17053 }
17054
17055 /* transfer submap_copy entries to map_header */
17056 while (vm_map_copy_first_entry(submap_copy) !=
17057 vm_map_copy_to_entry(submap_copy)) {
17058 vm_map_entry_t copy_entry;
17059 vm_map_size_t copy_entry_size;
17060
17061 copy_entry = vm_map_copy_first_entry(submap_copy);
17062
17063 /*
17064 * Prevent kernel_object from being exposed to
17065 * user space.
17066 */
17067 if (__improbable(copy_entry->vme_kernel_object)) {
17068 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
17069 proc_selfpid(),
17070 (get_bsdtask_info(current_task())
17071 ? proc_name_address(get_bsdtask_info(current_task()))
17072 : "?"));
17073 DTRACE_VM(extract_kernel_only);
17074 result = KERN_INVALID_RIGHT;
17075 vm_map_copy_discard(submap_copy);
17076 submap_copy = VM_MAP_COPY_NULL;
17077 vm_map_lock(map);
17078 break;
17079 }
17080
17081 vm_map_copy_entry_unlink(submap_copy, copy_entry);
17082 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
17083 copy_entry->vme_start = map_address;
17084 copy_entry->vme_end = map_address + copy_entry_size;
17085 map_address += copy_entry_size;
17086 mapped_size += copy_entry_size;
17087 src_start += copy_entry_size;
17088 assert(src_start <= src_end);
17089 _vm_map_store_entry_link(map_header,
17090 map_header->links.prev,
17091 copy_entry);
17092 }
17093 /* done with submap_copy */
17094 vm_map_copy_discard(submap_copy);
17095
17096 if (vm_remap_legacy) {
17097 *cur_protection &= submap_curprot;
17098 *max_protection &= submap_maxprot;
17099 }
17100
17101 /* re-acquire the map lock and continue to next entry */
17102 vm_map_lock(map);
17103 continue;
17104 } else {
17105 object = VME_OBJECT(src_entry);
17106
17107 /*
17108 * Prevent kernel_object from being exposed to
17109 * user space.
17110 */
17111 if (__improbable(object == kernel_object)) {
17112 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
17113 proc_selfpid(),
17114 (get_bsdtask_info(current_task())
17115 ? proc_name_address(get_bsdtask_info(current_task()))
17116 : "?"));
17117 DTRACE_VM(extract_kernel_only);
17118 result = KERN_INVALID_RIGHT;
17119 break;
17120 }
17121
17122 if (src_entry->iokit_acct) {
17123 /*
17124 * This entry uses "IOKit accounting".
17125 */
17126 } else if (object != VM_OBJECT_NULL &&
17127 (object->purgable != VM_PURGABLE_DENY ||
17128 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
17129 /*
17130 * Purgeable objects have their own accounting:
17131 * no pmap accounting for them.
17132 */
17133 assertf(!src_entry->use_pmap,
17134 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
17135 map,
17136 src_entry,
17137 (uint64_t)src_entry->vme_start,
17138 (uint64_t)src_entry->vme_end,
17139 src_entry->protection,
17140 src_entry->max_protection,
17141 VME_ALIAS(src_entry));
17142 } else {
17143 /*
17144 * Not IOKit or purgeable:
17145 * must be accounted by pmap stats.
17146 */
17147 assertf(src_entry->use_pmap,
17148 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
17149 map,
17150 src_entry,
17151 (uint64_t)src_entry->vme_start,
17152 (uint64_t)src_entry->vme_end,
17153 src_entry->protection,
17154 src_entry->max_protection,
17155 VME_ALIAS(src_entry));
17156 }
17157
17158 if (object == VM_OBJECT_NULL) {
17159 assert(!src_entry->needs_copy);
17160 if (src_entry->max_protection == VM_PROT_NONE) {
17161 assert(src_entry->protection == VM_PROT_NONE);
17162 /*
17163 * No VM object and no permissions:
17164 * this must be a reserved range with
17165 * nothing to share or copy.
17166 * There could also be all sorts of
17167 * pmap shenanigans within that reserved
17168 * range, so let's just copy the map
17169 * entry as is to remap a similar
17170 * reserved range.
17171 */
17172 offset = 0; /* no object => no offset */
17173 goto copy_src_entry;
17174 }
17175 object = vm_object_allocate(entry_size);
17176 VME_OFFSET_SET(src_entry, 0);
17177 VME_OBJECT_SET(src_entry, object, false, 0);
17178 assert(src_entry->use_pmap);
17179 assert(!map->mapped_in_other_pmaps);
17180 } else if (src_entry->wired_count ||
17181 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
17182 /*
17183 * A wired memory region should not have
17184 * any pending copy-on-write and needs to
17185 * keep pointing at the VM object that
17186 * contains the wired pages.
17187 * If we're sharing this memory (copy=false),
17188 * we'll share this VM object.
17189 * If we're copying this memory (copy=true),
17190 * we'll call vm_object_copy_slowly() below
17191 * and use the new VM object for the remapping.
17192 *
17193 * Or, we are already using an asymmetric
17194 * copy, and therefore we already have
17195 * the right object.
17196 */
17197 assert(!src_entry->needs_copy);
17198 } else if (src_entry->needs_copy || object->shadowed ||
17199 (object->internal && !object->true_share &&
17200 !src_entry->is_shared &&
17201 object->vo_size > entry_size)) {
17202 VME_OBJECT_SHADOW(src_entry, entry_size,
17203 vm_map_always_shadow(map));
17204 assert(src_entry->use_pmap);
17205
17206 if (!src_entry->needs_copy &&
17207 (src_entry->protection & VM_PROT_WRITE)) {
17208 vm_prot_t prot;
17209
17210 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17211
17212 prot = src_entry->protection & ~VM_PROT_WRITE;
17213
17214 if (override_nx(map,
17215 VME_ALIAS(src_entry))
17216 && prot) {
17217 prot |= VM_PROT_EXECUTE;
17218 }
17219
17220 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17221
17222 if (map->mapped_in_other_pmaps) {
17223 vm_object_pmap_protect(
17224 VME_OBJECT(src_entry),
17225 VME_OFFSET(src_entry),
17226 entry_size,
17227 PMAP_NULL,
17228 PAGE_SIZE,
17229 src_entry->vme_start,
17230 prot);
17231 #if MACH_ASSERT
17232 } else if (__improbable(map->pmap == PMAP_NULL)) {
17233 extern boolean_t vm_tests_in_progress;
17234 assert(vm_tests_in_progress);
17235 /*
17236 * Some VM tests (in vm_tests.c)
17237 * sometimes want to use a VM
17238 * map without a pmap.
17239 * Otherwise, this should never
17240 * happen.
17241 */
17242 #endif /* MACH_ASSERT */
17243 } else {
17244 pmap_protect(vm_map_pmap(map),
17245 src_entry->vme_start,
17246 src_entry->vme_end,
17247 prot);
17248 }
17249 }
17250
17251 object = VME_OBJECT(src_entry);
17252 src_entry->needs_copy = FALSE;
17253 }
17254
17255
17256 vm_object_lock(object);
17257 vm_object_reference_locked(object); /* object ref. for new entry */
17258 assert(!src_entry->needs_copy);
17259 if (object->copy_strategy ==
17260 MEMORY_OBJECT_COPY_SYMMETRIC) {
17261 /*
17262 * If we want to share this object (copy==0),
17263 * it needs to be COPY_DELAY.
17264 * If we want to copy this object (copy==1),
17265 * we can't just set "needs_copy" on our side
17266 * and expect the other side to do the same
17267 * (symmetrically), so we can't let the object
17268 * stay COPY_SYMMETRIC.
17269 * So we always switch from COPY_SYMMETRIC to
17270 * COPY_DELAY.
17271 */
17272 object->copy_strategy =
17273 MEMORY_OBJECT_COPY_DELAY;
17274 object->true_share = TRUE;
17275 }
17276 vm_object_unlock(object);
17277 }
17278
17279 offset = (VME_OFFSET(src_entry) +
17280 (src_start - src_entry->vme_start));
17281
17282 copy_src_entry:
17283 new_entry = _vm_map_entry_create(map_header);
17284 vm_map_entry_copy(map, new_entry, src_entry);
17285 if (new_entry->is_sub_map) {
17286 /* clr address space specifics */
17287 new_entry->use_pmap = FALSE;
17288 } else if (copy) {
17289 /*
17290 * We're dealing with a copy-on-write operation,
17291 * so the resulting mapping should not inherit the
17292 * original mapping's accounting settings.
17293 * "use_pmap" should be reset to its default (TRUE)
17294 * so that the new mapping gets accounted for in
17295 * the task's memory footprint.
17296 */
17297 new_entry->use_pmap = TRUE;
17298 }
17299 /* "iokit_acct" was cleared in vm_map_entry_copy() */
17300 assert(!new_entry->iokit_acct);
17301
17302 new_entry->map_aligned = FALSE;
17303
17304 new_entry->vme_start = map_address;
17305 new_entry->vme_end = map_address + tmp_size;
17306 assert(new_entry->vme_start < new_entry->vme_end);
17307 if (copy && vmk_flags.vmkf_remap_prot_copy) {
17308 /* security: keep "permanent" and "pmap_cs_associated" */
17309 new_entry->vme_permanent = src_entry->vme_permanent;
17310 new_entry->pmap_cs_associated = src_entry->pmap_cs_associated;
17311 /*
17312 * Remapping for vm_map_protect(VM_PROT_COPY)
17313 * to convert a read-only mapping into a
17314 * copy-on-write version of itself but
17315 * with write access:
17316 * keep the original inheritance but let's not
17317 * add VM_PROT_WRITE to the max protection yet
17318 * since we want to do more security checks against
17319 * the target map.
17320 */
17321 new_entry->inheritance = src_entry->inheritance;
17322 new_entry->protection &= max_prot_for_prot_copy;
17323 } else {
17324 new_entry->inheritance = inheritance;
17325 if (!vm_remap_legacy) {
17326 new_entry->protection = *cur_protection;
17327 new_entry->max_protection = *max_protection;
17328 }
17329 }
17330 VME_OFFSET_SET(new_entry, offset);
17331
17332 /*
17333 * The new region has to be copied now if required.
17334 */
17335 RestartCopy:
17336 if (!copy) {
17337 if (src_entry->used_for_jit == TRUE) {
17338 if (same_map) {
17339 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
17340 /*
17341 * Cannot allow an entry describing a JIT
17342 * region to be shared across address spaces.
17343 */
17344 result = KERN_INVALID_ARGUMENT;
17345 vm_object_deallocate(object);
17346 vm_map_entry_dispose(new_entry);
17347 new_entry = VM_MAP_ENTRY_NULL;
17348 break;
17349 }
17350 }
17351
17352 src_entry->is_shared = TRUE;
17353 new_entry->is_shared = TRUE;
17354 if (!(new_entry->is_sub_map)) {
17355 new_entry->needs_copy = FALSE;
17356 }
17357 } else if (src_entry->is_sub_map) {
17358 /* make this a COW sub_map if not already */
17359 assert(new_entry->wired_count == 0);
17360 new_entry->needs_copy = TRUE;
17361 object = VM_OBJECT_NULL;
17362 } else if (src_entry->wired_count == 0 &&
17363 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
17364 vm_object_copy_quickly(VME_OBJECT(new_entry),
17365 VME_OFFSET(new_entry),
17366 (new_entry->vme_end -
17367 new_entry->vme_start),
17368 &src_needs_copy,
17369 &new_entry_needs_copy)) {
17370 new_entry->needs_copy = new_entry_needs_copy;
17371 new_entry->is_shared = FALSE;
17372 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17373
17374 /*
17375 * Handle copy_on_write semantics.
17376 */
17377 if (src_needs_copy && !src_entry->needs_copy) {
17378 vm_prot_t prot;
17379
17380 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17381
17382 prot = src_entry->protection & ~VM_PROT_WRITE;
17383
17384 if (override_nx(map,
17385 VME_ALIAS(src_entry))
17386 && prot) {
17387 prot |= VM_PROT_EXECUTE;
17388 }
17389
17390 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17391
17392 vm_object_pmap_protect(object,
17393 offset,
17394 entry_size,
17395 ((src_entry->is_shared
17396 || map->mapped_in_other_pmaps) ?
17397 PMAP_NULL : map->pmap),
17398 VM_MAP_PAGE_SIZE(map),
17399 src_entry->vme_start,
17400 prot);
17401
17402 assert(src_entry->wired_count == 0);
17403 src_entry->needs_copy = TRUE;
17404 }
17405 /*
17406 * Throw away the old object reference of the new entry.
17407 */
17408 vm_object_deallocate(object);
17409 } else {
17410 new_entry->is_shared = FALSE;
17411 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17412
17413 src_entry_was_wired = (src_entry->wired_count > 0);
17414 saved_src_entry = src_entry;
17415 src_entry = VM_MAP_ENTRY_NULL;
17416
17417 /*
17418 * The map can be safely unlocked since we
17419 * already hold a reference on the object.
17420 *
17421 * Record the timestamp of the map for later
17422 * verification, and unlock the map.
17423 */
17424 version.main_timestamp = map->timestamp;
17425 vm_map_unlock(map); /* Increments timestamp once! */
17426
17427 /*
17428 * Perform the copy.
17429 */
17430 if (src_entry_was_wired > 0 ||
17431 (debug4k_no_cow_copyin &&
17432 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
17433 vm_object_lock(object);
17434 result = vm_object_copy_slowly(
17435 object,
17436 offset,
17437 (new_entry->vme_end -
17438 new_entry->vme_start),
17439 THREAD_UNINT,
17440 &new_copy_object);
17441 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17442 saved_used_for_jit = new_entry->used_for_jit;
17443 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
17444 new_entry->used_for_jit = saved_used_for_jit;
17445 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
17446 new_entry->needs_copy = FALSE;
17447 } else {
17448 vm_object_offset_t new_offset;
17449
17450 new_offset = VME_OFFSET(new_entry);
17451 result = vm_object_copy_strategically(
17452 object,
17453 offset,
17454 (new_entry->vme_end -
17455 new_entry->vme_start),
17456 &new_copy_object,
17457 &new_offset,
17458 &new_entry_needs_copy);
17459 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17460 saved_used_for_jit = new_entry->used_for_jit;
17461 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
17462 new_entry->used_for_jit = saved_used_for_jit;
17463 if (new_offset != VME_OFFSET(new_entry)) {
17464 VME_OFFSET_SET(new_entry, new_offset);
17465 }
17466
17467 new_entry->needs_copy = new_entry_needs_copy;
17468 }
17469
17470 /*
17471 * Throw away the old object reference of the new entry.
17472 */
17473 vm_object_deallocate(object);
17474
17475 if (result != KERN_SUCCESS &&
17476 result != KERN_MEMORY_RESTART_COPY) {
17477 vm_map_entry_dispose(new_entry);
17478 vm_map_lock(map);
17479 break;
17480 }
17481
17482 /*
17483 * Verify that the map has not substantially
17484 * changed while the copy was being made.
17485 */
17486
17487 vm_map_lock(map);
17488 if (version.main_timestamp + 1 != map->timestamp) {
17489 /*
17490 * Simple version comparison failed.
17491 *
17492 * Retry the lookup and verify that the
17493 * same object/offset are still present.
17494 */
17495 saved_src_entry = VM_MAP_ENTRY_NULL;
17496 vm_object_deallocate(VME_OBJECT(new_entry));
17497 vm_map_entry_dispose(new_entry);
17498 if (result == KERN_MEMORY_RESTART_COPY) {
17499 result = KERN_SUCCESS;
17500 }
17501 continue;
17502 }
17503 /* map hasn't changed: src_entry is still valid */
17504 src_entry = saved_src_entry;
17505 saved_src_entry = VM_MAP_ENTRY_NULL;
17506
17507 if (result == KERN_MEMORY_RESTART_COPY) {
17508 vm_object_reference(object);
17509 goto RestartCopy;
17510 }
17511 }
17512
17513 _vm_map_store_entry_link(map_header,
17514 map_header->links.prev, new_entry);
17515
17516 /* protections for submap mapping are irrelevant here */
17517 if (vm_remap_legacy && !src_entry->is_sub_map) {
17518 *cur_protection &= src_entry->protection;
17519 *max_protection &= src_entry->max_protection;
17520 }
17521
17522 map_address += tmp_size;
17523 mapped_size += tmp_size;
17524 src_start += tmp_size;
17525
17526 if (vmk_flags.vmkf_copy_single_object) {
17527 if (mapped_size != size) {
17528 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n",
17529 map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17530 if (src_entry->vme_next != vm_map_to_entry(map) &&
17531 src_entry->vme_next->vme_object_value ==
17532 src_entry->vme_object_value) {
17533 /* XXX TODO4K */
17534 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17535 }
17536 }
17537 break;
17538 }
17539 } /* end while */
17540
17541 vm_map_unlock(map);
17542 if (result != KERN_SUCCESS) {
17543 /*
17544 * Free all allocated elements.
17545 */
17546 for (src_entry = map_header->links.next;
17547 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17548 src_entry = new_entry) {
17549 new_entry = src_entry->vme_next;
17550 _vm_map_store_entry_unlink(map_header, src_entry, false);
17551 if (src_entry->is_sub_map) {
17552 vm_map_deallocate(VME_SUBMAP(src_entry));
17553 } else {
17554 vm_object_deallocate(VME_OBJECT(src_entry));
17555 }
17556 vm_map_entry_dispose(src_entry);
17557 }
17558 }
17559 return result;
17560 }
17561
17562 bool
vm_map_is_exotic(vm_map_t map)17563 vm_map_is_exotic(
17564 vm_map_t map)
17565 {
17566 return VM_MAP_IS_EXOTIC(map);
17567 }
17568
17569 bool
vm_map_is_alien(vm_map_t map)17570 vm_map_is_alien(
17571 vm_map_t map)
17572 {
17573 return VM_MAP_IS_ALIEN(map);
17574 }
17575
17576 #if XNU_TARGET_OS_OSX
17577 void
vm_map_mark_alien(vm_map_t map)17578 vm_map_mark_alien(
17579 vm_map_t map)
17580 {
17581 vm_map_lock(map);
17582 map->is_alien = true;
17583 vm_map_unlock(map);
17584 }
17585
17586 void
vm_map_single_jit(vm_map_t map)17587 vm_map_single_jit(
17588 vm_map_t map)
17589 {
17590 vm_map_lock(map);
17591 map->single_jit = true;
17592 vm_map_unlock(map);
17593 }
17594 #endif /* XNU_TARGET_OS_OSX */
17595
17596 /*
17597 * Callers of this function must call vm_map_copy_require on
17598 * previously created vm_map_copy_t or pass a newly created
17599 * one to ensure that it hasn't been forged.
17600 */
/*
 * vm_map_copy_to_physcopy:
 *
 * Replace the entries of "copy_map" with a single new entry backed by a
 * freshly allocated VM object that holds a *physical* copy of the data.
 * This is used when "copy_map"'s page size (4K, per the assert below)
 * does not match "target_map"'s page size, so the original mappings
 * cannot simply be re-linked.  On success, "copy_map"'s page shift is
 * switched to "target_map"'s and its size is rounded up accordingly.
 *
 * Returns KERN_SUCCESS, or KERN_RESOURCE_SHORTAGE if a temporary pmap
 * could not be created.
 */
static kern_return_t
vm_map_copy_to_physcopy(
	vm_map_copy_t copy_map,
	vm_map_t target_map)
{
	vm_map_size_t size;
	vm_map_entry_t entry;
	vm_map_entry_t new_entry;
	vm_object_t new_object;
	unsigned int pmap_flags;
	pmap_t new_pmap;
	vm_map_t new_map;
	vm_map_address_t src_start, src_end, src_cur;
	vm_map_address_t dst_start, dst_end, dst_cur;
	kern_return_t kr;
	void *kbuf;

	/*
	 * Perform the equivalent of vm_allocate() and memcpy().
	 * Replace the mappings in "copy_map" with the newly allocated mapping.
	 */
	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);

	/*
	 * NOTE(review): this compares a page *shift* (small integer) with a
	 * page *mask* (e.g. 0xfff), so the assertion is vacuously true.
	 * VM_MAP_PAGE_SHIFT(target_map) was presumably intended -- confirm
	 * the invariant actually holds before tightening it.
	 */
	assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));

	/* create a new pmap to map "copy_map" */
	pmap_flags = 0;
	/* caller guarantees the source copy uses 4K pages */
	assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
#if PMAP_CREATE_FORCE_4K_PAGES
	pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
	pmap_flags |= PMAP_CREATE_64BIT;
	new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
	if (new_pmap == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/* allocate new VM object, sized for the target map's page rounding */
	size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
	new_object = vm_object_allocate(size);
	assert(new_object);

	/* allocate new VM map entry */
	new_entry = vm_map_copy_entry_create(copy_map);
	assert(new_entry);

	/* finish initializing new VM map entry */
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_DEFAULT;
	new_entry->use_pmap = TRUE;

	/* make new VM map entry point to new VM object */
	new_entry->vme_start = 0;
	new_entry->vme_end = size;
	VME_OBJECT_SET(new_entry, new_object, false, 0);
	VME_OFFSET_SET(new_entry, 0);

	/* create a new pageable VM map to map "copy_map" */
	new_map = vm_map_create_options(new_pmap, 0, MACH_VM_MAX_ADDRESS,
	    VM_MAP_CREATE_PAGEABLE);
	assert(new_map);
	/* the temporary map uses the *source* page size so the 4K copy maps cleanly */
	vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);

	/*
	 * map "copy_map" in the new VM map.
	 * consume_on_success is FALSE: "copy_map" must survive so we can
	 * strip and replace its entries below.
	 */
	src_start = 0;
	kr = vm_map_copyout_internal(
		new_map,
		&src_start,
		copy_map,
		copy_map->size,
		FALSE, /* consume_on_success */
		VM_PROT_DEFAULT,
		VM_PROT_DEFAULT,
		VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	src_end = src_start + copy_map->size;

	/* map "new_object" in the new VM map */
	vm_object_reference(new_object);
	dst_start = 0;
	kr = vm_map_enter(new_map,
	    &dst_start,
	    size,
	    0,                         /* mask */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_OSFMK,
	    new_object,
	    0,                         /* offset */
	    FALSE,                     /* needs copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_DEFAULT,
	    VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	dst_end = dst_start + size;

	/* get a kernel buffer (bounce buffer for one page at a time) */
	kbuf = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_NOFAIL);

	/* physically copy "copy_map" mappings to new VM object */
	for (src_cur = src_start, dst_cur = dst_start;
	    src_cur < src_end;
	    src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
		vm_size_t bytes;

		bytes = PAGE_SIZE;
		if (src_cur + PAGE_SIZE > src_end) {
			/* partial copy for last page */
			bytes = src_end - src_cur;
			assert(bytes > 0 && bytes < PAGE_SIZE);
			/* rest of dst page should be zero-filled */
		}
		/*
		 * Get bytes from src mapping.
		 * NOTE(review): copy failures are logged but not propagated;
		 * the destination page is left as-is (zero-filled) in that
		 * case.  Presumably deliberate best-effort -- confirm.
		 */
		kr = copyinmap(new_map, src_cur, kbuf, bytes);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
		}
		/* put bytes in dst mapping */
		assert(dst_cur < dst_end);
		assert(dst_cur + bytes <= dst_end);
		kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
		}
	}

	/* free kernel buffer */
	kfree_data(kbuf, PAGE_SIZE);

	/* destroy new map (also drops the pmap and the copyout mappings) */
	vm_map_destroy(new_map);
	new_map = VM_MAP_NULL;

	/* dispose of the old map entries in "copy_map" */
	while (vm_map_copy_first_entry(copy_map) !=
	    vm_map_copy_to_entry(copy_map)) {
		entry = vm_map_copy_first_entry(copy_map);
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(entry);
	}

	/* change "copy_map"'s page_size to match "target_map" */
	copy_map->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
	copy_map->offset = 0;
	copy_map->size = size;

	/* insert new map entry in "copy_map" */
	assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
	vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);

	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
	return KERN_SUCCESS;
}
17759
17760 void
17761 vm_map_copy_adjust_get_target_copy_map(
17762 vm_map_copy_t copy_map,
17763 vm_map_copy_t *target_copy_map_p);
17764 void
vm_map_copy_adjust_get_target_copy_map(vm_map_copy_t copy_map,vm_map_copy_t * target_copy_map_p)17765 vm_map_copy_adjust_get_target_copy_map(
17766 vm_map_copy_t copy_map,
17767 vm_map_copy_t *target_copy_map_p)
17768 {
17769 vm_map_copy_t target_copy_map;
17770 vm_map_entry_t entry, target_entry;
17771
17772 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17773 /* the caller already has a "target_copy_map": use it */
17774 return;
17775 }
17776
17777 /* the caller wants us to create a new copy of "copy_map" */
17778 target_copy_map = vm_map_copy_allocate();
17779 target_copy_map->type = copy_map->type;
17780 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17781 target_copy_map->offset = copy_map->offset;
17782 target_copy_map->size = copy_map->size;
17783 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17784 vm_map_store_init(&target_copy_map->cpy_hdr);
17785 for (entry = vm_map_copy_first_entry(copy_map);
17786 entry != vm_map_copy_to_entry(copy_map);
17787 entry = entry->vme_next) {
17788 target_entry = vm_map_copy_entry_create(target_copy_map);
17789 vm_map_entry_copy_full(target_entry, entry);
17790 if (target_entry->is_sub_map) {
17791 vm_map_reference(VME_SUBMAP(target_entry));
17792 } else {
17793 vm_object_reference(VME_OBJECT(target_entry));
17794 }
17795 vm_map_copy_entry_link(
17796 target_copy_map,
17797 vm_map_copy_last_entry(target_copy_map),
17798 target_entry);
17799 }
17800 entry = VM_MAP_ENTRY_NULL;
17801 *target_copy_map_p = target_copy_map;
17802 }
17803
17804 /*
17805 * Callers of this function must call vm_map_copy_require on
17806 * previously created vm_map_copy_t or pass a newly created
17807 * one to ensure that it hasn't been forged.
17808 */
/*
 * vm_map_copy_trim:
 *	Remove the range [trim_start, trim_end) from "copy_map".
 *	"trim_start" and "trim_end" are offsets relative to the start of
 *	the copy map's first entry; clipping is performed at the
 *	granularity implied by "new_page_shift" (the copy map's own
 *	page_shift is restored before returning).
 *	"copy_map"'s size is reduced by the amount trimmed.
 */
static void
vm_map_copy_trim(
	vm_map_copy_t   copy_map,
	uint16_t        new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end)
{
	uint16_t copy_page_shift;
	vm_map_entry_t entry, next_entry;

	assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
	assert(copy_map->cpy_hdr.nentries > 0);

	/* convert relative offsets into the entries' address space */
	trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
	trim_end += vm_map_copy_first_entry(copy_map)->vme_start;

	/* use the new page_shift to do the clipping */
	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_map->cpy_hdr.page_shift = new_page_shift;

	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = next_entry) {
		/*
		 * Capture the successor before clipping: any entries the
		 * clips create lie entirely outside [trim_start, trim_end)
		 * and need no further processing, so skipping them is safe.
		 */
		next_entry = entry->vme_next;
		if (entry->vme_end <= trim_start) {
			/* entry fully before trim range: skip */
			continue;
		}
		if (entry->vme_start >= trim_end) {
			/* entry fully after trim range: done */
			break;
		}
		/* clip entry if needed */
		vm_map_copy_clip_start(copy_map, entry, trim_start);
		vm_map_copy_clip_end(copy_map, entry, trim_end);
		/* dispose of entry */
		copy_map->size -= entry->vme_end - entry->vme_start;
		vm_map_copy_entry_unlink(copy_map, entry);
		/* drop the reference the entry held on its submap/object */
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(entry);
		entry = VM_MAP_ENTRY_NULL;
	}

	/* restore copy_map's original page_shift */
	copy_map->cpy_hdr.page_shift = copy_page_shift;
}
17859
17860 /*
17861 * Make any necessary adjustments to "copy_map" to allow it to be
17862 * mapped into "target_map".
17863 * If no changes were necessary, "target_copy_map" points to the
17864 * untouched "copy_map".
17865 * If changes are necessary, changes will be made to "target_copy_map".
17866 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17867 * copy the original "copy_map" to it before applying the changes.
17868 * The caller should discard "target_copy_map" if it's not the same as
17869 * the original "copy_map".
17870 */
17871 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
/*
 * Out parameters (on KERN_SUCCESS):
 *	*target_copy_map_p: the adjusted copy map (may be "src_copy_map"
 *		itself if no changes were needed).
 *	*overmap_start_p / *overmap_end_p: how much extra was over-mapped
 *		at each end to re-align to "target_map"'s page size.
 *	*trimmed_start_p: how much was trimmed from the start of the copy.
 */
kern_return_t
vm_map_copy_adjust_to_target(
	vm_map_copy_t           src_copy_map,
	vm_map_offset_t         offset,
	vm_map_size_t           size,
	vm_map_t                target_map,
	boolean_t               copy,
	vm_map_copy_t           *target_copy_map_p,
	vm_map_offset_t         *overmap_start_p,
	vm_map_offset_t         *overmap_end_p,
	vm_map_offset_t         *trimmed_start_p)
{
	vm_map_copy_t           copy_map, target_copy_map;
	vm_map_size_t           target_size;
	vm_map_size_t           src_copy_map_size;
	vm_map_size_t           overmap_start, overmap_end;
	int                     misalignments;
	vm_map_entry_t          entry, target_entry;
	vm_map_offset_t         addr_adjustment;
	vm_map_offset_t         new_start, new_end;
	int                     copy_page_mask, target_page_mask;
	uint16_t                copy_page_shift, target_page_shift;
	vm_map_offset_t         trimmed_end;

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(src_copy_map);
	assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);

	/*
	 * Start working with "src_copy_map" but we'll switch
	 * to "target_copy_map" as soon as we start making adjustments.
	 */
	copy_map = src_copy_map;
	src_copy_map_size = src_copy_map->size;

	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
	target_page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
	target_page_mask = VM_MAP_PAGE_MASK(target_map);

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);

	target_copy_map = *target_copy_map_p;
	if (target_copy_map != VM_MAP_COPY_NULL) {
		vm_map_copy_require(target_copy_map);
	}

	/* the requested (offset, size) range must fit in "copy_map" */
	if (offset + size > copy_map->size) {
		DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
		return KERN_INVALID_ARGUMENT;
	}

	/* trim the end */
	trimmed_end = 0;
	new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
	if (new_end < copy_map->size) {
		trimmed_end = src_copy_map_size - new_end;
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    new_end, copy_map->size);
	}

	/* trim the start */
	new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
	if (new_start != 0) {
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    0, new_start);
	}
	*trimmed_start_p = new_start;

	/* target_size starts with what's left after trimming */
	target_size = copy_map->size;
	assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
	    "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
	    (uint64_t)target_size, (uint64_t)src_copy_map_size,
	    (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);

	/* check for misalignments but don't adjust yet */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	if (copy_page_shift < target_page_shift) {
		/*
		 * Remapping from 4K to 16K: check the VM object alignments
		 * throughout the range.
		 * If the start and end of the range are mis-aligned, we can
		 * over-map to re-align, and adjust the "overmap" start/end
		 * and "target_size" of the range accordingly.
		 * If there is any mis-alignment within the range:
		 *     if "copy":
		 *         we can do immediate-copy instead of copy-on-write,
		 *     else:
		 *         no way to remap and share; fail.
		 */
		for (entry = vm_map_copy_first_entry(copy_map);
		    entry != vm_map_copy_to_entry(copy_map);
		    entry = entry->vme_next) {
			vm_object_offset_t object_offset_start, object_offset_end;

			object_offset_start = VME_OFFSET(entry);
			object_offset_end = object_offset_start;
			object_offset_end += entry->vme_end - entry->vme_start;
			/* first entry's start may be fixed by over-mapping */
			if (object_offset_start & target_page_mask) {
				if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
					overmap_start++;
				} else {
					misalignments++;
				}
			}
			/* last entry's end may be fixed by over-mapping */
			if (object_offset_end & target_page_mask) {
				if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
					overmap_end++;
				} else {
					misalignments++;
				}
			}
		}
	}
	entry = VM_MAP_ENTRY_NULL;

	/* decide how to deal with misalignments */
	assert(overmap_start <= 1);
	assert(overmap_end <= 1);
	if (!overmap_start && !overmap_end && !misalignments) {
		/* copy_map is properly aligned for target_map ... */
		if (*trimmed_start_p) {
			/* ... but we trimmed it, so still need to adjust */
		} else {
			/* ... and we didn't trim anything: we're done */
			if (target_copy_map == VM_MAP_COPY_NULL) {
				target_copy_map = copy_map;
			}
			*target_copy_map_p = target_copy_map;
			*overmap_start_p = 0;
			*overmap_end_p = 0;
			DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
			return KERN_SUCCESS;
		}
	} else if (misalignments && !copy) {
		/* can't "share" if misaligned */
		DEBUG4K_ADJUST("unsupported sharing\n");
#if MACH_ASSERT
		if (debug4k_panic_on_misaligned_sharing) {
			panic("DEBUG4k %s:%d unsupported sharing", __FUNCTION__, __LINE__);
		}
#endif /* MACH_ASSERT */
		DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
		return KERN_NOT_SUPPORTED;
	} else {
		/* can't virtual-copy if misaligned (but can physical-copy) */
		DEBUG4K_ADJUST("mis-aligned copying\n");
	}

	/* get a "target_copy_map" if needed and switch to it */
	vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
	copy_map = target_copy_map;

	if (misalignments && copy) {
		vm_map_size_t target_copy_map_size;

		/*
		 * Can't do copy-on-write with misaligned mappings.
		 * Replace the mappings with a physical copy of the original
		 * mappings' contents.
		 */
		target_copy_map_size = target_copy_map->size;
		kern_return_t kr = vm_map_copy_to_physcopy(target_copy_map, target_map);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		*target_copy_map_p = target_copy_map;
		*overmap_start_p = 0;
		/* physcopy may have grown the copy to target page alignment */
		*overmap_end_p = target_copy_map->size - target_copy_map_size;
		DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
		return KERN_SUCCESS;
	}

	/* apply the adjustments */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	/* remove copy_map->offset, so that everything starts at offset 0 */
	addr_adjustment = copy_map->offset;
	/* also remove whatever we trimmed from the start */
	addr_adjustment += *trimmed_start_p;
	for (target_entry = vm_map_copy_first_entry(target_copy_map);
	    target_entry != vm_map_copy_to_entry(target_copy_map);
	    target_entry = target_entry->vme_next) {
		vm_object_offset_t object_offset_start, object_offset_end;

		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
		object_offset_start = VME_OFFSET(target_entry);
		if (object_offset_start & target_page_mask) {
			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
			if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
				/*
				 * start of 1st entry is mis-aligned:
				 * re-adjust by over-mapping.
				 */
				overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
				VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
			} else {
				misalignments++;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
				assert(copy);
			}
		}

		/* shift all entries (but the first's start) by overmap_start */
		if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
			target_size += overmap_start;
		} else {
			target_entry->vme_start += overmap_start;
		}
		target_entry->vme_end += overmap_start;

		object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
		if (object_offset_end & target_page_mask) {
			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
			if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
				/*
				 * end of last entry is mis-aligned: re-adjust by over-mapping.
				 */
				overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
				target_entry->vme_end += overmap_end;
				target_size += overmap_end;
			} else {
				misalignments++;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
				assert(copy);
			}
		}
		/* rebase the entry so the copy starts at address 0 */
		target_entry->vme_start -= addr_adjustment;
		target_entry->vme_end -= addr_adjustment;
		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
	}

	target_copy_map->size = target_size;
	target_copy_map->offset += overmap_start;
	target_copy_map->offset -= addr_adjustment;
	target_copy_map->cpy_hdr.page_shift = target_page_shift;

//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
	assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
	assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));

	*target_copy_map_p = target_copy_map;
	*overmap_start_p = overmap_start;
	*overmap_end_p = overmap_end;

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
	return KERN_SUCCESS;
}
18139
18140 kern_return_t
vm_map_range_physical_size(vm_map_t map,vm_map_address_t start,mach_vm_size_t size,mach_vm_size_t * phys_size)18141 vm_map_range_physical_size(
18142 vm_map_t map,
18143 vm_map_address_t start,
18144 mach_vm_size_t size,
18145 mach_vm_size_t * phys_size)
18146 {
18147 kern_return_t kr;
18148 vm_map_copy_t copy_map, target_copy_map;
18149 vm_map_offset_t adjusted_start, adjusted_end;
18150 vm_map_size_t adjusted_size;
18151 vm_prot_t cur_prot, max_prot;
18152 vm_map_offset_t overmap_start, overmap_end, trimmed_start, end;
18153 vm_map_kernel_flags_t vmk_flags;
18154
18155 if (size == 0) {
18156 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size);
18157 *phys_size = 0;
18158 return KERN_SUCCESS;
18159 }
18160
18161 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
18162 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
18163 if (__improbable(os_add_overflow(start, size, &end) ||
18164 adjusted_end <= adjusted_start)) {
18165 /* wraparound */
18166 printf("%s:%d(start=0x%llx, size=0x%llx) pgmask 0x%x: wraparound\n", __FUNCTION__, __LINE__, (uint64_t)start, (uint64_t)size, VM_MAP_PAGE_MASK(map));
18167 *phys_size = 0;
18168 return KERN_INVALID_ARGUMENT;
18169 }
18170 assert(adjusted_end > adjusted_start);
18171 adjusted_size = adjusted_end - adjusted_start;
18172 *phys_size = adjusted_size;
18173 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
18174 return KERN_SUCCESS;
18175 }
18176 if (start == 0) {
18177 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
18178 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
18179 if (__improbable(adjusted_end <= adjusted_start)) {
18180 /* wraparound */
18181 printf("%s:%d(start=0x%llx, size=0x%llx) pgmask 0x%x: wraparound\n", __FUNCTION__, __LINE__, (uint64_t)start, (uint64_t)size, PAGE_MASK);
18182 *phys_size = 0;
18183 return KERN_INVALID_ARGUMENT;
18184 }
18185 assert(adjusted_end > adjusted_start);
18186 adjusted_size = adjusted_end - adjusted_start;
18187 *phys_size = adjusted_size;
18188 return KERN_SUCCESS;
18189 }
18190
18191 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
18192 vmk_flags.vmkf_copy_pageable = TRUE;
18193 vmk_flags.vmkf_copy_same_map = TRUE;
18194 assert(adjusted_size != 0);
18195 cur_prot = VM_PROT_NONE; /* legacy mode */
18196 max_prot = VM_PROT_NONE; /* legacy mode */
18197 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
18198 FALSE /* copy */,
18199 ©_map,
18200 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
18201 vmk_flags);
18202 if (kr != KERN_SUCCESS) {
18203 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
18204 //assert(0);
18205 *phys_size = 0;
18206 return kr;
18207 }
18208 assert(copy_map != VM_MAP_COPY_NULL);
18209 target_copy_map = copy_map;
18210 DEBUG4K_ADJUST("adjusting...\n");
18211 kr = vm_map_copy_adjust_to_target(
18212 copy_map,
18213 start - adjusted_start, /* offset */
18214 size, /* size */
18215 kernel_map,
18216 FALSE, /* copy */
18217 &target_copy_map,
18218 &overmap_start,
18219 &overmap_end,
18220 &trimmed_start);
18221 if (kr == KERN_SUCCESS) {
18222 if (target_copy_map->size != *phys_size) {
18223 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
18224 }
18225 *phys_size = target_copy_map->size;
18226 } else {
18227 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
18228 //assert(0);
18229 *phys_size = 0;
18230 }
18231 vm_map_copy_discard(copy_map);
18232 copy_map = VM_MAP_COPY_NULL;
18233
18234 return kr;
18235 }
18236
18237
18238 kern_return_t
memory_entry_check_for_adjustment(vm_map_t src_map,ipc_port_t port,vm_map_offset_t * overmap_start,vm_map_offset_t * overmap_end)18239 memory_entry_check_for_adjustment(
18240 vm_map_t src_map,
18241 ipc_port_t port,
18242 vm_map_offset_t *overmap_start,
18243 vm_map_offset_t *overmap_end)
18244 {
18245 kern_return_t kr = KERN_SUCCESS;
18246 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
18247
18248 assert(port);
18249 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
18250
18251 vm_named_entry_t named_entry;
18252
18253 named_entry = mach_memory_entry_from_port(port);
18254 named_entry_lock(named_entry);
18255 copy_map = named_entry->backing.copy;
18256 target_copy_map = copy_map;
18257
18258 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
18259 vm_map_offset_t trimmed_start;
18260
18261 trimmed_start = 0;
18262 DEBUG4K_ADJUST("adjusting...\n");
18263 kr = vm_map_copy_adjust_to_target(
18264 copy_map,
18265 0, /* offset */
18266 copy_map->size, /* size */
18267 src_map,
18268 FALSE, /* copy */
18269 &target_copy_map,
18270 overmap_start,
18271 overmap_end,
18272 &trimmed_start);
18273 assert(trimmed_start == 0);
18274 }
18275 named_entry_unlock(named_entry);
18276
18277 return kr;
18278 }
18279
18280
18281 /*
18282 * Routine: vm_remap
18283 *
18284 * Map portion of a task's address space.
18285 * Mapped region must not overlap more than
18286 * one vm memory object. Protections and
18287 * inheritance attributes remain the same
18288 * as in the original task and are out parameters.
18289 * Source and Target task can be identical
18290 * Other attributes are identical as for vm_map()
18291 */
18292 kern_return_t
vm_map_remap(vm_map_t target_map,vm_map_address_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,vm_map_t src_map,vm_map_offset_t memory_address,boolean_t copy,vm_prot_t * cur_protection,vm_prot_t * max_protection,vm_inherit_t inheritance)18293 vm_map_remap(
18294 vm_map_t target_map,
18295 vm_map_address_t *address,
18296 vm_map_size_t size,
18297 vm_map_offset_t mask,
18298 int flags,
18299 vm_map_kernel_flags_t vmk_flags,
18300 vm_tag_t tag,
18301 vm_map_t src_map,
18302 vm_map_offset_t memory_address,
18303 boolean_t copy,
18304 vm_prot_t *cur_protection, /* IN/OUT */
18305 vm_prot_t *max_protection, /* IN/OUT */
18306 vm_inherit_t inheritance)
18307 {
18308 kern_return_t result;
18309 vm_map_entry_t entry;
18310 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
18311 vm_map_entry_t new_entry;
18312 vm_map_copy_t copy_map;
18313 vm_map_offset_t offset_in_mapping;
18314 vm_map_size_t target_size = 0;
18315 vm_map_size_t src_page_mask, target_page_mask;
18316 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
18317 vm_map_offset_t initial_memory_address;
18318 vm_map_size_t initial_size;
18319 VM_MAP_ZAP_DECLARE(zap_list);
18320
18321 if (target_map == VM_MAP_NULL) {
18322 return KERN_INVALID_ARGUMENT;
18323 }
18324
18325 initial_memory_address = memory_address;
18326 initial_size = size;
18327 src_page_mask = VM_MAP_PAGE_MASK(src_map);
18328 target_page_mask = VM_MAP_PAGE_MASK(target_map);
18329
18330 switch (inheritance) {
18331 case VM_INHERIT_NONE:
18332 case VM_INHERIT_COPY:
18333 case VM_INHERIT_SHARE:
18334 if (size != 0 && src_map != VM_MAP_NULL) {
18335 break;
18336 }
18337 OS_FALLTHROUGH;
18338 default:
18339 return KERN_INVALID_ARGUMENT;
18340 }
18341
18342 if (src_page_mask != target_page_mask) {
18343 if (copy) {
18344 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18345 } else {
18346 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18347 }
18348 }
18349
18350 /*
18351 * If the user is requesting that we return the address of the
18352 * first byte of the data (rather than the base of the page),
18353 * then we use different rounding semantics: specifically,
18354 * we assume that (memory_address, size) describes a region
18355 * all of whose pages we must cover, rather than a base to be truncated
18356 * down and a size to be added to that base. So we figure out
18357 * the highest page that the requested region includes and make
18358 * sure that the size will cover it.
18359 *
18360 * The key example we're worried about it is of the form:
18361 *
18362 * memory_address = 0x1ff0, size = 0x20
18363 *
18364 * With the old semantics, we round down the memory_address to 0x1000
18365 * and round up the size to 0x1000, resulting in our covering *only*
18366 * page 0x1000. With the new semantics, we'd realize that the region covers
18367 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
18368 * 0x1000 and page 0x2000 in the region we remap.
18369 */
18370 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18371 vm_map_offset_t range_start, range_end;
18372
18373 range_start = vm_map_trunc_page(memory_address, src_page_mask);
18374 range_end = vm_map_round_page(memory_address + size, src_page_mask);
18375 memory_address = range_start;
18376 size = range_end - range_start;
18377 offset_in_mapping = initial_memory_address - memory_address;
18378 } else {
18379 /*
18380 * IMPORTANT:
18381 * This legacy code path is broken: for the range mentioned
18382 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
18383 * two 4k pages, it yields [ memory_address = 0x1000,
18384 * size = 0x1000 ], which covers only the first 4k page.
18385 * BUT some code unfortunately depends on this bug, so we
18386 * can't fix it without breaking something.
18387 * New code should get automatically opted in the new
18388 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
18389 */
18390 offset_in_mapping = 0;
18391 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
18392 size = vm_map_round_page(size, src_page_mask);
18393 initial_memory_address = memory_address;
18394 initial_size = size;
18395 }
18396
18397
18398 if (size == 0) {
18399 return KERN_INVALID_ARGUMENT;
18400 }
18401
18402 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
18403 /* must be copy-on-write to be "media resilient" */
18404 if (!copy) {
18405 return KERN_INVALID_ARGUMENT;
18406 }
18407 }
18408
18409 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
18410 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
18411
18412 assert(size != 0);
18413 result = vm_map_copy_extract(src_map,
18414 memory_address,
18415 size,
18416 copy, ©_map,
18417 cur_protection, /* IN/OUT */
18418 max_protection, /* IN/OUT */
18419 inheritance,
18420 vmk_flags);
18421 if (result != KERN_SUCCESS) {
18422 return result;
18423 }
18424 assert(copy_map != VM_MAP_COPY_NULL);
18425
18426 overmap_start = 0;
18427 overmap_end = 0;
18428 trimmed_start = 0;
18429 target_size = size;
18430 if (src_page_mask != target_page_mask) {
18431 vm_map_copy_t target_copy_map;
18432
18433 target_copy_map = copy_map; /* can modify "copy_map" itself */
18434 DEBUG4K_ADJUST("adjusting...\n");
18435 result = vm_map_copy_adjust_to_target(
18436 copy_map,
18437 offset_in_mapping, /* offset */
18438 initial_size,
18439 target_map,
18440 copy,
18441 &target_copy_map,
18442 &overmap_start,
18443 &overmap_end,
18444 &trimmed_start);
18445 if (result != KERN_SUCCESS) {
18446 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
18447 vm_map_copy_discard(copy_map);
18448 return result;
18449 }
18450 if (trimmed_start == 0) {
18451 /* nothing trimmed: no adjustment needed */
18452 } else if (trimmed_start >= offset_in_mapping) {
18453 /* trimmed more than offset_in_mapping: nothing left */
18454 assert(overmap_start == 0);
18455 assert(overmap_end == 0);
18456 offset_in_mapping = 0;
18457 } else {
18458 /* trimmed some of offset_in_mapping: adjust */
18459 assert(overmap_start == 0);
18460 assert(overmap_end == 0);
18461 offset_in_mapping -= trimmed_start;
18462 }
18463 offset_in_mapping += overmap_start;
18464 target_size = target_copy_map->size;
18465 }
18466
18467 /*
18468 * Allocate/check a range of free virtual address
18469 * space for the target
18470 */
18471 *address = vm_map_trunc_page(*address, target_page_mask);
18472 vm_map_lock(target_map);
18473 target_size = vm_map_round_page(target_size, target_page_mask);
18474 result = vm_map_remap_range_allocate(target_map, address,
18475 target_size, mask, flags, vmk_flags, tag,
18476 &insp_entry, &zap_list);
18477
18478 for (entry = vm_map_copy_first_entry(copy_map);
18479 entry != vm_map_copy_to_entry(copy_map);
18480 entry = new_entry) {
18481 new_entry = entry->vme_next;
18482 vm_map_copy_entry_unlink(copy_map, entry);
18483 if (result == KERN_SUCCESS) {
18484 if (vmk_flags.vmkf_remap_prot_copy) {
18485 /*
18486 * This vm_map_remap() is for a
18487 * vm_protect(VM_PROT_COPY), so the caller
18488 * expects to be allowed to add write access
18489 * to this new mapping. This is done by
18490 * adding VM_PROT_WRITE to each entry's
18491 * max_protection... unless some security
18492 * settings disallow it.
18493 */
18494 bool allow_write = false;
18495 if (entry->vme_permanent) {
18496 /* immutable mapping... */
18497 if ((entry->max_protection & VM_PROT_EXECUTE) &&
18498 developer_mode_state()) {
18499 /*
18500 * ... but executable and
18501 * possibly being debugged,
18502 * so let's allow it to become
18503 * writable, for breakpoints
18504 * and dtrace probes, for
18505 * example.
18506 */
18507 allow_write = true;
18508 } else {
18509 printf("%d[%s] vm_remap(0x%llx,0x%llx) VM_PROT_COPY denied on permanent mapping prot 0x%x/0x%x developer %d\n",
18510 proc_selfpid(),
18511 (get_bsdtask_info(current_task())
18512 ? proc_name_address(get_bsdtask_info(current_task()))
18513 : "?"),
18514 (uint64_t)memory_address,
18515 (uint64_t)size,
18516 entry->protection,
18517 entry->max_protection,
18518 developer_mode_state());
18519 DTRACE_VM6(vm_map_delete_permanent_deny_protcopy,
18520 vm_map_entry_t, entry,
18521 vm_map_offset_t, entry->vme_start,
18522 vm_map_offset_t, entry->vme_end,
18523 vm_prot_t, entry->protection,
18524 vm_prot_t, entry->max_protection,
18525 int, VME_ALIAS(entry));
18526 }
18527 } else {
18528 allow_write = true;
18529 }
18530
18531 /*
18532 * VM_PROT_COPY: allow this mapping to become
18533 * writable, unless it was "permanent".
18534 */
18535 if (allow_write) {
18536 entry->max_protection |= VM_PROT_WRITE;
18537 }
18538 }
18539 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18540 /* no codesigning -> read-only access */
18541 entry->max_protection = VM_PROT_READ;
18542 entry->protection = VM_PROT_READ;
18543 entry->vme_resilient_codesign = TRUE;
18544 }
18545 entry->vme_start += *address;
18546 entry->vme_end += *address;
18547 assert(!entry->map_aligned);
18548 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
18549 !entry->is_sub_map &&
18550 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
18551 VME_OBJECT(entry)->internal)) {
18552 entry->vme_resilient_media = TRUE;
18553 }
18554 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
18555 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
18556 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
18557 vm_map_store_entry_link(target_map, insp_entry, entry,
18558 vmk_flags);
18559 insp_entry = entry;
18560 } else {
18561 if (!entry->is_sub_map) {
18562 vm_object_deallocate(VME_OBJECT(entry));
18563 } else {
18564 vm_map_deallocate(VME_SUBMAP(entry));
18565 }
18566 vm_map_copy_entry_dispose(entry);
18567 }
18568 }
18569
18570 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18571 *cur_protection = VM_PROT_READ;
18572 *max_protection = VM_PROT_READ;
18573 }
18574
18575 if (result == KERN_SUCCESS) {
18576 target_map->size += target_size;
18577 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18578
18579 }
18580 vm_map_unlock(target_map);
18581
18582 vm_map_zap_dispose(&zap_list);
18583
18584 if (result == KERN_SUCCESS && target_map->wiring_required) {
18585 result = vm_map_wire_kernel(target_map, *address,
18586 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18587 TRUE);
18588 }
18589
18590 /*
18591 * If requested, return the address of the data pointed to by the
18592 * request, rather than the base of the resulting page.
18593 */
18594 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18595 *address += offset_in_mapping;
18596 }
18597
18598 if (src_page_mask != target_page_mask) {
18599 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18600 }
18601 vm_map_copy_discard(copy_map);
18602 copy_map = VM_MAP_COPY_NULL;
18603
18604 return result;
18605 }
18606
18607 /*
18608 * Routine: vm_map_remap_range_allocate
18609 *
18610 * Description:
18611 * Allocate a range in the specified virtual address map.
18612 * returns the address and the map entry just before the allocated
18613 * range
18614 *
18615 * Map must be locked.
18616 */
18617
static kern_return_t
vm_map_remap_range_allocate(
	vm_map_t                map,
	vm_map_address_t        *address,       /* IN/OUT */
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	__unused vm_tag_t       tag,
	vm_map_entry_t          *map_entry,     /* OUT */
	vm_map_zap_t            zap_list)
{
	vm_map_entry_t  entry;
	vm_map_offset_t start;
	kern_return_t   kr;

	start = *address;

	if (flags & VM_FLAGS_ANYWHERE) {
		/*
		 * The caller lets us pick the address: ask
		 * vm_map_locate_space() for a free range.
		 */
		if (flags & VM_FLAGS_RANDOM_ADDR) {
			vmk_flags.vmkf_random_address = true;
		}

		if (start) {
			/* override the target range if a hint has been provided */
			vmk_flags.vmkf_range_id = (map == kernel_map ?
			    kmem_addr_get_range(start, size) :
			    VM_MAP_REMAP_RANGE_ID(map, NULL, start, size));
		}

		kr = vm_map_locate_space(map, size, mask, vmk_flags,
		    &start, &entry);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		/* report the chosen address back to the caller */
		*address = start;
	} else {
		vm_map_entry_t temp_entry;
		vm_map_offset_t end;

		/*
		 * Verify that:
		 * the address doesn't itself violate
		 * the mask requirement.
		 */

		if ((start & mask) != 0) {
			return KERN_NO_SPACE;
		}


		/*
		 * ... the address is within bounds
		 */

		end = start + size;

		if ((start < map->min_offset) ||
		    (end > map->max_offset) ||
		    (start >= end)) {
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * If we're asked to overwrite whatever was mapped in that
		 * range, first deallocate that range.
		 */
		if (flags & VM_FLAGS_OVERWRITE) {
			vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_MAP_ALIGN;

			/*
			 * We use a "zap_list" to avoid having to unlock
			 * the "map" in vm_map_delete(), which would compromise
			 * the atomicity of the "deallocate" and then "remap"
			 * combination.
			 */
			remove_flags |= VM_MAP_REMOVE_NO_YIELD;

			if (vmk_flags.vmkf_overwrite_immutable) {
				/* caller is allowed to replace immutable mappings */
				remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
			}
			if (vmk_flags.vmkf_remap_prot_copy) {
				/* vm_protect(VM_PROT_COPY) may replace immutable code */
				remove_flags |= VM_MAP_REMOVE_IMMUTABLE_CODE;
			}
			kr = vm_map_delete(map, start, end, remove_flags,
			    KMEM_GUARD_NONE, zap_list).kmr_return;
			if (kr != KERN_SUCCESS) {
				/* XXX FBDP restore zap_list? */
				return kr;
			}
		}

		/*
		 * ... the starting address isn't allocated
		 */

		if (vm_map_lookup_entry(map, start, &temp_entry)) {
			return KERN_NO_SPACE;
		}

		entry = temp_entry;

		/*
		 * ... the next region doesn't overlap the
		 * end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end)) {
			return KERN_NO_SPACE;
		}
	}
	/* "entry" is the entry just before the allocated range */
	*map_entry = entry;
	return KERN_SUCCESS;
}
18733
18734 /*
18735 * vm_map_switch:
18736 *
18737 * Set the address map for the current thread to the specified map
18738 */
18739
18740 vm_map_t
vm_map_switch(vm_map_t map)18741 vm_map_switch(
18742 vm_map_t map)
18743 {
18744 int mycpu;
18745 thread_t thread = current_thread();
18746 vm_map_t oldmap = thread->map;
18747
18748 mp_disable_preemption();
18749 mycpu = cpu_number();
18750
18751 /*
18752 * Deactivate the current map and activate the requested map
18753 */
18754 PMAP_SWITCH_USER(thread, map, mycpu);
18755
18756 mp_enable_preemption();
18757 return oldmap;
18758 }
18759
18760
18761 /*
18762 * Routine: vm_map_write_user
18763 *
18764 * Description:
18765 * Copy out data from a kernel space into space in the
18766 * destination map. The space must already exist in the
18767 * destination map.
18768 * NOTE: This routine should only be called by threads
18769 * which can block on a page fault. i.e. kernel mode user
18770 * threads.
18771 *
18772 */
18773 kern_return_t
vm_map_write_user(vm_map_t map,void * src_p,vm_map_address_t dst_addr,vm_size_t size)18774 vm_map_write_user(
18775 vm_map_t map,
18776 void *src_p,
18777 vm_map_address_t dst_addr,
18778 vm_size_t size)
18779 {
18780 kern_return_t kr = KERN_SUCCESS;
18781
18782 if (current_map() == map) {
18783 if (copyout(src_p, dst_addr, size)) {
18784 kr = KERN_INVALID_ADDRESS;
18785 }
18786 } else {
18787 vm_map_t oldmap;
18788
18789 /* take on the identity of the target map while doing */
18790 /* the transfer */
18791
18792 vm_map_reference(map);
18793 oldmap = vm_map_switch(map);
18794 if (copyout(src_p, dst_addr, size)) {
18795 kr = KERN_INVALID_ADDRESS;
18796 }
18797 vm_map_switch(oldmap);
18798 vm_map_deallocate(map);
18799 }
18800 return kr;
18801 }
18802
18803 /*
18804 * Routine: vm_map_read_user
18805 *
18806 * Description:
18807 * Copy in data from a user space source map into the
18808 * kernel map. The space must already exist in the
18809 * kernel map.
18810 * NOTE: This routine should only be called by threads
18811 * which can block on a page fault. i.e. kernel mode user
18812 * threads.
18813 *
18814 */
18815 kern_return_t
vm_map_read_user(vm_map_t map,vm_map_address_t src_addr,void * dst_p,vm_size_t size)18816 vm_map_read_user(
18817 vm_map_t map,
18818 vm_map_address_t src_addr,
18819 void *dst_p,
18820 vm_size_t size)
18821 {
18822 kern_return_t kr = KERN_SUCCESS;
18823
18824 if (current_map() == map) {
18825 if (copyin(src_addr, dst_p, size)) {
18826 kr = KERN_INVALID_ADDRESS;
18827 }
18828 } else {
18829 vm_map_t oldmap;
18830
18831 /* take on the identity of the target map while doing */
18832 /* the transfer */
18833
18834 vm_map_reference(map);
18835 oldmap = vm_map_switch(map);
18836 if (copyin(src_addr, dst_p, size)) {
18837 kr = KERN_INVALID_ADDRESS;
18838 }
18839 vm_map_switch(oldmap);
18840 vm_map_deallocate(map);
18841 }
18842 return kr;
18843 }
18844
18845
18846 /*
18847 * vm_map_check_protection:
18848 *
18849 * Assert that the target map allows the specified
18850 * privilege on the entire address region given.
18851 * The entire region must be allocated.
18852 */
18853 boolean_t
vm_map_check_protection(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t protection)18854 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18855 vm_map_offset_t end, vm_prot_t protection)
18856 {
18857 vm_map_entry_t entry;
18858 vm_map_entry_t tmp_entry;
18859
18860 vm_map_lock(map);
18861
18862 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18863 vm_map_unlock(map);
18864 return FALSE;
18865 }
18866
18867 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18868 vm_map_unlock(map);
18869 return FALSE;
18870 }
18871
18872 entry = tmp_entry;
18873
18874 while (start < end) {
18875 if (entry == vm_map_to_entry(map)) {
18876 vm_map_unlock(map);
18877 return FALSE;
18878 }
18879
18880 /*
18881 * No holes allowed!
18882 */
18883
18884 if (start < entry->vme_start) {
18885 vm_map_unlock(map);
18886 return FALSE;
18887 }
18888
18889 /*
18890 * Check protection associated with entry.
18891 */
18892
18893 if ((entry->protection & protection) != protection) {
18894 vm_map_unlock(map);
18895 return FALSE;
18896 }
18897
18898 /* go to next entry */
18899
18900 start = entry->vme_end;
18901 entry = entry->vme_next;
18902 }
18903 vm_map_unlock(map);
18904 return TRUE;
18905 }
18906
/*
 * vm_map_purgable_control:
 *
 * Apply a purgeability operation ("control", with in/out "state") to the
 * VM object backing the entry that covers "address" in "map".
 * Validates the arguments, locates and locks the object, then delegates
 * the actual state change to vm_object_purgable_control().
 */
kern_return_t
vm_map_purgable_control(
	vm_map_t                map,
	vm_map_offset_t         address,
	vm_purgable_t           control,
	int                     *state)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	kern_return_t           kr;
	boolean_t               was_nonvolatile;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object. Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_GET_STATE &&
	    control != VM_PURGABLE_PURGE_ALL &&
	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control == VM_PURGABLE_PURGE_ALL) {
		/* system-wide purge: no per-address work needed */
		vm_purgeable_object_purge_all();
		return KERN_SUCCESS;
	}

	/* for SET operations, reject undefined bits in the requested state */
	if ((control == VM_PURGABLE_SET_STATE ||
	    control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
	    ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->protection & VM_PROT_WRITE) == 0 &&
	    control != VM_PURGABLE_GET_STATE) {
		/*
		 * Can't apply purgable controls to something you can't write.
		 */
		vm_map_unlock_read(map);
		return KERN_PROTECTION_FAILURE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL ||
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Object must already be present and be purgeable.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

#if 00
	/* disabled: whole-object restriction not currently enforced */
	if (VME_OFFSET(entry) != 0 ||
	    entry->vme_end - entry->vme_start != object->vo_size) {
		/*
		 * Can only apply purgable controls to the whole (existing)
		 * object at once.
		 */
		vm_map_unlock_read(map);
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}
#endif

	assert(!entry->is_sub_map);
	assert(!entry->use_pmap); /* purgeable has its own accounting */

	/* object is locked; map lock no longer needed for the state change */
	vm_map_unlock_read(map);

	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);

	kr = vm_object_purgable_control(object, control, state);

	if (was_nonvolatile &&
	    object->purgable != VM_PURGABLE_NONVOLATILE &&
	    map->pmap == kernel_pmap) {
		/* object just became volatile at the kernel's request */
#if DEBUG
		object->vo_purgeable_volatilizer = kernel_task;
#endif /* DEBUG */
	}

	vm_object_unlock(object);

	return kr;
}
19011
/*
 * vm_map_footprint_query_page_info:
 *
 * Compute the footprint disposition (VM_PAGE_QUERY_PAGE_* bits) of the
 * page at "curr_s_offset" covered by "map_entry", combining the pmap's
 * view of the page with the accounting attributes of the backing VM
 * object (purgeable ownership, ledger tags, IOKit accounting, ...).
 * The result is returned through "disposition_p".
 *
 * The map must be locked by the caller, and must not be a corpse with
 * saved footprint data (that case is handled elsewhere).
 */
void
vm_map_footprint_query_page_info(
	vm_map_t        map,
	vm_map_entry_t  map_entry,
	vm_map_offset_t curr_s_offset,
	int             *disposition_p)
{
	int             pmap_disp;
	vm_object_t     object = VM_OBJECT_NULL;
	int             disposition;
	int             effective_page_size;

	vm_map_lock_assert_held(map);
	assert(!map->has_corpse_footprint);
	assert(curr_s_offset >= map_entry->vme_start);
	assert(curr_s_offset < map_entry->vme_end);

	if (map_entry->is_sub_map) {
		if (!map_entry->use_pmap) {
			/* nested pmap: no footprint */
			*disposition_p = 0;
			return;
		}
	} else {
		object = VME_OBJECT(map_entry);
		if (object == VM_OBJECT_NULL) {
			/* nothing mapped here: no need to ask */
			*disposition_p = 0;
			return;
		}
	}

	/* the map may be using a smaller page size than the kernel */
	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

	pmap_disp = 0;

	/*
	 * Query the pmap.
	 */
	pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);

	/*
	 * Compute this page's disposition.
	 */
	disposition = 0;

	/* deal with "alternate accounting" first */
	if (!map_entry->is_sub_map &&
	    object->vo_no_footprint) {
		/* does not count in footprint */
		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
	} else if (!map_entry->is_sub_map &&
	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
	    (object->purgable == VM_PURGABLE_DENY &&
	    object->vo_ledger_tag)) &&
	    VM_OBJECT_OWNER(object) != NULL &&
	    VM_OBJECT_OWNER(object)->map == map) {
		/* non-volatile purgeable (or ledger-tagged) object owned by this task */
		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
		if ((((curr_s_offset
		    - map_entry->vme_start
		    + VME_OFFSET(map_entry))
		    / effective_page_size) <
		    (object->resident_page_count +
		    vm_compressor_pager_get_count(object->pager)))) {
			/*
			 * Non-volatile purgeable object owned
			 * by this task: report the first
			 * "#resident + #compressed" pages as
			 * "resident" (to show that they
			 * contribute to the footprint) but not
			 * "dirty" (to avoid double-counting
			 * with the fake "non-volatile" region
			 * we'll report at the end of the
			 * address space to account for all
			 * (mapped or not) non-volatile memory
			 * owned by this task.
			 */
			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
		}
	} else if (!map_entry->is_sub_map &&
	    (object->purgable == VM_PURGABLE_VOLATILE ||
	    object->purgable == VM_PURGABLE_EMPTY) &&
	    VM_OBJECT_OWNER(object) != NULL &&
	    VM_OBJECT_OWNER(object)->map == map) {
		/* volatile or empty purgeable object owned by this task */
		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
		if ((((curr_s_offset
		    - map_entry->vme_start
		    + VME_OFFSET(map_entry))
		    / effective_page_size) <
		    object->wired_page_count)) {
			/*
			 * Volatile|empty purgeable object owned
			 * by this task: report the first
			 * "#wired" pages as "resident" (to
			 * show that they contribute to the
			 * footprint) but not "dirty" (to avoid
			 * double-counting with the fake
			 * "non-volatile" region we'll report
			 * at the end of the address space to
			 * account for all (mapped or not)
			 * non-volatile memory owned by this
			 * task.
			 */
			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
		}
	} else if (!map_entry->is_sub_map &&
	    map_entry->iokit_acct &&
	    object->internal &&
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Non-purgeable IOKit memory: phys_footprint
		 * includes the entire virtual mapping.
		 */
		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
		disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
		disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
	} else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
	    PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
		/* alternate accounting */
#if __arm64__ && (DEVELOPMENT || DEBUG)
		if (map->pmap->footprint_was_suspended) {
			/*
			 * The assertion below can fail if dyld
			 * suspended footprint accounting
			 * while doing some adjustments to
			 * this page; the mapping would say
			 * "use pmap accounting" but the page
			 * would be marked "alternate
			 * accounting".
			 */
		} else
#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
		{
			assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
		}
		/* alternate accounting: page does not count here */
		disposition = 0;
	} else {
		/* normal pmap-based accounting */
		if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
			assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
			disposition |= VM_PAGE_QUERY_PAGE_REF;
			if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
			} else {
				disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
			}
			if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
				disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
			}
		} else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
			assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
			disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
		}
	}

	*disposition_p = disposition;
}
19169
19170 kern_return_t
vm_map_page_query_internal(vm_map_t target_map,vm_map_offset_t offset,int * disposition,int * ref_count)19171 vm_map_page_query_internal(
19172 vm_map_t target_map,
19173 vm_map_offset_t offset,
19174 int *disposition,
19175 int *ref_count)
19176 {
19177 kern_return_t kr;
19178 vm_page_info_basic_data_t info;
19179 mach_msg_type_number_t count;
19180
19181 count = VM_PAGE_INFO_BASIC_COUNT;
19182 kr = vm_map_page_info(target_map,
19183 offset,
19184 VM_PAGE_INFO_BASIC,
19185 (vm_page_info_t) &info,
19186 &count);
19187 if (kr == KERN_SUCCESS) {
19188 *disposition = info.disposition;
19189 *ref_count = info.ref_count;
19190 } else {
19191 *disposition = 0;
19192 *ref_count = 0;
19193 }
19194
19195 return kr;
19196 }
19197
19198 kern_return_t
vm_map_page_info(vm_map_t map,vm_map_offset_t offset,vm_page_info_flavor_t flavor,vm_page_info_t info,mach_msg_type_number_t * count)19199 vm_map_page_info(
19200 vm_map_t map,
19201 vm_map_offset_t offset,
19202 vm_page_info_flavor_t flavor,
19203 vm_page_info_t info,
19204 mach_msg_type_number_t *count)
19205 {
19206 return vm_map_page_range_info_internal(map,
19207 offset, /* start of range */
19208 (offset + 1), /* this will get rounded in the call to the page boundary */
19209 (int)-1, /* effective_page_shift: unspecified */
19210 flavor,
19211 info,
19212 count);
19213 }
19214
19215 kern_return_t
vm_map_page_range_info_internal(vm_map_t map,vm_map_offset_t start_offset,vm_map_offset_t end_offset,int effective_page_shift,vm_page_info_flavor_t flavor,vm_page_info_t info,mach_msg_type_number_t * count)19216 vm_map_page_range_info_internal(
19217 vm_map_t map,
19218 vm_map_offset_t start_offset,
19219 vm_map_offset_t end_offset,
19220 int effective_page_shift,
19221 vm_page_info_flavor_t flavor,
19222 vm_page_info_t info,
19223 mach_msg_type_number_t *count)
19224 {
19225 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
19226 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
19227 vm_page_t m = VM_PAGE_NULL;
19228 kern_return_t retval = KERN_SUCCESS;
19229 int disposition = 0;
19230 int ref_count = 0;
19231 int depth = 0, info_idx = 0;
19232 vm_page_info_basic_t basic_info = 0;
19233 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
19234 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
19235 boolean_t do_region_footprint;
19236 ledger_amount_t ledger_resident, ledger_compressed;
19237 int effective_page_size;
19238 vm_map_offset_t effective_page_mask;
19239
19240 switch (flavor) {
19241 case VM_PAGE_INFO_BASIC:
19242 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
19243 /*
19244 * The "vm_page_info_basic_data" structure was not
19245 * properly padded, so allow the size to be off by
19246 * one to maintain backwards binary compatibility...
19247 */
19248 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
19249 return KERN_INVALID_ARGUMENT;
19250 }
19251 }
19252 break;
19253 default:
19254 return KERN_INVALID_ARGUMENT;
19255 }
19256
19257 if (effective_page_shift == -1) {
19258 effective_page_shift = vm_self_region_page_shift_safely(map);
19259 if (effective_page_shift == -1) {
19260 return KERN_INVALID_ARGUMENT;
19261 }
19262 }
19263 effective_page_size = (1 << effective_page_shift);
19264 effective_page_mask = effective_page_size - 1;
19265
19266 do_region_footprint = task_self_region_footprint();
19267 disposition = 0;
19268 ref_count = 0;
19269 depth = 0;
19270 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
19271 retval = KERN_SUCCESS;
19272
19273 offset_in_page = start_offset & effective_page_mask;
19274 start = vm_map_trunc_page(start_offset, effective_page_mask);
19275 end = vm_map_round_page(end_offset, effective_page_mask);
19276
19277 if (end < start) {
19278 return KERN_INVALID_ARGUMENT;
19279 }
19280
19281 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
19282
19283 vm_map_lock_read(map);
19284
19285 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
19286
19287 for (curr_s_offset = start; curr_s_offset < end;) {
19288 /*
19289 * New lookup needs reset of these variables.
19290 */
19291 curr_object = object = VM_OBJECT_NULL;
19292 offset_in_object = 0;
19293 ref_count = 0;
19294 depth = 0;
19295
19296 if (do_region_footprint &&
19297 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
19298 /*
19299 * Request for "footprint" info about a page beyond
19300 * the end of address space: this must be for
19301 * the fake region vm_map_region_recurse_64()
19302 * reported to account for non-volatile purgeable
19303 * memory owned by this task.
19304 */
19305 disposition = 0;
19306
19307 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
19308 (unsigned) ledger_compressed) {
19309 /*
19310 * We haven't reported all the "non-volatile
19311 * compressed" pages yet, so report this fake
19312 * page as "compressed".
19313 */
19314 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19315 } else {
19316 /*
19317 * We've reported all the non-volatile
19318 * compressed page but not all the non-volatile
19319 * pages , so report this fake page as
19320 * "resident dirty".
19321 */
19322 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19323 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19324 disposition |= VM_PAGE_QUERY_PAGE_REF;
19325 }
19326 switch (flavor) {
19327 case VM_PAGE_INFO_BASIC:
19328 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19329 basic_info->disposition = disposition;
19330 basic_info->ref_count = 1;
19331 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19332 basic_info->offset = 0;
19333 basic_info->depth = 0;
19334
19335 info_idx++;
19336 break;
19337 }
19338 curr_s_offset += effective_page_size;
19339 continue;
19340 }
19341
19342 /*
19343 * First, find the map entry covering "curr_s_offset", going down
19344 * submaps if necessary.
19345 */
19346 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
19347 /* no entry -> no object -> no page */
19348
19349 if (curr_s_offset < vm_map_min(map)) {
19350 /*
19351 * Illegal address that falls below map min.
19352 */
19353 curr_e_offset = MIN(end, vm_map_min(map));
19354 } else if (curr_s_offset >= vm_map_max(map)) {
19355 /*
19356 * Illegal address that falls on/after map max.
19357 */
19358 curr_e_offset = end;
19359 } else if (map_entry == vm_map_to_entry(map)) {
19360 /*
19361 * Hit a hole.
19362 */
19363 if (map_entry->vme_next == vm_map_to_entry(map)) {
19364 /*
19365 * Empty map.
19366 */
19367 curr_e_offset = MIN(map->max_offset, end);
19368 } else {
19369 /*
19370 * Hole at start of the map.
19371 */
19372 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19373 }
19374 } else {
19375 if (map_entry->vme_next == vm_map_to_entry(map)) {
19376 /*
19377 * Hole at the end of the map.
19378 */
19379 curr_e_offset = MIN(map->max_offset, end);
19380 } else {
19381 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19382 }
19383 }
19384
19385 assert(curr_e_offset >= curr_s_offset);
19386
19387 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19388
19389 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19390
19391 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19392
19393 curr_s_offset = curr_e_offset;
19394
19395 info_idx += num_pages;
19396
19397 continue;
19398 }
19399
19400 /* compute offset from this map entry's start */
19401 offset_in_object = curr_s_offset - map_entry->vme_start;
19402
19403 /* compute offset into this map entry's object (or submap) */
19404 offset_in_object += VME_OFFSET(map_entry);
19405
19406 if (map_entry->is_sub_map) {
19407 vm_map_t sub_map = VM_MAP_NULL;
19408 vm_page_info_t submap_info = 0;
19409 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
19410
19411 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
19412
19413 submap_s_offset = offset_in_object;
19414 submap_e_offset = submap_s_offset + range_len;
19415
19416 sub_map = VME_SUBMAP(map_entry);
19417
19418 vm_map_reference(sub_map);
19419 vm_map_unlock_read(map);
19420
19421 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19422
19423 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
19424 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
19425
19426 retval = vm_map_page_range_info_internal(sub_map,
19427 submap_s_offset,
19428 submap_e_offset,
19429 effective_page_shift,
19430 VM_PAGE_INFO_BASIC,
19431 (vm_page_info_t) submap_info,
19432 count);
19433
19434 assert(retval == KERN_SUCCESS);
19435
19436 vm_map_lock_read(map);
19437 vm_map_deallocate(sub_map);
19438
19439 /* Move the "info" index by the number of pages we inspected.*/
19440 info_idx += range_len >> effective_page_shift;
19441
19442 /* Move our current offset by the size of the range we inspected.*/
19443 curr_s_offset += range_len;
19444
19445 continue;
19446 }
19447
19448 object = VME_OBJECT(map_entry);
19449
19450 if (object == VM_OBJECT_NULL) {
19451 /*
19452 * We don't have an object here and, hence,
19453 * no pages to inspect. We'll fill up the
19454 * info structure appropriately.
19455 */
19456
19457 curr_e_offset = MIN(map_entry->vme_end, end);
19458
19459 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19460
19461 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19462
19463 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19464
19465 curr_s_offset = curr_e_offset;
19466
19467 info_idx += num_pages;
19468
19469 continue;
19470 }
19471
19472 if (do_region_footprint) {
19473 disposition = 0;
19474 if (map->has_corpse_footprint) {
19475 /*
19476 * Query the page info data we saved
19477 * while forking the corpse.
19478 */
19479 vm_map_corpse_footprint_query_page_info(
19480 map,
19481 curr_s_offset,
19482 &disposition);
19483 } else {
19484 /*
19485 * Query the live pmap for footprint info
19486 * about this page.
19487 */
19488 vm_map_footprint_query_page_info(
19489 map,
19490 map_entry,
19491 curr_s_offset,
19492 &disposition);
19493 }
19494 switch (flavor) {
19495 case VM_PAGE_INFO_BASIC:
19496 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19497 basic_info->disposition = disposition;
19498 basic_info->ref_count = 1;
19499 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19500 basic_info->offset = 0;
19501 basic_info->depth = 0;
19502
19503 info_idx++;
19504 break;
19505 }
19506 curr_s_offset += effective_page_size;
19507 continue;
19508 }
19509
19510 vm_object_reference(object);
19511 /*
19512 * Shared mode -- so we can allow other readers
19513 * to grab the lock too.
19514 */
19515 vm_object_lock_shared(object);
19516
19517 curr_e_offset = MIN(map_entry->vme_end, end);
19518
19519 vm_map_unlock_read(map);
19520
19521 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19522
19523 curr_object = object;
19524
19525 for (; curr_s_offset < curr_e_offset;) {
19526 if (object == curr_object) {
19527 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19528 } else {
19529 ref_count = curr_object->ref_count;
19530 }
19531
19532 curr_offset_in_object = offset_in_object;
19533
19534 for (;;) {
19535 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19536
19537 if (m != VM_PAGE_NULL) {
19538 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19539 break;
19540 } else {
19541 if (curr_object->internal &&
19542 curr_object->alive &&
19543 !curr_object->terminating &&
19544 curr_object->pager_ready) {
19545 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19546 == VM_EXTERNAL_STATE_EXISTS) {
19547 /* the pager has that page */
19548 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19549 break;
19550 }
19551 }
19552
19553 /*
19554 * Go down the VM object shadow chain until we find the page
19555 * we're looking for.
19556 */
19557
19558 if (curr_object->shadow != VM_OBJECT_NULL) {
19559 vm_object_t shadow = VM_OBJECT_NULL;
19560
19561 curr_offset_in_object += curr_object->vo_shadow_offset;
19562 shadow = curr_object->shadow;
19563
19564 vm_object_lock_shared(shadow);
19565 vm_object_unlock(curr_object);
19566
19567 curr_object = shadow;
19568 depth++;
19569 continue;
19570 } else {
19571 break;
19572 }
19573 }
19574 }
19575
19576 /* The ref_count is not strictly accurate, it measures the number */
19577 /* of entities holding a ref on the object, they may not be mapping */
19578 /* the object or may not be mapping the section holding the */
19579 /* target page but its still a ball park number and though an over- */
19580 /* count, it picks up the copy-on-write cases */
19581
19582 /* We could also get a picture of page sharing from pmap_attributes */
19583 /* but this would under count as only faulted-in mappings would */
19584 /* show up. */
19585
19586 if ((curr_object == object) && curr_object->shadow) {
19587 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19588 }
19589
19590 if (!curr_object->internal) {
19591 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19592 }
19593
19594 if (m != VM_PAGE_NULL) {
19595 if (m->vmp_fictitious) {
19596 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19597 } else {
19598 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19599 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19600 }
19601
19602 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19603 disposition |= VM_PAGE_QUERY_PAGE_REF;
19604 }
19605
19606 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19607 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19608 }
19609
19610 /*
19611 * XXX TODO4K:
19612 * when this routine deals with 4k
19613 * pages, check the appropriate CS bit
19614 * here.
19615 */
19616 if (m->vmp_cs_validated) {
19617 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19618 }
19619 if (m->vmp_cs_tainted) {
19620 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19621 }
19622 if (m->vmp_cs_nx) {
19623 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19624 }
19625 if (m->vmp_reusable || curr_object->all_reusable) {
19626 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19627 }
19628 }
19629 }
19630
19631 switch (flavor) {
19632 case VM_PAGE_INFO_BASIC:
19633 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19634 basic_info->disposition = disposition;
19635 basic_info->ref_count = ref_count;
19636 basic_info->object_id = (vm_object_id_t) (uintptr_t)
19637 VM_KERNEL_ADDRPERM(curr_object);
19638 basic_info->offset =
19639 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19640 basic_info->depth = depth;
19641
19642 info_idx++;
19643 break;
19644 }
19645
19646 disposition = 0;
19647 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19648
19649 /*
19650 * Move to next offset in the range and in our object.
19651 */
19652 curr_s_offset += effective_page_size;
19653 offset_in_object += effective_page_size;
19654 curr_offset_in_object = offset_in_object;
19655
19656 if (curr_object != object) {
19657 vm_object_unlock(curr_object);
19658
19659 curr_object = object;
19660
19661 vm_object_lock_shared(curr_object);
19662 } else {
19663 vm_object_lock_yield_shared(curr_object);
19664 }
19665 }
19666
19667 vm_object_unlock(curr_object);
19668 vm_object_deallocate(curr_object);
19669
19670 vm_map_lock_read(map);
19671 }
19672
19673 vm_map_unlock_read(map);
19674 return retval;
19675 }
19676
19677 /*
19678 * vm_map_msync
19679 *
19680 * Synchronises the memory range specified with its backing store
19681 * image by either flushing or cleaning the contents to the appropriate
19682 * memory manager engaging in a memory object synchronize dialog with
19683 * the manager. The client doesn't return until the manager issues
19684 * m_o_s_completed message. MIG Magically converts user task parameter
19685 * to the task's address map.
19686 *
19687 * interpretation of sync_flags
19688 * VM_SYNC_INVALIDATE - discard pages, only return precious
19689 * pages to manager.
19690 *
19691 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19692 * - discard pages, write dirty or precious
19693 * pages back to memory manager.
19694 *
19695 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19696 * - write dirty or precious pages back to
19697 * the memory manager.
19698 *
19699 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19700 * is a hole in the region, and we would
19701 * have returned KERN_SUCCESS, return
19702 * KERN_INVALID_ADDRESS instead.
19703 *
19704 * NOTE
19705 * The memory object attributes have not yet been implemented, this
19706 * function will have to deal with the invalidate attribute
19707 *
19708 * RETURNS
19709 * KERN_INVALID_TASK Bad task parameter
19710 * KERN_INVALID_ARGUMENT both sync and async were specified.
19711 * KERN_SUCCESS The usual.
19712 * KERN_INVALID_ADDRESS There was a hole in the region.
19713 */
19714
19715 kern_return_t
vm_map_msync(vm_map_t map,vm_map_address_t address,vm_map_size_t size,vm_sync_t sync_flags)19716 vm_map_msync(
19717 vm_map_t map,
19718 vm_map_address_t address,
19719 vm_map_size_t size,
19720 vm_sync_t sync_flags)
19721 {
19722 vm_map_entry_t entry;
19723 vm_map_size_t amount_left;
19724 vm_object_offset_t offset;
19725 vm_object_offset_t start_offset, end_offset;
19726 boolean_t do_sync_req;
19727 boolean_t had_hole = FALSE;
19728 vm_map_offset_t pmap_offset;
19729
19730 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19731 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19732 return KERN_INVALID_ARGUMENT;
19733 }
19734
19735 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19736 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19737 }
19738
19739 /*
19740 * align address and size on page boundaries
19741 */
19742 size = (vm_map_round_page(address + size,
19743 VM_MAP_PAGE_MASK(map)) -
19744 vm_map_trunc_page(address,
19745 VM_MAP_PAGE_MASK(map)));
19746 address = vm_map_trunc_page(address,
19747 VM_MAP_PAGE_MASK(map));
19748
19749 if (map == VM_MAP_NULL) {
19750 return KERN_INVALID_TASK;
19751 }
19752
19753 if (size == 0) {
19754 return KERN_SUCCESS;
19755 }
19756
19757 amount_left = size;
19758
19759 while (amount_left > 0) {
19760 vm_object_size_t flush_size;
19761 vm_object_t object;
19762
19763 vm_map_lock(map);
19764 if (!vm_map_lookup_entry(map,
19765 address,
19766 &entry)) {
19767 vm_map_size_t skip;
19768
19769 /*
19770 * hole in the address map.
19771 */
19772 had_hole = TRUE;
19773
19774 if (sync_flags & VM_SYNC_KILLPAGES) {
19775 /*
19776 * For VM_SYNC_KILLPAGES, there should be
19777 * no holes in the range, since we couldn't
19778 * prevent someone else from allocating in
19779 * that hole and we wouldn't want to "kill"
19780 * their pages.
19781 */
19782 vm_map_unlock(map);
19783 break;
19784 }
19785
19786 /*
19787 * Check for empty map.
19788 */
19789 if (entry == vm_map_to_entry(map) &&
19790 entry->vme_next == entry) {
19791 vm_map_unlock(map);
19792 break;
19793 }
19794 /*
19795 * Check that we don't wrap and that
19796 * we have at least one real map entry.
19797 */
19798 if ((map->hdr.nentries == 0) ||
19799 (entry->vme_next->vme_start < address)) {
19800 vm_map_unlock(map);
19801 break;
19802 }
19803 /*
19804 * Move up to the next entry if needed
19805 */
19806 skip = (entry->vme_next->vme_start - address);
19807 if (skip >= amount_left) {
19808 amount_left = 0;
19809 } else {
19810 amount_left -= skip;
19811 }
19812 address = entry->vme_next->vme_start;
19813 vm_map_unlock(map);
19814 continue;
19815 }
19816
19817 offset = address - entry->vme_start;
19818 pmap_offset = address;
19819
19820 /*
19821 * do we have more to flush than is contained in this
19822 * entry ?
19823 */
19824 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19825 flush_size = entry->vme_end -
19826 (entry->vme_start + offset);
19827 } else {
19828 flush_size = amount_left;
19829 }
19830 amount_left -= flush_size;
19831 address += flush_size;
19832
19833 if (entry->is_sub_map == TRUE) {
19834 vm_map_t local_map;
19835 vm_map_offset_t local_offset;
19836
19837 local_map = VME_SUBMAP(entry);
19838 local_offset = VME_OFFSET(entry);
19839 vm_map_reference(local_map);
19840 vm_map_unlock(map);
19841 if (vm_map_msync(
19842 local_map,
19843 local_offset,
19844 flush_size,
19845 sync_flags) == KERN_INVALID_ADDRESS) {
19846 had_hole = TRUE;
19847 }
19848 vm_map_deallocate(local_map);
19849 continue;
19850 }
19851 object = VME_OBJECT(entry);
19852
19853 /*
19854 * We can't sync this object if the object has not been
19855 * created yet
19856 */
19857 if (object == VM_OBJECT_NULL) {
19858 vm_map_unlock(map);
19859 continue;
19860 }
19861 offset += VME_OFFSET(entry);
19862
19863 vm_object_lock(object);
19864
19865 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19866 int kill_pages = 0;
19867 boolean_t reusable_pages = FALSE;
19868
19869 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19870 /*
19871 * This is a destructive operation and so we
19872 * err on the side of limiting the range of
19873 * the operation.
19874 */
19875 start_offset = vm_object_round_page(offset);
19876 end_offset = vm_object_trunc_page(offset + flush_size);
19877
19878 if (end_offset <= start_offset) {
19879 vm_object_unlock(object);
19880 vm_map_unlock(map);
19881 continue;
19882 }
19883
19884 pmap_offset += start_offset - offset;
19885 } else {
19886 start_offset = offset;
19887 end_offset = offset + flush_size;
19888 }
19889
19890 if (sync_flags & VM_SYNC_KILLPAGES) {
19891 if (((object->ref_count == 1) ||
19892 ((object->copy_strategy !=
19893 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19894 (object->copy == VM_OBJECT_NULL))) &&
19895 (object->shadow == VM_OBJECT_NULL)) {
19896 if (object->ref_count != 1) {
19897 vm_page_stats_reusable.free_shared++;
19898 }
19899 kill_pages = 1;
19900 } else {
19901 kill_pages = -1;
19902 }
19903 }
19904 if (kill_pages != -1) {
19905 vm_object_deactivate_pages(
19906 object,
19907 start_offset,
19908 (vm_object_size_t) (end_offset - start_offset),
19909 kill_pages,
19910 reusable_pages,
19911 map->pmap,
19912 pmap_offset);
19913 }
19914 vm_object_unlock(object);
19915 vm_map_unlock(map);
19916 continue;
19917 }
19918 /*
19919 * We can't sync this object if there isn't a pager.
19920 * Don't bother to sync internal objects, since there can't
19921 * be any "permanent" storage for these objects anyway.
19922 */
19923 if ((object->pager == MEMORY_OBJECT_NULL) ||
19924 (object->internal) || (object->private)) {
19925 vm_object_unlock(object);
19926 vm_map_unlock(map);
19927 continue;
19928 }
19929 /*
19930 * keep reference on the object until syncing is done
19931 */
19932 vm_object_reference_locked(object);
19933 vm_object_unlock(object);
19934
19935 vm_map_unlock(map);
19936
19937 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19938 start_offset = vm_object_trunc_page(offset);
19939 end_offset = vm_object_round_page(offset + flush_size);
19940 } else {
19941 start_offset = offset;
19942 end_offset = offset + flush_size;
19943 }
19944
19945 do_sync_req = vm_object_sync(object,
19946 start_offset,
19947 (end_offset - start_offset),
19948 sync_flags & VM_SYNC_INVALIDATE,
19949 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19950 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19951 sync_flags & VM_SYNC_SYNCHRONOUS);
19952
19953 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19954 /*
19955 * clear out the clustering and read-ahead hints
19956 */
19957 vm_object_lock(object);
19958
19959 object->pages_created = 0;
19960 object->pages_used = 0;
19961 object->sequential = 0;
19962 object->last_alloc = 0;
19963
19964 vm_object_unlock(object);
19965 }
19966 vm_object_deallocate(object);
19967 } /* while */
19968
19969 /* for proper msync() behaviour */
19970 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19971 return KERN_INVALID_ADDRESS;
19972 }
19973
19974 return KERN_SUCCESS;
19975 }/* vm_msync */
19976
void
vm_named_entry_associate_vm_object(
	vm_named_entry_t        named_entry,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	vm_prot_t               prot)
{
	vm_map_copy_t copy;
	vm_map_entry_t copy_entry;

	/* the named entry must not have any backing store yet */
	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(!named_entry->is_object);
	assert(!named_entry->internal);
	assert(named_entry->backing.copy == VM_MAP_COPY_NULL);

	/*
	 * Wrap the VM object in a single-entry vm_map_copy and install
	 * that copy as the named entry's backing store.
	 */
	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->offset = offset;
	copy->size = size;
	copy->cpy_hdr.page_shift = (uint16_t)PAGE_SHIFT;
	vm_map_store_init(&copy->cpy_hdr);

	/* one entry spanning the page-aligned [offset, offset+size) range */
	copy_entry = vm_map_copy_entry_create(copy);
	copy_entry->protection = prot;
	copy_entry->max_protection = prot;
	copy_entry->use_pmap = TRUE;
	copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
	copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
	VME_OBJECT_SET(copy_entry, object, false, 0);
	VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
	vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);

	named_entry->backing.copy = copy;
	named_entry->is_object = TRUE;
	if (object->internal) {
		/* an internal object makes the whole named entry "internal" */
		named_entry->internal = TRUE;
	}

	DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n",
	    named_entry, copy, object, offset, size, prot);
}
20020
vm_object_t
vm_named_entry_to_vm_object(
	vm_named_entry_t        named_entry)
{
	vm_map_copy_t   copy;
	vm_map_entry_t  copy_entry;
	vm_object_t     object;

	/*
	 * Extract the VM object backing an object-backed named entry,
	 * i.e. the inverse of vm_named_entry_associate_vm_object().
	 */
	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(named_entry->is_object);
	copy = named_entry->backing.copy;
	assert(copy != VM_MAP_COPY_NULL);
	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);
	/* object-backed entries always carry exactly one entry */
	assert(copy->cpy_hdr.nentries == 1);
	copy_entry = vm_map_copy_first_entry(copy);
	object = VME_OBJECT(copy_entry);

	DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);

	return object;
}
20047
20048 /*
20049 * Routine: convert_port_entry_to_map
20050 * Purpose:
20051 * Convert from a port specifying an entry or a task
20052 * to a map. Doesn't consume the port ref; produces a map ref,
20053 * which may be null. Unlike convert_port_to_map, the
20054 * port may be task or a named entry backed.
20055 * Conditions:
20056 * Nothing locked.
20057 */
20058
20059 vm_map_t
convert_port_entry_to_map(ipc_port_t port)20060 convert_port_entry_to_map(
20061 ipc_port_t port)
20062 {
20063 vm_map_t map = VM_MAP_NULL;
20064 vm_named_entry_t named_entry;
20065
20066 if (!IP_VALID(port)) {
20067 return VM_MAP_NULL;
20068 }
20069
20070 if (ip_kotype(port) != IKOT_NAMED_ENTRY) {
20071 return convert_port_to_map(port);
20072 }
20073
20074 named_entry = mach_memory_entry_from_port(port);
20075
20076 if ((named_entry->is_sub_map) &&
20077 (named_entry->protection & VM_PROT_WRITE)) {
20078 map = named_entry->backing.map;
20079 if (map->pmap != PMAP_NULL) {
20080 if (map->pmap == kernel_pmap) {
20081 panic("userspace has access "
20082 "to a kernel map %p", map);
20083 }
20084 pmap_require(map->pmap);
20085 }
20086 vm_map_reference(map);
20087 }
20088
20089 return map;
20090 }
20091
20092 /*
20093 * Export routines to other components for the things we access locally through
20094 * macros.
20095 */
20096 #undef current_map
vm_map_t
current_map(void)
{
	/* out-of-line wrapper so other components can call the macro */
	return current_map_fast();
}
20102
20103 /*
20104 * vm_map_reference:
20105 *
20106 * Takes a reference on the specified map.
20107 */
20108 void
vm_map_reference(vm_map_t map)20109 vm_map_reference(
20110 vm_map_t map)
20111 {
20112 if (__probable(map != VM_MAP_NULL)) {
20113 vm_map_require(map);
20114 os_ref_retain_raw(&map->map_refcnt, &map_refgrp);
20115 }
20116 }
20117
20118 /*
20119 * vm_map_deallocate:
20120 *
20121 * Removes a reference from the specified map,
20122 * destroying it if no references remain.
20123 * The map should not be locked.
20124 */
20125 void
vm_map_deallocate(vm_map_t map)20126 vm_map_deallocate(
20127 vm_map_t map)
20128 {
20129 if (__probable(map != VM_MAP_NULL)) {
20130 vm_map_require(map);
20131 if (os_ref_release_raw(&map->map_refcnt, &map_refgrp) == 0) {
20132 vm_map_destroy(map);
20133 }
20134 }
20135 }
20136
void
vm_map_inspect_deallocate(
	vm_map_inspect_t        map)
{
	/* an inspect right is just a flavored vm_map_t reference */
	vm_map_deallocate((vm_map_t)map);
}
20143
void
vm_map_read_deallocate(
	vm_map_read_t        map)
{
	/* a read right is just a flavored vm_map_t reference */
	vm_map_deallocate((vm_map_t)map);
}
20150
20151
20152 void
vm_map_disable_NX(vm_map_t map)20153 vm_map_disable_NX(vm_map_t map)
20154 {
20155 if (map == NULL) {
20156 return;
20157 }
20158 if (map->pmap == NULL) {
20159 return;
20160 }
20161
20162 pmap_disable_NX(map->pmap);
20163 }
20164
20165 void
vm_map_disallow_data_exec(vm_map_t map)20166 vm_map_disallow_data_exec(vm_map_t map)
20167 {
20168 if (map == NULL) {
20169 return;
20170 }
20171
20172 map->map_disallow_data_exec = TRUE;
20173 }
20174
20175 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
20176 * more descriptive.
20177 */
void
vm_map_set_32bit(vm_map_t map)
{
	/* cap the map at the 32-bit userspace limit */
#if defined(__arm64__)
	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}
20187
20188
void
vm_map_set_64bit(vm_map_t map)
{
	/* cap the map at the 64-bit userspace limit */
#if defined(__arm64__)
	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
}
20198
20199 /*
20200 * Expand the maximum size of an existing map to the maximum supported.
20201 */
void
vm_map_set_jumbo(vm_map_t map)
{
#if defined (__arm64__) && !XNU_TARGET_OS_OSX
	/* ~0 requests the largest offset; clamped inside vm_map_set_max_addr() */
	vm_map_set_max_addr(map, ~0);
#else /* arm64 */
	(void) map;
#endif
}
20211
20212 /*
20213 * This map has a JIT entitlement
20214 */
void
vm_map_set_jit_entitled(vm_map_t map)
{
	/* delegate the JIT entitlement to the pmap layer (arm64 only) */
#if defined (__arm64__)
	pmap_set_jit_entitled(map->pmap);
#else /* arm64 */
	(void) map;
#endif
}
20224
20225 /*
20226 * This map has TPRO enabled
20227 */
void
vm_map_set_tpro(vm_map_t map)
{
	/* delegate TPRO enablement to the pmap layer (arm64e only) */
#if defined (__arm64e__)
	pmap_set_tpro(map->pmap);
#else /* arm64e */
	(void) map;
#endif
}
20237
20238 /*
20239 * Expand the maximum size of an existing map.
20240 */
void
vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
{
#if defined(__arm64__)
	vm_map_offset_t max_supported_offset;
	vm_map_offset_t old_max_offset;

	vm_map_lock(map);

	old_max_offset = map->max_offset;
	/* the pmap bounds how far this map may be grown */
	max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);

	new_max_offset = trunc_page(new_max_offset);

	/* The address space cannot be shrunk using this routine. */
	if (old_max_offset >= new_max_offset) {
		vm_map_unlock(map);
		return;
	}

	if (max_supported_offset < new_max_offset) {
		/* clamp the request to what the pmap supports */
		new_max_offset = max_supported_offset;
	}

	map->max_offset = new_max_offset;

	/* keep the hole list consistent with the enlarged address space */
	if (map->holelistenabled) {
		if (map->holes_list->prev->vme_end == old_max_offset) {
			/*
			 * There is already a hole at the end of the map; simply make it bigger.
			 */
			map->holes_list->prev->vme_end = map->max_offset;
		} else {
			/*
			 * There is no hole at the end, so we need to create a new hole
			 * for the new empty space we're creating.
			 */
			struct vm_map_links *new_hole;

			new_hole = zalloc_id(ZONE_ID_VM_MAP_HOLES, Z_WAITOK | Z_NOFAIL);
			new_hole->start = old_max_offset;
			new_hole->end = map->max_offset;
			/* link the new hole in at the tail of the circular list */
			new_hole->prev = map->holes_list->prev;
			new_hole->next = (struct vm_map_entry *)map->holes_list;
			map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
			map->holes_list->prev = (struct vm_map_entry *)new_hole;
		}
	}

	vm_map_unlock(map);
#else
	(void)map;
	(void)new_max_offset;
#endif
}
20296
vm_map_offset_t
vm_compute_max_offset(boolean_t is64)
{
	/* largest user address for a 32- or 64-bit address space */
#if defined(__arm64__)
	return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}
20306
/*
 * Report, via out-parameters, how many ASLR slide sections the platform
 * supports and how large each section is.
 */
void
vm_map_get_max_aslr_slide_section(
	vm_map_t                map __unused,
	int64_t                 *max_sections,
	int64_t                 *section_size)
{
#if defined(__arm64__)
	*max_sections = 3;
	*section_size = ARM_TT_TWIG_SIZE;
#else
	/* non-arm64: a single section with no size constraint */
	*max_sections = 1;
	*section_size = 0;
#endif
}
20321
uint64_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
	 * limited embedded address space; this is also meant to minimize pmap
	 * memory usage on 16KB page systems.
	 */
	return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
#else
	/* 64-bit maps get 2^16 pages of slide, 32-bit maps 2^8 */
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
20335
uint64_t
vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
	 * of independent entropy on 16KB page systems.
	 */
	return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
#else
	/* 64-bit maps get 2^16 pages of slide, 32-bit maps 2^8 */
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
20348
boolean_t
vm_map_is_64bit(
	vm_map_t map)
{
	/* a map reaching beyond the 32-bit limit is a 64-bit map */
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}
20355
/*
 * Returns TRUE if the map's inaccessible low region ("page zero")
 * covers at least pagezero_size bytes.
 */
boolean_t
vm_map_has_hard_pagezero(
	vm_map_t        map,
	vm_map_offset_t pagezero_size)
{
	/*
	 * XXX FBDP
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return map->min_offset >= pagezero_size;
}
20373
20374 /*
20375 * Raise a VM map's maximun offset.
20376 */
20377 kern_return_t
vm_map_raise_max_offset(vm_map_t map,vm_map_offset_t new_max_offset)20378 vm_map_raise_max_offset(
20379 vm_map_t map,
20380 vm_map_offset_t new_max_offset)
20381 {
20382 kern_return_t ret;
20383
20384 vm_map_lock(map);
20385 ret = KERN_INVALID_ADDRESS;
20386
20387 if (new_max_offset >= map->max_offset) {
20388 if (!vm_map_is_64bit(map)) {
20389 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20390 map->max_offset = new_max_offset;
20391 ret = KERN_SUCCESS;
20392 }
20393 } else {
20394 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20395 map->max_offset = new_max_offset;
20396 ret = KERN_SUCCESS;
20397 }
20398 }
20399 }
20400
20401 vm_map_unlock(map);
20402 return ret;
20403 }
20404
20405
20406 /*
20407 * Raise a VM map's minimum offset.
20408 * To strictly enforce "page zero" reservation.
20409 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t        map,
	vm_map_offset_t new_min_offset)
{
	vm_map_entry_t  first_entry;

	/* round up so the reserved region is page-aligned for this map */
	new_min_offset = vm_map_round_page(new_min_offset,
	    VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	if (new_min_offset >= map->max_offset) {
		/* can't go beyond the end of the address space */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimun offset. It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	/* shrink the leading hole to match the new minimum */
	if (map->holelistenabled) {
		assert(map->holes_list);
		map->holes_list->start = new_min_offset;
		assert(new_min_offset < map->holes_list->end);
	}

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
20460
20461 /*
20462 * Set the limit on the maximum amount of address space and user wired memory allowed for this map.
20463 * This is basically a copy of the RLIMIT_AS and RLIMIT_MEMLOCK rlimit value maintained by the BSD
20464 * side of the kernel. The limits are checked in the mach VM side, so we keep a copy so we don't
20465 * have to reach over to the BSD data structures.
20466 */
20467
/* number of times a finite size limit has been installed (statistics) */
uint64_t vm_map_set_size_limit_count = 0;
kern_return_t
vm_map_set_size_limit(vm_map_t map, uint64_t new_size_limit)
{
	kern_return_t kr;

	vm_map_lock(map);
	if (new_size_limit < map->size) {
		/* new limit should not be lower than its current size */
		DTRACE_VM2(vm_map_set_size_limit_fail,
		    vm_map_size_t, map->size,
		    uint64_t, new_size_limit);
		kr = KERN_FAILURE;
	} else if (new_size_limit == map->size_limit) {
		/* no change */
		kr = KERN_SUCCESS;
	} else {
		/* set new limit */
		DTRACE_VM2(vm_map_set_size_limit,
		    vm_map_size_t, map->size,
		    uint64_t, new_size_limit);
		if (new_size_limit != RLIM_INFINITY) {
			vm_map_set_size_limit_count++;
		}
		map->size_limit = new_size_limit;
		kr = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	return kr;
}
20498
/* number of times a finite data limit has been installed (statistics) */
uint64_t vm_map_set_data_limit_count = 0;
kern_return_t
vm_map_set_data_limit(vm_map_t map, uint64_t new_data_limit)
{
	kern_return_t kr;

	vm_map_lock(map);
	if (new_data_limit < map->size) {
		/* new limit should not be lower than its current size */
		DTRACE_VM2(vm_map_set_data_limit_fail,
		    vm_map_size_t, map->size,
		    uint64_t, new_data_limit);
		kr = KERN_FAILURE;
	} else if (new_data_limit == map->data_limit) {
		/* no change */
		kr = KERN_SUCCESS;
	} else {
		/* set new limit */
		DTRACE_VM2(vm_map_set_data_limit,
		    vm_map_size_t, map->size,
		    uint64_t, new_data_limit);
		if (new_data_limit != RLIM_INFINITY) {
			vm_map_set_data_limit_count++;
		}
		map->data_limit = new_data_limit;
		kr = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	return kr;
}
20529
/* Install the user-wired-memory limit (RLIMIT_MEMLOCK mirror) under the map lock. */
void
vm_map_set_user_wire_limit(vm_map_t map,
    vm_size_t limit)
{
	vm_map_lock(map);
	map->user_wire_limit = limit;
	vm_map_unlock(map);
}
20538
20539
/* Set the map's switch_protect flag under the map lock. */
void
vm_map_switch_protect(vm_map_t map,
    boolean_t val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}
20548
20549 extern int cs_process_enforcement_enable;
20550 boolean_t
vm_map_cs_enforcement(vm_map_t map)20551 vm_map_cs_enforcement(
20552 vm_map_t map)
20553 {
20554 if (cs_process_enforcement_enable) {
20555 return TRUE;
20556 }
20557 return map->cs_enforcement;
20558 }
20559
kern_return_t
vm_map_cs_wx_enable(
	vm_map_t map)
{
	/* ask the pmap layer to permit invalid (unsigned) code pages */
	return pmap_cs_allow_invalid(vm_map_pmap(map));
}
20566
/* Set the map's cs_debugged flag under the map lock. */
void
vm_map_cs_debugged_set(
	vm_map_t map,
	boolean_t val)
{
	vm_map_lock(map);
	map->cs_debugged = val;
	vm_map_unlock(map);
}
20576
/* Set the map's cs_enforcement flag and mirror it into the pmap, under the map lock. */
void
vm_map_cs_enforcement_set(
	vm_map_t map,
	boolean_t val)
{
	vm_map_lock(map);
	map->cs_enforcement = val;
	/* keep the pmap's view of CS enforcement in sync */
	pmap_set_vm_map_cs_enforced(map->pmap, val);
	vm_map_unlock(map);
}
20587
20588 /*
20589 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20590 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20591 * bump both counters.
20592 */
20593 void
vm_map_iokit_mapped_region(vm_map_t map,vm_size_t bytes)20594 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20595 {
20596 pmap_t pmap = vm_map_pmap(map);
20597
20598 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20599 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20600 }
20601
20602 void
vm_map_iokit_unmapped_region(vm_map_t map,vm_size_t bytes)20603 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20604 {
20605 pmap_t pmap = vm_map_pmap(map);
20606
20607 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20608 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20609 }
20610
20611 /* Add (generate) code signature for memory range */
20612 #if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t
vm_map_sign(vm_map_t map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object. Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Hold the object lock for the whole walk; the map lock can be
	 * dropped once we have the object.
	 */
	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object,
		    start - entry->vme_start + VME_OFFSET(entry));
		if (m == VM_PAGE_NULL) {
			/* shoud we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->vmp_busy ||
		    (m->vmp_unusual && (VMP_ERROR_GET(m) || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		/* XXX TODO4K: deal with 4k subpages individually? */
		m->vmp_cs_validated = VMP_CS_ALL_TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->vmp_wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
20705 #endif
20706
/*
 * vm_map_partial_reap:
 *	Delete every entry of "map" that is backed by an internal,
 *	unshared (ref_count == 1) VM object, accumulating the number of
 *	resident and compressed pages that were reclaimed.
 *
 *	NOTE(review): *reclaimed_resident and *reclaimed_compressed are
 *	only incremented here — callers are presumably expected to zero
 *	them first; verify.
 */
kern_return_t
vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
{
	vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t next_entry;
	kern_return_t kr = KERN_SUCCESS;
	VM_MAP_ZAP_DECLARE(zap_list);

	vm_map_lock(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = next_entry) {
		/* capture the next entry now: vm_map_delete() may unlink "entry" */
		next_entry = entry->vme_next;

		if (!entry->is_sub_map &&
		    VME_OBJECT(entry) &&
		    (VME_OBJECT(entry)->internal == TRUE) &&
		    (VME_OBJECT(entry)->ref_count == 1)) {
			*reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
			*reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);

			(void)vm_map_delete(map, entry->vme_start,
			    entry->vme_end, VM_MAP_REMOVE_NO_YIELD,
			    KMEM_GUARD_NONE, &zap_list);
		}
	}

	vm_map_unlock(map);

	/* dispose of the zapped entries after dropping the map lock */
	vm_map_zap_dispose(&zap_list);

	return kr;
}
20741
20742
20743 #if DEVELOPMENT || DEBUG
20744
/*
 * vm_map_disconnect_page_mappings:
 *	Remove all page mappings from "map"'s pmap. If "do_unnest" is TRUE,
 *	first un-nest any nested sub-map regions so that only this task's
 *	pmap is affected by the removal.
 *
 *	Returns the task's phys_mem ledger balance (sampled before the
 *	disconnect), expressed in map pages.
 *
 *	DEVELOPMENT/DEBUG builds only.
 */
int
vm_map_disconnect_page_mappings(
	vm_map_t map,
	boolean_t do_unnest)
{
	vm_map_entry_t entry;
	ledger_amount_t byte_count = 0;

	if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
		vm_map_lock(map);

		for (entry = vm_map_first_entry(map);
		    entry != vm_map_to_entry(map);
		    entry = entry->vme_next) {
			if (entry->is_sub_map && entry->use_pmap) {
				/*
				 * Make sure the range between the start of this entry and
				 * the end of this entry is no longer nested, so that
				 * we will only remove mappings from the pmap in use by
				 * this task
				 */
				vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
			}
		}
		vm_map_unlock(map);
#endif
	}
	vm_map_lock_read(map);

	/* sample the physical memory footprint before disconnecting */
	ledger_get_balance(map->pmap->ledger, task_ledgers.phys_mem, &byte_count);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		/* skip entries with no object or physically contiguous objects */
		if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
		    (VME_OBJECT(entry)->phys_contiguous))) {
			continue;
		}
		if (entry->is_sub_map) {
			/* any nested sub-map should have been un-nested above */
			assert(!entry->use_pmap);
		}

		pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
	}
	vm_map_unlock_read(map);

	return (int) (byte_count / VM_MAP_PAGE_SIZE(map));
}
20794
/*
 * vm_map_inject_error:
 *	Inject a decompression error for the page containing "vaddr" in
 *	"map" (test support for compressor-pager error handling).
 *
 *	Returns KERN_MEMORY_ERROR if no object backs the address,
 *	KERN_MEMORY_PRESENT if the backing object has no pager, or the
 *	result of vm_compressor_pager_inject_error() otherwise.
 *
 *	DEVELOPMENT/DEBUG builds only.
 */
kern_return_t
vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_object_t object = NULL;
	vm_object_offset_t offset;
	vm_prot_t prot;
	boolean_t wired;
	vm_map_version_t version;
	vm_map_t real_map;
	int result = KERN_FAILURE;

	/* operate on the page containing "vaddr" */
	vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
	vm_map_lock(map);

	result = vm_map_lookup_and_lock_object(&map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
	    NULL, &real_map, NULL);
	if (object == NULL) {
		result = KERN_MEMORY_ERROR;
	} else if (object->pager) {
		result = vm_compressor_pager_inject_error(object->pager,
		    offset);
	} else {
		/* no pager: the page is not in the compressor */
		result = KERN_MEMORY_PRESENT;
	}

	/* the lookup returned "object" locked (when non-NULL) */
	if (object != NULL) {
		vm_object_unlock(object);
	}

	/*
	 * NOTE(review): assumes vm_map_lookup_and_lock_object() always
	 * sets "real_map", even on failure — verify.
	 */
	if (real_map != map) {
		vm_map_unlock(real_map);
	}
	vm_map_unlock(map);

	return result;
}
20832
20833 #endif
20834
20835
20836 #if CONFIG_FREEZE
20837
20838
/* freezer state shared with code defined elsewhere */
extern struct freezer_context freezer_context_global;
/* uptime recorded when the freezer last reset/yielded (see clock_get_uptime() below) */
AbsoluteTime c_freezer_last_yield_ts = 0;

/* memorystatus freezer policy knobs, defined elsewhere */
extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20844
/*
 * vm_map_freeze:
 *	Freeze "task"'s address space by compressing the pages of its
 *	private, internal VM objects (up to "dirty_budget" pages).
 *
 *	When freezer swap is active, a first "evaluation" pass totals the
 *	private vs shared dirty pages and fails early if the task exceeds
 *	the shared-memory thresholds; "eval_only" stops after that pass.
 *	Otherwise the freezing pass runs (label "again"), purging volatile
 *	objects mapped only by this task and paging out each eligible
 *	object.
 *
 *	Outputs: the five count pointers are zeroed up front;
 *	*wired_count accumulates wired pages of frozen objects and
 *	*shared_count reports the shared dirty total in MB.  On failure,
 *	*freezer_error_code is set to a FREEZER_ERROR_* value.
 */
kern_return_t
vm_map_freeze(
	task_t task,
	unsigned int *purgeable_count,
	unsigned int *wired_count,
	unsigned int *clean_count,
	unsigned int *dirty_count,
	unsigned int dirty_budget,
	unsigned int *shared_count,
	int *freezer_error_code,
	boolean_t eval_only)
{
	vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
	kern_return_t kr = KERN_SUCCESS;
	boolean_t evaluation_phase = TRUE;
	vm_object_t cur_shared_object = NULL;
	int cur_shared_obj_ref_cnt = 0;
	unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;

	*purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;

	/*
	 * We need the exclusive lock here so that we can
	 * block any page faults or lookups while we are
	 * in the middle of freezing this vm map.
	 */
	vm_map_t map = task->map;

	vm_map_lock(map);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	/* bail out early if there is nowhere to put compressed data */
	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
		if (vm_compressor_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
		}

		if (vm_swap_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
		}

		kr = KERN_NO_SPACE;
		goto done;
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
		/*
		 * In-memory compressor backing the freezer. No disk.
		 * So no need to do the evaluation phase.
		 */
		evaluation_phase = FALSE;

		if (eval_only == TRUE) {
			/*
			 * We don't support 'eval_only' mode
			 * in this non-swap config.
			 */
			*freezer_error_code = FREEZER_ERROR_GENERIC;
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);
	}
again:

	for (entry2 = vm_map_first_entry(map);
	    entry2 != vm_map_to_entry(map);
	    entry2 = entry2->vme_next) {
		vm_object_t src_object;

		if (entry2->is_sub_map) {
			continue;
		}

		/* only internal, non-contiguous objects are freezable */
		src_object = VME_OBJECT(entry2);
		if (!src_object ||
		    src_object->phys_contiguous ||
		    !src_object->internal) {
			continue;
		}

		/* If eligible, scan the entry, moving eligible pages over to our parent object */

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * We skip purgeable objects during evaluation phase only.
			 * If we decide to freeze this process, we'll explicitly
			 * purge these objects before we go around again with
			 * 'evaluation_phase' set to FALSE.
			 */

			if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
				/*
				 * We want to purge objects that may not belong to this task but are mapped
				 * in this task alone. Since we already purged this task's purgeable memory
				 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
				 * on this task's purgeable objects. Hence the check for only volatile objects.
				 */
				if (evaluation_phase == FALSE &&
				    (src_object->purgable == VM_PURGABLE_VOLATILE) &&
				    (src_object->ref_count == 1)) {
					vm_object_lock(src_object);
					vm_object_purge(src_object, 0);
					vm_object_unlock(src_object);
				}
				continue;
			}

			/*
			 * Pages belonging to this object could be swapped to disk.
			 * Make sure it's not a shared object because we could end
			 * up just bringing it back in again.
			 *
			 * We try to optimize somewhat by checking for objects that are mapped
			 * more than once within our own map. But we don't do full searches,
			 * we just look at the entries following our current entry.
			 */

			if (src_object->ref_count > 1) {
				if (src_object != cur_shared_object) {
					/* first sighting: count it as shared for now */
					obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
					dirty_shared_count += obj_pages_snapshot;

					cur_shared_object = src_object;
					cur_shared_obj_ref_cnt = 1;
					continue;
				} else {
					cur_shared_obj_ref_cnt++;
					if (src_object->ref_count == cur_shared_obj_ref_cnt) {
						/*
						 * Fall through to below and treat this object as private.
						 * So deduct its pages from our shared total and add it to the
						 * private total.
						 */

						dirty_shared_count -= obj_pages_snapshot;
						dirty_private_count += obj_pages_snapshot;
					} else {
						continue;
					}
				}
			}


			if (src_object->ref_count == 1) {
				dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
			}

			if (evaluation_phase == TRUE) {
				/* evaluation pass only counts; no pageout */
				continue;
			}
		}

		/* freezing pass: compress/page out this object's pages */
		uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
		*wired_count += src_object->wired_page_count;

		if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
			if (vm_compressor_low_on_space()) {
				*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
			}

			if (vm_swap_low_on_space()) {
				*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
			}

			kr = KERN_NO_SPACE;
			break;
		}
		if (paged_out_count >= dirty_budget) {
			/* budget exhausted: stop freezing */
			break;
		}
		dirty_budget -= paged_out_count;
	}

	/* report the shared dirty total in MB */
	*shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
	if (evaluation_phase) {
		unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;

		if (dirty_shared_count > shared_pages_threshold) {
			*freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
			kr = KERN_FAILURE;
			goto done;
		}

		if (dirty_shared_count &&
		    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
			*freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
			kr = KERN_FAILURE;
			goto done;
		}

		/* evaluation passed: switch to the freezing pass */
		evaluation_phase = FALSE;
		dirty_shared_count = dirty_private_count = 0;

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);

		if (eval_only) {
			kr = KERN_SUCCESS;
			goto done;
		}

		/* purge this task's own purgeable memory before freezing */
		vm_purgeable_purge_task_owned(task);

		goto again;
	} else {
		kr = KERN_SUCCESS;
	}

done:
	vm_map_unlock(map);

	if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
		vm_object_compressed_freezer_done();
	}
	return kr;
}
21064
21065 #endif
21066
21067 /*
21068 * vm_map_entry_should_cow_for_true_share:
21069 *
21070 * Determines if the map entry should be clipped and setup for copy-on-write
21071 * to avoid applying "true_share" to a large VM object when only a subset is
21072 * targeted.
21073 *
21074 * For now, we target only the map entries created for the Objective C
21075 * Garbage Collector, which initially have the following properties:
21076 * - alias == VM_MEMORY_MALLOC
21077 * - wired_count == 0
21078 * - !needs_copy
21079 * and a VM object with:
21080 * - internal
21081 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
21082 * - !true_share
21083 * - vo_size == ANON_CHUNK_SIZE
21084 *
21085 * Only non-kernel map entries.
21086 */
21087 boolean_t
vm_map_entry_should_cow_for_true_share(vm_map_entry_t entry)21088 vm_map_entry_should_cow_for_true_share(
21089 vm_map_entry_t entry)
21090 {
21091 vm_object_t object;
21092
21093 if (entry->is_sub_map) {
21094 /* entry does not point at a VM object */
21095 return FALSE;
21096 }
21097
21098 if (entry->needs_copy) {
21099 /* already set for copy_on_write: done! */
21100 return FALSE;
21101 }
21102
21103 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
21104 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
21105 /* not a malloc heap or Obj-C Garbage Collector heap */
21106 return FALSE;
21107 }
21108
21109 if (entry->wired_count) {
21110 /* wired: can't change the map entry... */
21111 vm_counters.should_cow_but_wired++;
21112 return FALSE;
21113 }
21114
21115 object = VME_OBJECT(entry);
21116
21117 if (object == VM_OBJECT_NULL) {
21118 /* no object yet... */
21119 return FALSE;
21120 }
21121
21122 if (!object->internal) {
21123 /* not an internal object */
21124 return FALSE;
21125 }
21126
21127 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
21128 /* not the default copy strategy */
21129 return FALSE;
21130 }
21131
21132 if (object->true_share) {
21133 /* already true_share: too late to avoid it */
21134 return FALSE;
21135 }
21136
21137 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
21138 object->vo_size != ANON_CHUNK_SIZE) {
21139 /* ... not an object created for the ObjC Garbage Collector */
21140 return FALSE;
21141 }
21142
21143 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
21144 object->vo_size != 2048 * 4096) {
21145 /* ... not a "MALLOC_SMALL" heap */
21146 return FALSE;
21147 }
21148
21149 /*
21150 * All the criteria match: we have a large object being targeted for "true_share".
21151 * To limit the adverse side-effects linked with "true_share", tell the caller to
21152 * try and avoid setting up the entire object for "true_share" by clipping the
21153 * targeted range and setting it up for copy-on-write.
21154 */
21155 return TRUE;
21156 }
21157
/* Round "offset" up to the page boundary described by "mask". */
vm_map_offset_t
vm_map_round_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_ROUND_PAGE(offset, mask);
}
21165
/* Truncate "offset" down to the page boundary described by "mask". */
vm_map_offset_t
vm_map_trunc_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_TRUNC_PAGE(offset, mask);
}
21173
/*
 * Return TRUE if "offset" is aligned to the page boundary described by
 * "mask" (no bits of "mask" set in "offset").
 */
boolean_t
vm_map_page_aligned(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return ((offset) & mask) == 0;
}
21181
/* Return the page shift (log2 of page size) used by "map". */
int
vm_map_page_shift(
	vm_map_t map)
{
	return VM_MAP_PAGE_SHIFT(map);
}
21188
/* Return the page size, in bytes, used by "map". */
int
vm_map_page_size(
	vm_map_t map)
{
	return VM_MAP_PAGE_SIZE(map);
}
21195
/* Return the page mask (page size minus one) used by "map". */
vm_map_offset_t
vm_map_page_mask(
	vm_map_t map)
{
	return VM_MAP_PAGE_MASK(map);
}
21202
/*
 * vm_map_set_page_shift:
 *	Set the page shift (log2 of the page size) used by "map".
 *	Only allowed while the map is still empty.
 *
 *	NOTE(review): "pageshift" is truncated to 16 bits and not
 *	range-checked here — presumably callers validate it; verify.
 */
kern_return_t
vm_map_set_page_shift(
	vm_map_t map,
	int pageshift)
{
	if (map->hdr.nentries != 0) {
		/* too late to change page size */
		return KERN_FAILURE;
	}

	map->hdr.page_shift = (uint16_t)pageshift;

	return KERN_SUCCESS;
}
21217
21218 kern_return_t
vm_map_query_volatile(vm_map_t map,mach_vm_size_t * volatile_virtual_size_p,mach_vm_size_t * volatile_resident_size_p,mach_vm_size_t * volatile_compressed_size_p,mach_vm_size_t * volatile_pmap_size_p,mach_vm_size_t * volatile_compressed_pmap_size_p)21219 vm_map_query_volatile(
21220 vm_map_t map,
21221 mach_vm_size_t *volatile_virtual_size_p,
21222 mach_vm_size_t *volatile_resident_size_p,
21223 mach_vm_size_t *volatile_compressed_size_p,
21224 mach_vm_size_t *volatile_pmap_size_p,
21225 mach_vm_size_t *volatile_compressed_pmap_size_p)
21226 {
21227 mach_vm_size_t volatile_virtual_size;
21228 mach_vm_size_t volatile_resident_count;
21229 mach_vm_size_t volatile_compressed_count;
21230 mach_vm_size_t volatile_pmap_count;
21231 mach_vm_size_t volatile_compressed_pmap_count;
21232 mach_vm_size_t resident_count;
21233 vm_map_entry_t entry;
21234 vm_object_t object;
21235
21236 /* map should be locked by caller */
21237
21238 volatile_virtual_size = 0;
21239 volatile_resident_count = 0;
21240 volatile_compressed_count = 0;
21241 volatile_pmap_count = 0;
21242 volatile_compressed_pmap_count = 0;
21243
21244 for (entry = vm_map_first_entry(map);
21245 entry != vm_map_to_entry(map);
21246 entry = entry->vme_next) {
21247 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
21248
21249 if (entry->is_sub_map) {
21250 continue;
21251 }
21252 if (!(entry->protection & VM_PROT_WRITE)) {
21253 continue;
21254 }
21255 object = VME_OBJECT(entry);
21256 if (object == VM_OBJECT_NULL) {
21257 continue;
21258 }
21259 if (object->purgable != VM_PURGABLE_VOLATILE &&
21260 object->purgable != VM_PURGABLE_EMPTY) {
21261 continue;
21262 }
21263 if (VME_OFFSET(entry)) {
21264 /*
21265 * If the map entry has been split and the object now
21266 * appears several times in the VM map, we don't want
21267 * to count the object's resident_page_count more than
21268 * once. We count it only for the first one, starting
21269 * at offset 0 and ignore the other VM map entries.
21270 */
21271 continue;
21272 }
21273 resident_count = object->resident_page_count;
21274 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
21275 resident_count = 0;
21276 } else {
21277 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
21278 }
21279
21280 volatile_virtual_size += entry->vme_end - entry->vme_start;
21281 volatile_resident_count += resident_count;
21282 if (object->pager) {
21283 volatile_compressed_count +=
21284 vm_compressor_pager_get_count(object->pager);
21285 }
21286 pmap_compressed_bytes = 0;
21287 pmap_resident_bytes =
21288 pmap_query_resident(map->pmap,
21289 entry->vme_start,
21290 entry->vme_end,
21291 &pmap_compressed_bytes);
21292 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
21293 volatile_compressed_pmap_count += (pmap_compressed_bytes
21294 / PAGE_SIZE);
21295 }
21296
21297 /* map is still locked on return */
21298
21299 *volatile_virtual_size_p = volatile_virtual_size;
21300 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
21301 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
21302 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
21303 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
21304
21305 return KERN_SUCCESS;
21306 }
21307
21308 void
vm_map_sizes(vm_map_t map,vm_map_size_t * psize,vm_map_size_t * pfree,vm_map_size_t * plargest_free)21309 vm_map_sizes(vm_map_t map,
21310 vm_map_size_t * psize,
21311 vm_map_size_t * pfree,
21312 vm_map_size_t * plargest_free)
21313 {
21314 vm_map_entry_t entry;
21315 vm_map_offset_t prev;
21316 vm_map_size_t free, total_free, largest_free;
21317 boolean_t end;
21318
21319 if (!map) {
21320 *psize = *pfree = *plargest_free = 0;
21321 return;
21322 }
21323 total_free = largest_free = 0;
21324
21325 vm_map_lock_read(map);
21326 if (psize) {
21327 *psize = map->max_offset - map->min_offset;
21328 }
21329
21330 prev = map->min_offset;
21331 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
21332 end = (entry == vm_map_to_entry(map));
21333
21334 if (end) {
21335 free = entry->vme_end - prev;
21336 } else {
21337 free = entry->vme_start - prev;
21338 }
21339
21340 total_free += free;
21341 if (free > largest_free) {
21342 largest_free = free;
21343 }
21344
21345 if (end) {
21346 break;
21347 }
21348 prev = entry->vme_end;
21349 }
21350 vm_map_unlock_read(map);
21351 if (pfree) {
21352 *pfree = total_free;
21353 }
21354 if (plargest_free) {
21355 *plargest_free = largest_free;
21356 }
21357 }
21358
21359 #if VM_SCAN_FOR_SHADOW_CHAIN
/*
 * vm_map_shadow_max:
 *	Return the length of the longest VM object shadow chain found in
 *	"map" (diagnostic scan).
 */
int vm_map_shadow_max(vm_map_t map);
int
vm_map_shadow_max(
	vm_map_t map)
{
	int shadows, shadows_max;
	vm_map_entry_t entry;
	vm_object_t object, next_object;

	if (map == NULL) {
		return 0;
	}

	shadows_max = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}
		/*
		 * Walk the shadow chain with hand-over-hand shared locking:
		 * each object stays locked while its "shadow" pointer is
		 * read and the next object is locked.
		 */
		vm_object_lock_shared(object);
		for (shadows = 0;
		    object->shadow != NULL;
		    shadows++, object = next_object) {
			next_object = object->shadow;
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
		}
		vm_object_unlock(object);
		if (shadows > shadows_max) {
			shadows_max = shadows;
		}
	}

	vm_map_unlock_read(map);

	return shadows_max;
}
21405 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21406
/* Advise the pmap layer of the map's lowest mapped address (page-zero range). */
void
vm_commit_pagezero_status(vm_map_t lmap)
{
	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
}
21412
21413 #if XNU_TARGET_OS_OSX
/*
 * Record "high_start" in the map (vmmap_high_start).
 * NOTE(review): presumably a floor for address-space searches — verify
 * against the code that reads vmmap_high_start.
 */
void
vm_map_set_high_start(
	vm_map_t map,
	vm_map_offset_t high_start)
{
	map->vmmap_high_start = high_start;
}
21421 #endif /* XNU_TARGET_OS_OSX */
21422
21423
21424 /*
21425 * FORKED CORPSE FOOTPRINT
21426 *
21427 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21428 * empty since it never ran and never got to fault in any pages.
21429 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21430 * a forked corpse would therefore return very little information.
21431 *
21432 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21433 * to vm_map_fork() to collect footprint information from the original VM map
21434 * and its pmap, and store it in the forked corpse's VM map. That information
 * is stored in place of the VM map's "hole list" since we'll never need to
 * look for holes in the corpse's map.
21437 *
21438 * The corpse's footprint info looks like this:
21439 *
21440 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21441 * as follows:
21442 * +---------------------------------------+
21443 * header-> | cf_size |
21444 * +-------------------+-------------------+
21445 * | cf_last_region | cf_last_zeroes |
21446 * +-------------------+-------------------+
21447 * region1-> | cfr_vaddr |
21448 * +-------------------+-------------------+
21449 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21450 * +---------------------------------------+
21451 * | d4 | d5 | ... |
21452 * +---------------------------------------+
21453 * | ... |
21454 * +-------------------+-------------------+
21455 * | dy | dz | na | na | cfr_vaddr... | <-region2
21456 * +-------------------+-------------------+
21457 * | cfr_vaddr (ctd) | cfr_num_pages |
21458 * +---------------------------------------+
21459 * | d0 | d1 ... |
21460 * +---------------------------------------+
21461 * ...
21462 * +---------------------------------------+
21463 * last region-> | cfr_vaddr |
21464 * +---------------------------------------+
21465 * + cfr_num_pages | d0 | d1 | d2 | d3 |
21466 * +---------------------------------------+
21467 * ...
21468 * +---------------------------------------+
21469 * | dx | dy | dz | na | na | na | na | na |
21470 * +---------------------------------------+
21471 *
21472 * where:
21473 * cf_size: total size of the buffer (rounded to page size)
21474 * cf_last_region: offset in the buffer of the last "region" sub-header
21475 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21476 * of last region
21477 * cfr_vaddr: virtual address of the start of the covered "region"
21478 * cfr_num_pages: number of pages in the covered "region"
21479 * d*: disposition of the page at that virtual address
21480 * Regions in the buffer are word-aligned.
21481 *
21482 * We estimate the size of the buffer based on the number of memory regions
21483 * and the virtual size of the address space. While copying each memory region
21484 * during vm_map_fork(), we also collect the footprint info for that region
 * and store it in the buffer, packing it as much as possible (coalescing
 * contiguous memory regions to avoid having too many region headers, and
 * avoiding long streaks of "zero" page dispositions by splitting footprint
 * "regions"), so the number of regions in the footprint buffer might not match
 * the number of memory regions in the address space.
21490 *
21491 * We also have to copy the original task's "nonvolatile" ledgers since that's
21492 * part of the footprint and will need to be reported to any tool asking for
21493 * the footprint information of the forked corpse.
21494 */
21495
/* statistics on corpse footprint collection */
uint64_t vm_map_corpse_footprint_count = 0;
uint64_t vm_map_corpse_footprint_size_avg = 0;
uint64_t vm_map_corpse_footprint_size_max = 0;
uint64_t vm_map_corpse_footprint_full = 0;
uint64_t vm_map_corpse_footprint_no_buf = 0; /* buffer allocation failures */
21501
/*
 * Header at the start of a corpse footprint buffer (see the layout
 * diagram above).
 */
struct vm_map_corpse_footprint_header {
	vm_size_t cf_size; /* allocated buffer size */
	uint32_t cf_last_region; /* offset of last region in buffer */
	union {
		uint32_t cfu_last_zeroes; /* during creation:
		                           * number of "zero" dispositions at
		                           * end of last region */
		uint32_t cfu_hint_region; /* during lookup:
		                           * offset of last looked up region */
#define cf_last_zeroes cfu.cfu_last_zeroes
#define cf_hint_region cfu.cfu_hint_region
	} cfu;
};
/* one-byte page disposition as stored in the footprint buffer */
typedef uint8_t cf_disp_t;
/*
 * Per-region sub-header, immediately followed by one cf_disp_t per page.
 */
struct vm_map_corpse_footprint_region {
	vm_map_offset_t cfr_vaddr; /* region start virtual address */
	uint32_t cfr_num_pages; /* number of pages in this "region" */
	cf_disp_t cfr_disposition[0]; /* disposition of each page */
} __attribute__((packed));
21521
21522 static cf_disp_t
vm_page_disposition_to_cf_disp(int disposition)21523 vm_page_disposition_to_cf_disp(
21524 int disposition)
21525 {
21526 assert(sizeof(cf_disp_t) == 1);
21527 /* relocate bits that don't fit in a "uint8_t" */
21528 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21529 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21530 }
21531 /* cast gets rid of extra bits */
21532 return (cf_disp_t) disposition;
21533 }
21534
21535 static int
vm_page_cf_disp_to_disposition(cf_disp_t cf_disp)21536 vm_page_cf_disp_to_disposition(
21537 cf_disp_t cf_disp)
21538 {
21539 int disposition;
21540
21541 assert(sizeof(cf_disp_t) == 1);
21542 disposition = (int) cf_disp;
21543 /* move relocated bits back in place */
21544 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21545 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21546 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21547 }
21548 return disposition;
21549 }
21550
21551 /*
21552 * vm_map_corpse_footprint_new_region:
21553 * closes the current footprint "region" and creates a new one
21554 *
21555 * Returns NULL if there's not enough space in the buffer for a new region.
21556 */
static struct vm_map_corpse_footprint_region *
vm_map_corpse_footprint_new_region(
	struct vm_map_corpse_footprint_header *footprint_header)
{
	uintptr_t footprint_edge;
	uint32_t new_region_offset;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;

	/* first byte past the end of the footprint buffer */
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);
	/* current (last) region in the buffer */
	footprint_region = ((struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region));
	assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
	    footprint_edge);

	/* get rid of trailing zeroes in the last region */
	assert(footprint_region->cfr_num_pages >=
	    footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -=
	    footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* reuse this region if it's now empty */
	if (footprint_region->cfr_num_pages == 0) {
		return footprint_region;
	}

	/* compute offset of new region */
	new_region_offset = footprint_header->cf_last_region;
	new_region_offset += sizeof(*footprint_region);
	new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
	/* regions are kept word-aligned in the buffer */
	new_region_offset = roundup(new_region_offset, sizeof(int));

	/* check if we're going over the edge */
	if (((uintptr_t)footprint_header +
	    new_region_offset +
	    sizeof(*footprint_region)) >=
	    footprint_edge) {
		/* over the edge: no new region */
		return NULL;
	}

	/* adjust offset of last region in header */
	footprint_header->cf_last_region = new_region_offset;

	new_footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);
	new_footprint_region->cfr_vaddr = 0;
	new_footprint_region->cfr_num_pages = 0;
	/* caller needs to initialize new region */

	return new_footprint_region;
}
21613
/*
 * vm_map_corpse_footprint_collect:
 *	collect footprint information for "old_entry" in "old_map" and
 *	stores it in "new_map"'s vmmap_footprint_info.
 *
 * The footprint buffer is a header followed by a sequence of
 * variable-length regions; each region covers a contiguous virtual
 * range and is followed by one cf_disp_t page disposition per page.
 * On the first call for "new_map", the buffer is allocated (pageable,
 * with a trailing guard page); later calls append to the last region
 * or start a new one.
 *
 * Returns KERN_SUCCESS on success, KERN_NOT_SUPPORTED if the maps are
 * not a (live, corpse) pair, a kmem_alloc() error if the buffer could
 * not be allocated, or KERN_RESOURCE_SHORTAGE if the buffer is full.
 */
kern_return_t
vm_map_corpse_footprint_collect(
	vm_map_t old_map,
	vm_map_entry_t old_entry,
	vm_map_t new_map)
{
	vm_map_offset_t va;
	kern_return_t kr;
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;
	cf_disp_t *next_disp_p;
	uintptr_t footprint_edge;
	uint32_t num_pages_tmp;
	int effective_page_size;

	/* use the smaller of the kernel's and the map's page size */
	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));

	va = old_entry->vme_start;

	/* both maps must be held exclusively while we transfer the info */
	vm_map_lock_assert_exclusive(old_map);
	vm_map_lock_assert_exclusive(new_map);

	assert(new_map->has_corpse_footprint);
	assert(!old_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    old_map->has_corpse_footprint) {
		/*
		 * This can only transfer footprint info from a
		 * map with a live pmap to a map with a corpse footprint.
		 */
		return KERN_NOT_SUPPORTED;
	}

	if (new_map->vmmap_corpse_footprint == NULL) {
		/* first call for this corpse: allocate the footprint buffer */
		vm_offset_t buf;
		vm_size_t buf_size;

		buf = 0;
		/*
		 * Estimate: header + one region header (plus up to 3 bytes of
		 * alignment padding) per map entry + one disposition per page.
		 */
		buf_size = (sizeof(*footprint_header) +
		    (old_map->hdr.nentries
		    *
		    (sizeof(*footprint_region) +
		    +3)) /* potential alignment for each region */
		    +
		    ((old_map->size / effective_page_size)
		    *
		    sizeof(cf_disp_t))); /* disposition for each page */
		// printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
		buf_size = round_page(buf_size);

		/* limit buffer to 1 page to validate overflow detection */
		// buf_size = PAGE_SIZE;

		/* limit size to a somewhat sane amount */
#if XNU_TARGET_OS_OSX
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
#else /* XNU_TARGET_OS_OSX */
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
#endif /* XNU_TARGET_OS_OSX */
		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
		}

		/*
		 * Allocate the pageable buffer (with a trailing guard page).
		 * It will be zero-filled on demand.
		 */
		kr = kmem_alloc(kernel_map, &buf, buf_size + PAGE_SIZE,
		    KMA_DATA | KMA_PAGEABLE | KMA_GUARD_LAST,
		    VM_KERN_MEMORY_DIAG);
		if (kr != KERN_SUCCESS) {
			vm_map_corpse_footprint_no_buf++;
			return kr;
		}

		/* initialize header and 1st region */
		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
		new_map->vmmap_corpse_footprint = footprint_header;

		footprint_header->cf_size = buf_size;
		footprint_header->cf_last_region =
		    sizeof(*footprint_header);
		footprint_header->cf_last_zeroes = 0;

		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
		footprint_region->cfr_vaddr = 0;
		footprint_region->cfr_num_pages = 0;
	} else {
		/* retrieve header and last region */
		footprint_header = (struct vm_map_corpse_footprint_header *)
		    new_map->vmmap_corpse_footprint;
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
	}
	/* first address past the end of the buffer: overflow sentinel */
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);

	if ((footprint_region->cfr_vaddr +
	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
	    effective_page_size))
	    != old_entry->vme_start) {
		uint64_t num_pages_delta, num_pages_delta_size;
		uint32_t region_offset_delta_size;

		/*
		 * Not the next contiguous virtual address:
		 * start a new region or store "zero" dispositions for
		 * the missing pages?
		 */
		/* size of gap in actual page dispositions */
		num_pages_delta = ((old_entry->vme_start -
		    footprint_region->cfr_vaddr) / effective_page_size)
		    - footprint_region->cfr_num_pages;
		num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
		/* size of gap as a new footprint region header */
		region_offset_delta_size =
		    (sizeof(*footprint_region) +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
		    sizeof(int)) -
		    ((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
		// printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
		if (region_offset_delta_size < num_pages_delta_size ||
		    os_add3_overflow(footprint_region->cfr_num_pages,
		    (uint32_t) num_pages_delta,
		    1,
		    &num_pages_tmp)) {
			/*
			 * Storing data for this gap would take more space
			 * than inserting a new footprint region header:
			 * let's start a new region and save space. If it's a
			 * tie, let's avoid using a new region, since that
			 * would require more region hops to find the right
			 * range during lookups.
			 *
			 * If the current region's cfr_num_pages would overflow
			 * if we added "zero" page dispositions for the gap,
			 * no choice but to start a new region.
			 */
			// printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(footprint_header);
			/* check that we're not going over the edge */
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			/* initialize new region as empty */
			footprint_region->cfr_vaddr = old_entry->vme_start;
			footprint_region->cfr_num_pages = 0;
		} else {
			/*
			 * Store "zero" page dispositions for the missing
			 * pages.
			 */
			// printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
			for (; num_pages_delta > 0; num_pages_delta--) {
				next_disp_p = (cf_disp_t *)
				    ((uintptr_t) footprint_region +
				    sizeof(*footprint_region));
				next_disp_p += footprint_region->cfr_num_pages;
				/* check that we're not going over the edge */
				if ((uintptr_t)next_disp_p >= footprint_edge) {
					goto over_the_edge;
				}
				/* store "zero" disposition for this gap page */
				footprint_region->cfr_num_pages++;
				*next_disp_p = (cf_disp_t) 0;
				footprint_header->cf_last_zeroes++;
			}
		}
	}

	/* record a disposition for each page of the entry's range */
	for (va = old_entry->vme_start;
	    va < old_entry->vme_end;
	    va += effective_page_size) {
		int disposition;
		cf_disp_t cf_disp;

		vm_map_footprint_query_page_info(old_map,
		    old_entry,
		    va,
		    &disposition);
		cf_disp = vm_page_disposition_to_cf_disp(disposition);

		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);

		if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
			/*
			 * Ignore "zero" dispositions at start of
			 * region: just move start of region.
			 */
			footprint_region->cfr_vaddr += effective_page_size;
			continue;
		}

		/* would region's cfr_num_pages overflow? */
		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
		    &num_pages_tmp)) {
			/* overflow: create a new region */
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(
				footprint_header);
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			footprint_region->cfr_vaddr = va;
			footprint_region->cfr_num_pages = 0;
		}

		next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
		    sizeof(*footprint_region));
		next_disp_p += footprint_region->cfr_num_pages;
		/* check that we're not going over the edge */
		if ((uintptr_t)next_disp_p >= footprint_edge) {
			goto over_the_edge;
		}
		/* store this dispostion */
		*next_disp_p = cf_disp;
		footprint_region->cfr_num_pages++;

		if (cf_disp != 0) {
			/* non-zero disp: break the current zero streak */
			footprint_header->cf_last_zeroes = 0;
			/* done */
			continue;
		}

		/* zero disp: add to the current streak of zeroes */
		footprint_header->cf_last_zeroes++;
		/*
		 * Heuristic threshold: only start a new region once the
		 * trailing-zero streak (+ the alignment padding the previous
		 * region would need) is large enough to pay for a new region
		 * header.
		 */
		if ((footprint_header->cf_last_zeroes +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
		    (sizeof(int) - 1),
		    sizeof(int))) <
		    (sizeof(*footprint_header))) {
			/*
			 * There are not enough trailing "zero" dispositions
			 * (+ the extra padding we would need for the previous
			 * region); creating a new region would not save space
			 * at this point, so let's keep this "zero" disposition
			 * in this region and reconsider later.
			 */
			continue;
		}
		/*
		 * Create a new region to avoid having too many consecutive
		 * "zero" dispositions.
		 */
		new_footprint_region =
		    vm_map_corpse_footprint_new_region(footprint_header);
		if (new_footprint_region == NULL) {
			goto over_the_edge;
		}
		footprint_region = new_footprint_region;
		/* initialize the new region as empty ... */
		footprint_region->cfr_num_pages = 0;
		/* ... and skip this "zero" disp */
		footprint_region->cfr_vaddr = va + effective_page_size;
	}

	return KERN_SUCCESS;

over_the_edge:
	/* ran out of buffer space: footprint info will be incomplete */
	// printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
	vm_map_corpse_footprint_full++;
	return KERN_RESOURCE_SHORTAGE;
}
21893
/*
 * vm_map_corpse_footprint_collect_done:
 *	completes the footprint collection by getting rid of any remaining
 *	trailing "zero" dispositions and trimming the unused part of the
 *	kernel buffer
 *
 * Also updates the global footprint-size statistics (average / max).
 * Must be called after the last vm_map_corpse_footprint_collect() for
 * this corpse map.
 */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t buf_size, actual_size;
	kern_return_t kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		/* nothing collected: nothing to finalize */
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* bytes actually used: everything up to the end of the last region */
	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));

	// printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	/* fold this corpse's footprint size into the running average / max */
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		/*
		 * Trim the unused tail of the buffer (including the old
		 * trailing guard page); the page right after "actual_size"
		 * is kept and turned into the new guard page below.
		 */
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE),                     /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		/* make the retained page the new trailing guard page */
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE,                     /* set_maximum */
		    VM_PROT_NONE);
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	footprint_header->cf_size = actual_size;
}
21972
/*
 * vm_map_corpse_footprint_query_page_info:
 *	retrieves the disposition of the page at virtual address "vaddr"
 *	in the forked corpse's VM map
 *
 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
 *
 * The lookup starts at the hinted region ("cf_hint_region") and walks
 * regions forward; if "va" lies before the hint, the search restarts
 * from the first region.  The hint is read and updated without
 * synchronization (benign race: it is only a starting point).
 *
 * Returns KERN_INVALID_ARGUMENT if "map" has no footprint info,
 * KERN_SUCCESS otherwise (with *disposition_p == 0 if the page was
 * not found in any region).
 */
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t map,
	vm_map_offset_t va,
	int *disposition_p)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int disp_idx;
	kern_return_t kr;
	int effective_page_size;
	cf_disp_t cf_disp;

	if (!map->has_corpse_footprint) {
		*disposition_p = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		*disposition_p = 0;
		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

lookup_again:
	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    effective_page_size));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		goto lookup_again;
	}

	/* walk regions forward until one covers "va" or we run out */
	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    effective_page_size));
	}
	if (va < region_start || va >= region_end) {
		/* page not found */
		*disposition_p = 0;
		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
	cf_disp = footprint_region->cfr_disposition[disp_idx];
	*disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
	kr = KERN_SUCCESS;
done:
	// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disposition_p,
	    kern_return_t, kr);

	return kr;
}
22086
22087 void
vm_map_corpse_footprint_destroy(vm_map_t map)22088 vm_map_corpse_footprint_destroy(
22089 vm_map_t map)
22090 {
22091 if (map->has_corpse_footprint &&
22092 map->vmmap_corpse_footprint != 0) {
22093 struct vm_map_corpse_footprint_header *footprint_header;
22094 vm_size_t buf_size;
22095 kern_return_t kr;
22096
22097 footprint_header = map->vmmap_corpse_footprint;
22098 buf_size = footprint_header->cf_size;
22099 kr = vm_deallocate(kernel_map,
22100 (vm_offset_t) map->vmmap_corpse_footprint,
22101 ((vm_size_t) buf_size
22102 + PAGE_SIZE)); /* trailing guard page */
22103 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
22104 map->vmmap_corpse_footprint = 0;
22105 map->has_corpse_footprint = FALSE;
22106 }
22107 }
22108
22109 /*
22110 * vm_map_copy_footprint_ledgers:
22111 * copies any ledger that's relevant to the memory footprint of "old_task"
22112 * into the forked corpse's task ("new_task")
22113 */
22114 void
vm_map_copy_footprint_ledgers(task_t old_task,task_t new_task)22115 vm_map_copy_footprint_ledgers(
22116 task_t old_task,
22117 task_t new_task)
22118 {
22119 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
22120 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
22121 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
22122 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
22123 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
22124 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
22125 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
22126 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
22127 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
22128 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
22129 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
22130 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
22131 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
22132 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
22133 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
22134 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
22135 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
22136 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
22137 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
22138 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
22139 }
22140
22141 /*
22142 * vm_map_copy_ledger:
22143 * copy a single ledger from "old_task" to "new_task"
22144 */
22145 void
vm_map_copy_ledger(task_t old_task,task_t new_task,int ledger_entry)22146 vm_map_copy_ledger(
22147 task_t old_task,
22148 task_t new_task,
22149 int ledger_entry)
22150 {
22151 ledger_amount_t old_balance, new_balance, delta;
22152
22153 assert(new_task->map->has_corpse_footprint);
22154 if (!new_task->map->has_corpse_footprint) {
22155 return;
22156 }
22157
22158 /* turn off sanity checks for the ledger we're about to mess with */
22159 ledger_disable_panic_on_negative(new_task->ledger,
22160 ledger_entry);
22161
22162 /* adjust "new_task" to match "old_task" */
22163 ledger_get_balance(old_task->ledger,
22164 ledger_entry,
22165 &old_balance);
22166 ledger_get_balance(new_task->ledger,
22167 ledger_entry,
22168 &new_balance);
22169 if (new_balance == old_balance) {
22170 /* new == old: done */
22171 } else if (new_balance > old_balance) {
22172 /* new > old ==> new -= new - old */
22173 delta = new_balance - old_balance;
22174 ledger_debit(new_task->ledger,
22175 ledger_entry,
22176 delta);
22177 } else {
22178 /* new < old ==> new += old - new */
22179 delta = old_balance - new_balance;
22180 ledger_credit(new_task->ledger,
22181 ledger_entry,
22182 delta);
22183 }
22184 }
22185
22186 /*
22187 * vm_map_get_pmap:
22188 * returns the pmap associated with the vm_map
22189 */
22190 pmap_t
vm_map_get_pmap(vm_map_t map)22191 vm_map_get_pmap(vm_map_t map)
22192 {
22193 return vm_map_pmap(map);
22194 }
22195
22196 #if CONFIG_MAP_RANGES
22197 /*
22198 * vm_map_range_map_init:
22199 * initializes the VM range ID map to enable index lookup
22200 * of user VM ranges based on VM tag from userspace.
22201 */
22202 static void
vm_map_range_map_init(void)22203 vm_map_range_map_init(void)
22204 {
22205 /* maintain status quo by default */
22206 for (int i = 0; i < VM_MEMORY_COUNT; i++) {
22207 vm_map_range_id_map[i] = UMEM_RANGE_ID_DEFAULT;
22208 }
22209
22210 /* move all MALLOC allocations to heap range */
22211 vm_map_range_id_map[VM_MEMORY_MALLOC] = UMEM_RANGE_ID_HEAP;
22212 vm_map_range_id_map[VM_MEMORY_MALLOC_HUGE] = UMEM_RANGE_ID_HEAP;
22213 vm_map_range_id_map[VM_MEMORY_MALLOC_LARGE] = UMEM_RANGE_ID_HEAP;
22214 vm_map_range_id_map[VM_MEMORY_MALLOC_LARGE_REUSABLE] = UMEM_RANGE_ID_HEAP;
22215 vm_map_range_id_map[VM_MEMORY_MALLOC_LARGE_REUSED] = UMEM_RANGE_ID_HEAP;
22216 vm_map_range_id_map[VM_MEMORY_MALLOC_MEDIUM] = UMEM_RANGE_ID_HEAP;
22217 vm_map_range_id_map[VM_MEMORY_MALLOC_NANO] = UMEM_RANGE_ID_HEAP;
22218 vm_map_range_id_map[VM_MEMORY_MALLOC_PGUARD] = UMEM_RANGE_ID_HEAP;
22219 vm_map_range_id_map[VM_MEMORY_MALLOC_PROB_GUARD] = UMEM_RANGE_ID_HEAP;
22220 vm_map_range_id_map[VM_MEMORY_MALLOC_SMALL] = UMEM_RANGE_ID_HEAP;
22221 vm_map_range_id_map[VM_MEMORY_MALLOC_TINY] = UMEM_RANGE_ID_HEAP;
22222 }
22223
/*
 * vm_map_range_configure:
 *	configures the user vm_map ranges by increasing the maximum VA range of
 *	the map and carving out a range at the end of VA space (searching backwards
 *	in the newly expanded map).
 *
 * Returns KERN_SUCCESS if ranges were configured (or the feature is
 * disabled), KERN_NO_SPACE if the map is already jumbo or the expanded
 * space is too small to hold a heap range.
 */
kern_return_t
vm_map_range_configure(vm_map_t map)
{
	vm_map_size_t addr_space_size;
	vm_map_offset_t start, end, saved_max, random_addr;

	if (!vm_map_user_ranges) {
		/* feature disabled: nothing to configure */
		return KERN_SUCCESS;
	}

	/* Should not be applying ranges to kernel map or kernel map submaps */
	assert(map != kernel_map);
	assert(vm_map_pmap(map) != kernel_pmap);

	/* save the existing max offset */
	vm_map_lock_read(map);
	saved_max = map->max_offset;
	vm_map_unlock_read(map);

	/*
	 * Check that we're not already jumbo'd. If so we cannot guarantee that
	 * we can set up the ranges safely without interfering with the existing
	 * map.
	 */
	if (saved_max > vm_compute_max_offset(vm_map_is_64bit(map))) {
		return KERN_NO_SPACE;
	}

	/* expand the default VM space to the largest possible address */
	vm_map_set_jumbo(map);

	vm_map_lock(map);
	/* size of the VA space gained by going jumbo */
	addr_space_size = map->max_offset - saved_max;

	if (addr_space_size <= VM_MAP_USER_RANGE_MAX) {
		/* not enough new space to carve a heap range out of */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	/*
	 * Pick a random, page-aligned offset into the new space, leaving
	 * room for a VM_MAP_USER_RANGE_MAX-sized range after it.
	 */
	addr_space_size -= VM_MAP_USER_RANGE_MAX;
	random_addr = (vm_map_offset_t)random();
	random_addr <<= VM_MAP_PAGE_SHIFT(map);
	random_addr %= addr_space_size;

	/*
	 * round off the start so we begin on a L2 TT boundary and ensure we have
	 * at least a ARM_TT_L2_SIZE sized hole between existing map range and
	 * new range(s).
	 */
	start = vm_map_round_page(saved_max + random_addr + 1, ARM_TT_L2_OFFMASK);
	end = MIN(map->max_offset, start + VM_MAP_USER_RANGE_MAX);
	assert(start > saved_max);
	assert(end <= map->max_offset);

	/* default range covers the "normal" heap range */
	map->user_range[UMEM_RANGE_ID_DEFAULT].min_address = map->min_offset;
	map->user_range[UMEM_RANGE_ID_DEFAULT].max_address = saved_max;

	/* heap range covers the new extended range */
	map->user_range[UMEM_RANGE_ID_HEAP].min_address = start;
	map->user_range[UMEM_RANGE_ID_HEAP].max_address = end;
	map->uses_user_ranges = true;
	vm_map_unlock(map);

	return KERN_SUCCESS;
}
22296
22297 /*
22298 * vm_map_range_fork:
22299 * clones the array of ranges from old_map to new_map in support
22300 * of a VM map fork.
22301 */
22302 void
vm_map_range_fork(vm_map_t new_map,vm_map_t old_map)22303 vm_map_range_fork(vm_map_t new_map, vm_map_t old_map)
22304 {
22305 int i = 0;
22306
22307 if (!old_map->uses_user_ranges) {
22308 /* nothing to do */
22309 return;
22310 }
22311
22312 for (i = 0; i < UMEM_RANGE_COUNT; i++) {
22313 new_map->user_range[i].min_address = old_map->user_range[i].min_address;
22314 new_map->user_range[i].max_address = old_map->user_range[i].max_address;
22315 }
22316
22317 new_map->uses_user_ranges = true;
22318 }
22319
22320 /*
22321 * vm_map_get_user_range_id:
22322 * looks up the vm_map_range_id_map lookup table to determine which range ID to
22323 * utilize for any given user memory tag. If no ranges are present return the
22324 * default range.
22325 */
22326 __attribute__((overloadable))
22327 vm_map_range_id_t
vm_map_get_user_range_id(vm_map_t map,uint16_t tag)22328 vm_map_get_user_range_id(vm_map_t map, uint16_t tag)
22329 {
22330 vm_map_range_id_t range_id = UMEM_RANGE_ID_DEFAULT;
22331
22332 if (map != NULL && map->uses_user_ranges && tag < VM_MEMORY_COUNT) {
22333 range_id = vm_map_range_id_map[tag];
22334 }
22335
22336 return range_id;
22337 }
22338
22339 /*
22340 * vm_map_get_user_range_id:
22341 * determines which range ID the given addr/size combination maps to. If
22342 * range ID cannot be determined return the default range.
22343 */
22344 __attribute__((overloadable))
22345 vm_map_range_id_t
vm_map_get_user_range_id(vm_map_t map,mach_vm_offset_t addr,mach_vm_size_t size)22346 vm_map_get_user_range_id(
22347 vm_map_t map,
22348 mach_vm_offset_t addr,
22349 mach_vm_size_t size)
22350 {
22351 vm_map_range_id_t range_id = UMEM_RANGE_ID_MAX;
22352
22353 if (map == NULL || !map->uses_user_ranges) {
22354 return UMEM_RANGE_ID_DEFAULT;
22355 }
22356
22357 for (; range_id > UMEM_RANGE_ID_DEFAULT; --range_id) {
22358 if (mach_vm_range_contains(&map->user_range[range_id], addr, size)) {
22359 break;
22360 }
22361 }
22362
22363 assert(range_id < UMEM_RANGE_COUNT);
22364 return range_id;
22365 }
22366
22367 /*
22368 * vm_map_get_user_range:
22369 * copy the VM user range for the given VM map and range ID.
22370 */
22371 kern_return_t
vm_map_get_user_range(vm_map_t map,vm_map_range_id_t range_id,mach_vm_range_t range)22372 vm_map_get_user_range(
22373 vm_map_t map,
22374 vm_map_range_id_t range_id,
22375 mach_vm_range_t range)
22376 {
22377 if (map == NULL ||
22378 !map->uses_user_ranges ||
22379 range_id > UMEM_RANGE_ID_MAX ||
22380 range == NULL) {
22381 return KERN_INVALID_ARGUMENT;
22382 }
22383
22384 *range = map->user_range[range_id];
22385 return KERN_SUCCESS;
22386 }
22387 #endif /* CONFIG_MAP_RANGES */
22388
/*
 * vm_map_entry_has_device_pager:
 *	Check if the vm map entry specified by the virtual address has a device pager.
 *	If the vm map entry does not exist or if the map is NULL, this returns FALSE.
 *
 * Descends through submaps hand-over-hand: the submap is locked before
 * the parent is unlocked, so some map in the chain is always locked.
 * NOTE(review): "vaddr" is looked up unchanged in each submap, i.e. this
 * assumes the submap's address space is congruent with the parent's at
 * that address (no VME_OFFSET translation) -- confirm with callers.
 */
boolean_t
vm_map_entry_has_device_pager(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_entry_t entry;
	vm_object_t object;
	boolean_t result;

	if (map == NULL) {
		return FALSE;
	}

	vm_map_lock(map);
	while (TRUE) {
		if (!vm_map_lookup_entry(map, vaddr, &entry)) {
			/* no entry covers "vaddr" */
			result = FALSE;
			break;
		}
		if (entry->is_sub_map) {
			// Check the submap
			vm_map_t submap = VME_SUBMAP(entry);
			assert(submap != NULL);
			/* lock the submap before dropping the parent's lock */
			vm_map_lock(submap);
			vm_map_unlock(map);
			map = submap;
			continue;
		}
		object = VME_OBJECT(entry);
		if (object != NULL && object->pager != NULL && is_device_pager_ops(object->pager->mo_pager_ops)) {
			result = TRUE;
			break;
		}
		result = FALSE;
		break;
	}

	vm_map_unlock(map);
	return result;
}
22432
22433
22434 #if MACH_ASSERT
22435
extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

/*
 * LEDGER_DRIFT:
 *	declares the drift counters for one ledger: how many pmaps were
 *	found over/under balance, the running totals, and the worst-case
 *	(max magnitude) imbalance seen in each direction.
 */
#define LEDGER_DRIFT(__LEDGER)                  \
	int __LEDGER##_over;                    \
	ledger_amount_t __LEDGER##_over_total;  \
	ledger_amount_t __LEDGER##_over_max;    \
	int __LEDGER##_under;                   \
	ledger_amount_t __LEDGER##_under_total; \
	ledger_amount_t __LEDGER##_under_max

/*
 * Accumulated ledger-drift statistics, updated by
 * vm_map_pmap_check_ledgers() each time a pmap is checked.
 */
struct {
	uint64_t num_pmaps_checked;

	LEDGER_DRIFT(phys_footprint);
	LEDGER_DRIFT(internal);
	LEDGER_DRIFT(internal_compressed);
	LEDGER_DRIFT(external);
	LEDGER_DRIFT(reusable);
	LEDGER_DRIFT(iokit_mapped);
	LEDGER_DRIFT(alternate_accounting);
	LEDGER_DRIFT(alternate_accounting_compressed);
	LEDGER_DRIFT(page_table);
	LEDGER_DRIFT(purgeable_volatile);
	LEDGER_DRIFT(purgeable_nonvolatile);
	LEDGER_DRIFT(purgeable_volatile_compressed);
	LEDGER_DRIFT(purgeable_nonvolatile_compressed);
	LEDGER_DRIFT(tagged_nofootprint);
	LEDGER_DRIFT(tagged_footprint);
	LEDGER_DRIFT(tagged_nofootprint_compressed);
	LEDGER_DRIFT(tagged_footprint_compressed);
	LEDGER_DRIFT(network_volatile);
	LEDGER_DRIFT(network_nonvolatile);
	LEDGER_DRIFT(network_volatile_compressed);
	LEDGER_DRIFT(network_nonvolatile_compressed);
	LEDGER_DRIFT(media_nofootprint);
	LEDGER_DRIFT(media_footprint);
	LEDGER_DRIFT(media_nofootprint_compressed);
	LEDGER_DRIFT(media_footprint_compressed);
	LEDGER_DRIFT(graphics_nofootprint);
	LEDGER_DRIFT(graphics_footprint);
	LEDGER_DRIFT(graphics_nofootprint_compressed);
	LEDGER_DRIFT(graphics_footprint_compressed);
	LEDGER_DRIFT(neural_nofootprint);
	LEDGER_DRIFT(neural_footprint);
	LEDGER_DRIFT(neural_nofootprint_compressed);
	LEDGER_DRIFT(neural_footprint_compressed);
} pmap_ledgers_drift;
22484
/*
 * vm_map_pmap_check_ledgers:
 *
 * MACH_ASSERT-only diagnostic, run when a pmap is being torn down, that
 * verifies every pmap-related task ledger entry has drained back to a
 * zero balance.  Any non-zero balance is logged, accumulated into the
 * global pmap_ledgers_drift statistics, and — depending on the
 * per-entry panic_on_negative flag and the pmap_ledgers_panic /
 * pmap_ledgers_panic_leeway tunables — may trigger a panic.
 *
 * Parameters:
 *	pmap		the pmap being checked; used only for the
 *			pointer printed in the panic/log message below
 *			(never dereferenced here).
 *	ledger		the task ledger whose entries are inspected.
 *	pid, procname	identify the owning process in log output.
 */
void
vm_map_pmap_check_ledgers(
	pmap_t pmap,
	ledger_t ledger,
	int pid,
	char *procname)
{
	ledger_amount_t bal;
	boolean_t do_panic;

	do_panic = FALSE;

	/* global drift bookkeeping: count every pmap we examine */
	pmap_ledgers_drift.num_pmaps_checked++;

	/*
	 * Check a single ledger entry:
	 *  - fetch its current balance and its panic_on_negative flag
	 *    (defaulted to TRUE in case the getter does not set it);
	 *  - if the balance is non-zero, arm do_panic when either the
	 *    entry demands it or the balance exceeds the configured
	 *    leeway (in pages) while pmap_ledgers_panic is enabled;
	 *  - always log the imbalance and fold it into the matching
	 *    pmap_ledgers_drift.<entry>_{over,under}{,_total,_max}
	 *    counters via token pasting.
	 *
	 * Note: the actual panic at the end of the function is still
	 * gated on pmap_ledgers_panic; do_panic alone only selects
	 * between panic() and printf() paths below.
	 */
#define LEDGER_CHECK_BALANCE(__LEDGER)					\
MACRO_BEGIN								\
	int panic_on_negative = TRUE;					\
	ledger_get_balance(ledger,					\
	    task_ledgers.__LEDGER,					\
	    &bal);							\
	ledger_get_panic_on_negative(ledger,				\
	    task_ledgers.__LEDGER,					\
	    &panic_on_negative);					\
	if (bal != 0) {							\
		if (panic_on_negative ||				\
		    (pmap_ledgers_panic &&				\
		    pmap_ledgers_panic_leeway > 0 &&			\
		    (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||	\
		    bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) {	\
			do_panic = TRUE;				\
		}							\
		printf("LEDGER BALANCE proc %d (%s) "			\
		    "\"%s\" = %lld\n",					\
		    pid, procname, #__LEDGER, bal);			\
		if (bal > 0) {						\
			pmap_ledgers_drift.__LEDGER##_over++;		\
			pmap_ledgers_drift.__LEDGER##_over_total += bal; \
			if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
				pmap_ledgers_drift.__LEDGER##_over_max = bal; \
			}						\
		} else if (bal < 0) {					\
			pmap_ledgers_drift.__LEDGER##_under++;		\
			pmap_ledgers_drift.__LEDGER##_under_total += bal; \
			if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
				pmap_ledgers_drift.__LEDGER##_under_max = bal; \
			}						\
		}							\
	}								\
MACRO_END

	/*
	 * Every pmap-managed ledger entry is expected to be balanced by
	 * the time the pmap goes away; check each one.
	 */
	LEDGER_CHECK_BALANCE(phys_footprint);
	LEDGER_CHECK_BALANCE(internal);
	LEDGER_CHECK_BALANCE(internal_compressed);
	LEDGER_CHECK_BALANCE(external);
	LEDGER_CHECK_BALANCE(reusable);
	LEDGER_CHECK_BALANCE(iokit_mapped);
	LEDGER_CHECK_BALANCE(alternate_accounting);
	LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
	LEDGER_CHECK_BALANCE(page_table);
	LEDGER_CHECK_BALANCE(purgeable_volatile);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
	LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(tagged_nofootprint);
	LEDGER_CHECK_BALANCE(tagged_footprint);
	LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
	LEDGER_CHECK_BALANCE(network_volatile);
	LEDGER_CHECK_BALANCE(network_nonvolatile);
	LEDGER_CHECK_BALANCE(network_volatile_compressed);
	LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(media_nofootprint);
	LEDGER_CHECK_BALANCE(media_footprint);
	LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(media_footprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_nofootprint);
	LEDGER_CHECK_BALANCE(graphics_footprint);
	LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
	LEDGER_CHECK_BALANCE(neural_nofootprint);
	LEDGER_CHECK_BALANCE(neural_footprint);
	LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(neural_footprint_compressed);

	if (do_panic) {
		/* panic only if the boot-arg/tunable asks for it; otherwise log */
		if (pmap_ledgers_panic) {
			panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers",
			    pmap, pid, procname);
		} else {
			printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		}
	}
}
22579
22580 void
vm_map_pmap_set_process(vm_map_t map,int pid,char * procname)22581 vm_map_pmap_set_process(
22582 vm_map_t map,
22583 int pid,
22584 char *procname)
22585 {
22586 pmap_set_process(vm_map_pmap(map), pid, procname);
22587 }
22588
22589 #endif /* MACH_ASSERT */
22590