1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <mach/vm_types.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counter.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map_internal.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110 #include <vm/vm_reclaim_internal.h>
111
112 #include <vm/vm_protos.h>
113 #include <vm/vm_shared_region.h>
114 #include <vm/vm_map_store.h>
115
116 #include <san/kasan.h>
117
118 #include <sys/resource.h>
119 #include <sys/codesign.h>
120 #include <sys/code_signing.h>
121 #include <sys/mman.h>
122 #include <sys/reboot.h>
123 #include <sys/kdebug_triage.h>
124
125 #include <libkern/section_keywords.h>
126
127 #if DEVELOPMENT || DEBUG
128 extern int proc_selfcsflags(void);
129 int panic_on_unsigned_execute = 0;
130 int panic_on_mlock_failure = 0;
131 #endif /* DEVELOPMENT || DEBUG */
132
133 #if MACH_ASSERT
134 int debug4k_filter = 0;
135 char debug4k_proc_name[1024] = "";
136 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
137 int debug4k_panic_on_misaligned_sharing = 0;
138 const char *debug4k_category_name[] = {
139 "error", /* 0 */
140 "life", /* 1 */
141 "load", /* 2 */
142 "fault", /* 3 */
143 "copy", /* 4 */
144 "share", /* 5 */
145 "adjust", /* 6 */
146 "pmap", /* 7 */
147 "mementry", /* 8 */
148 "iokit", /* 9 */
149 "upl", /* 10 */
150 "exc", /* 11 */
151 "vfs" /* 12 */
152 };
153 #endif /* MACH_ASSERT */
154 int debug4k_no_cow_copyin = 0;
155
156
157 #if __arm64__
158 extern const int fourk_binary_compatibility_unsafe;
159 extern const int fourk_binary_compatibility_allow_wx;
160 #endif /* __arm64__ */
161 extern int proc_selfpid(void);
162 extern char *proc_name_address(void *p);
163
164 #if VM_MAP_DEBUG_APPLE_PROTECT
165 int vm_map_debug_apple_protect = 0;
166 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
167 #if VM_MAP_DEBUG_FOURK
168 int vm_map_debug_fourk = 0;
169 #endif /* VM_MAP_DEBUG_FOURK */
170
171 #if DEBUG || DEVELOPMENT
172 static TUNABLE(bool, vm_map_executable_immutable,
173 "vm_map_executable_immutable", true);
174 #else
175 #define vm_map_executable_immutable true
176 #endif
177
178 #if CONFIG_MAP_RANGES
179 static TUNABLE(bool, vm_map_user_ranges, "vm_map_user_ranges", true);
180 static SECURITY_READ_ONLY_LATE(uint8_t) vm_map_range_id_map[VM_MEMORY_COUNT];
181 #endif
182
183 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
184
185 extern u_int32_t random(void); /* from <libkern/libkern.h> */
186 /* Internal prototypes
187 */
188
189 typedef struct vm_map_zap {
190 vm_map_entry_t vmz_head;
191 vm_map_entry_t *vmz_tail;
192 } *vm_map_zap_t;
193
194 #define VM_MAP_ZAP_DECLARE(zap) \
195 struct vm_map_zap zap = { .vmz_tail = &zap.vmz_head }
196
197 static vm_map_entry_t vm_map_entry_insert(
198 vm_map_t map,
199 vm_map_entry_t insp_entry,
200 vm_map_offset_t start,
201 vm_map_offset_t end,
202 vm_object_t object,
203 vm_object_offset_t offset,
204 vm_map_kernel_flags_t vmk_flags,
205 boolean_t needs_copy,
206 vm_prot_t cur_protection,
207 vm_prot_t max_protection,
208 vm_inherit_t inheritance,
209 boolean_t no_cache,
210 boolean_t permanent,
211 unsigned int superpage_size,
212 boolean_t clear_map_aligned,
213 int alias);
214
215 static void vm_map_simplify_range(
216 vm_map_t map,
217 vm_map_offset_t start,
218 vm_map_offset_t end); /* forward */
219
220 static boolean_t vm_map_range_check(
221 vm_map_t map,
222 vm_map_offset_t start,
223 vm_map_offset_t end,
224 vm_map_entry_t *entry);
225
226 static void vm_map_submap_pmap_clean(
227 vm_map_t map,
228 vm_map_offset_t start,
229 vm_map_offset_t end,
230 vm_map_t sub_map,
231 vm_map_offset_t offset);
232
233 static void vm_map_pmap_enter(
234 vm_map_t map,
235 vm_map_offset_t addr,
236 vm_map_offset_t end_addr,
237 vm_object_t object,
238 vm_object_offset_t offset,
239 vm_prot_t protection);
240
241 static void _vm_map_clip_end(
242 struct vm_map_header *map_header,
243 vm_map_entry_t entry,
244 vm_map_offset_t end);
245
246 static void _vm_map_clip_start(
247 struct vm_map_header *map_header,
248 vm_map_entry_t entry,
249 vm_map_offset_t start);
250
251 static kmem_return_t vm_map_delete(
252 vm_map_t map,
253 vm_map_offset_t start,
254 vm_map_offset_t end,
255 vmr_flags_t flags,
256 kmem_guard_t guard,
257 vm_map_zap_t zap);
258
259 static void vm_map_copy_insert(
260 vm_map_t map,
261 vm_map_entry_t after_where,
262 vm_map_copy_t copy);
263
264 static kern_return_t vm_map_copy_overwrite_unaligned(
265 vm_map_t dst_map,
266 vm_map_entry_t entry,
267 vm_map_copy_t copy,
268 vm_map_address_t start,
269 boolean_t discard_on_success);
270
271 static kern_return_t vm_map_copy_overwrite_aligned(
272 vm_map_t dst_map,
273 vm_map_entry_t tmp_entry,
274 vm_map_copy_t copy,
275 vm_map_offset_t start,
276 pmap_t pmap);
277
278 static kern_return_t vm_map_copyin_kernel_buffer(
279 vm_map_t src_map,
280 vm_map_address_t src_addr,
281 vm_map_size_t len,
282 boolean_t src_destroy,
283 vm_map_copy_t *copy_result); /* OUT */
284
285 static kern_return_t vm_map_copyout_kernel_buffer(
286 vm_map_t map,
287 vm_map_address_t *addr, /* IN/OUT */
288 vm_map_copy_t copy,
289 vm_map_size_t copy_size,
290 boolean_t overwrite,
291 boolean_t consume_on_success);
292
293 static void vm_map_fork_share(
294 vm_map_t old_map,
295 vm_map_entry_t old_entry,
296 vm_map_t new_map);
297
298 static boolean_t vm_map_fork_copy(
299 vm_map_t old_map,
300 vm_map_entry_t *old_entry_p,
301 vm_map_t new_map,
302 int vm_map_copyin_flags);
303
304 static kern_return_t vm_map_wire_nested(
305 vm_map_t map,
306 vm_map_offset_t start,
307 vm_map_offset_t end,
308 vm_prot_t caller_prot,
309 vm_tag_t tag,
310 boolean_t user_wire,
311 pmap_t map_pmap,
312 vm_map_offset_t pmap_addr,
313 ppnum_t *physpage_p);
314
315 static kern_return_t vm_map_unwire_nested(
316 vm_map_t map,
317 vm_map_offset_t start,
318 vm_map_offset_t end,
319 boolean_t user_wire,
320 pmap_t map_pmap,
321 vm_map_offset_t pmap_addr);
322
323 static kern_return_t vm_map_overwrite_submap_recurse(
324 vm_map_t dst_map,
325 vm_map_offset_t dst_addr,
326 vm_map_size_t dst_size);
327
328 static kern_return_t vm_map_copy_overwrite_nested(
329 vm_map_t dst_map,
330 vm_map_offset_t dst_addr,
331 vm_map_copy_t copy,
332 boolean_t interruptible,
333 pmap_t pmap,
334 boolean_t discard_on_success);
335
336 static kern_return_t vm_map_remap_extract(
337 vm_map_t map,
338 vm_map_offset_t addr,
339 vm_map_size_t size,
340 boolean_t copy,
341 struct vm_map_header *map_header,
342 vm_prot_t *cur_protection,
343 vm_prot_t *max_protection,
344 vm_inherit_t inheritance,
345 vm_map_kernel_flags_t vmk_flags);
346
347 static kern_return_t vm_map_remap_range_allocate(
348 vm_map_t map,
349 vm_map_address_t *address,
350 vm_map_size_t size,
351 vm_map_offset_t mask,
352 int flags,
353 vm_map_kernel_flags_t vmk_flags,
354 vm_tag_t tag,
355 vm_map_entry_t *map_entry,
356 vm_map_zap_t zap_list);
357
358 static void vm_map_region_look_for_page(
359 vm_map_t map,
360 vm_map_offset_t va,
361 vm_object_t object,
362 vm_object_offset_t offset,
363 int max_refcnt,
364 unsigned short depth,
365 vm_region_extended_info_t extended,
366 mach_msg_type_number_t count);
367
368 static int vm_map_region_count_obj_refs(
369 vm_map_entry_t entry,
370 vm_object_t object);
371
372
373 static kern_return_t vm_map_willneed(
374 vm_map_t map,
375 vm_map_offset_t start,
376 vm_map_offset_t end);
377
378 static kern_return_t vm_map_reuse_pages(
379 vm_map_t map,
380 vm_map_offset_t start,
381 vm_map_offset_t end);
382
383 static kern_return_t vm_map_reusable_pages(
384 vm_map_t map,
385 vm_map_offset_t start,
386 vm_map_offset_t end);
387
388 static kern_return_t vm_map_can_reuse(
389 vm_map_t map,
390 vm_map_offset_t start,
391 vm_map_offset_t end);
392
393 #if MACH_ASSERT
394 static kern_return_t vm_map_pageout(
395 vm_map_t map,
396 vm_map_offset_t start,
397 vm_map_offset_t end);
398 #endif /* MACH_ASSERT */
399
400 kern_return_t vm_map_corpse_footprint_collect(
401 vm_map_t old_map,
402 vm_map_entry_t old_entry,
403 vm_map_t new_map);
404 void vm_map_corpse_footprint_collect_done(
405 vm_map_t new_map);
406 void vm_map_corpse_footprint_destroy(
407 vm_map_t map);
408 kern_return_t vm_map_corpse_footprint_query_page_info(
409 vm_map_t map,
410 vm_map_offset_t va,
411 int *disposition_p);
412 void vm_map_footprint_query_page_info(
413 vm_map_t map,
414 vm_map_entry_t map_entry,
415 vm_map_offset_t curr_s_offset,
416 int *disposition_p);
417
418 #if CONFIG_MAP_RANGES
419 static void vm_map_range_map_init(void);
420 #endif /* CONFIG_MAP_RANGES */
421
422 pid_t find_largest_process_vm_map_entries(void);
423
424 extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
425 mach_exception_data_type_t subcode);
426
427 /*
428 * Macros to copy a vm_map_entry. We must be careful to correctly
429 * manage the wired page count. vm_map_entry_copy() creates a new
430 * map entry to the same memory - the wired count in the new entry
431 * must be set to zero. vm_map_entry_copy_full() creates a new
432 * entry that is identical to the old entry. This preserves the
433 * wire count; it's used for map splitting and zone changing in
434 * vm_map_copyout.
435 */
436
437 static inline void
vm_map_entry_copy_pmap_cs_assoc(vm_map_t map __unused,vm_map_entry_t new __unused,vm_map_entry_t old __unused)438 vm_map_entry_copy_pmap_cs_assoc(
439 vm_map_t map __unused,
440 vm_map_entry_t new __unused,
441 vm_map_entry_t old __unused)
442 {
443 /* when pmap_cs is not enabled, assert as a sanity check */
444 assert(new->pmap_cs_associated == FALSE);
445 }
446
447 /*
448 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
449 * But for security reasons on some platforms, we don't want the
450 * new mapping to be "used for jit", so we reset the flag here.
451 */
452 static inline void
vm_map_entry_copy_code_signing(vm_map_t map,vm_map_entry_t new,vm_map_entry_t old __unused)453 vm_map_entry_copy_code_signing(
454 vm_map_t map,
455 vm_map_entry_t new,
456 vm_map_entry_t old __unused)
457 {
458 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
459 assert(new->used_for_jit == old->used_for_jit);
460 } else {
461 new->used_for_jit = FALSE;
462 }
463 }
464
465 static inline void
vm_map_entry_copy_full(vm_map_entry_t new,vm_map_entry_t old)466 vm_map_entry_copy_full(
467 vm_map_entry_t new,
468 vm_map_entry_t old)
469 {
470 #if MAP_ENTRY_CREATION_DEBUG
471 btref_put(new->vme_creation_bt);
472 btref_retain(old->vme_creation_bt);
473 #endif
474 #if MAP_ENTRY_INSERTION_DEBUG
475 btref_put(new->vme_insertion_bt);
476 btref_retain(old->vme_insertion_bt);
477 #endif
478 *new = *old;
479 }
480
481 static inline void
vm_map_entry_copy(vm_map_t map,vm_map_entry_t new,vm_map_entry_t old)482 vm_map_entry_copy(
483 vm_map_t map,
484 vm_map_entry_t new,
485 vm_map_entry_t old)
486 {
487 vm_map_entry_copy_full(new, old);
488
489 new->is_shared = FALSE;
490 new->needs_wakeup = FALSE;
491 new->in_transition = FALSE;
492 new->wired_count = 0;
493 new->user_wired_count = 0;
494 new->vme_permanent = FALSE;
495 vm_map_entry_copy_code_signing(map, new, old);
496 vm_map_entry_copy_pmap_cs_assoc(map, new, old);
497 if (new->iokit_acct) {
498 assertf(!new->use_pmap, "old %p new %p\n", old, new);
499 new->iokit_acct = FALSE;
500 new->use_pmap = TRUE;
501 }
502 new->vme_resilient_codesign = FALSE;
503 new->vme_resilient_media = FALSE;
504 new->vme_atomic = FALSE;
505 new->vme_no_copy_on_read = FALSE;
506 }
507
508 /*
509 * Normal lock_read_to_write() returns FALSE/0 on failure.
510 * These functions evaluate to zero on success and non-zero value on failure.
511 */
512 __attribute__((always_inline))
513 int
vm_map_lock_read_to_write(vm_map_t map)514 vm_map_lock_read_to_write(vm_map_t map)
515 {
516 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
517 DTRACE_VM(vm_map_lock_upgrade);
518 return 0;
519 }
520 return 1;
521 }
522
523 __attribute__((always_inline))
524 boolean_t
vm_map_try_lock(vm_map_t map)525 vm_map_try_lock(vm_map_t map)
526 {
527 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
528 DTRACE_VM(vm_map_lock_w);
529 return TRUE;
530 }
531 return FALSE;
532 }
533
534 __attribute__((always_inline))
535 boolean_t
vm_map_try_lock_read(vm_map_t map)536 vm_map_try_lock_read(vm_map_t map)
537 {
538 if (lck_rw_try_lock_shared(&(map)->lock)) {
539 DTRACE_VM(vm_map_lock_r);
540 return TRUE;
541 }
542 return FALSE;
543 }
544
/*!
 * @function kdp_vm_map_is_acquired_exclusive
 *
 * @abstract
 * Checks if the vm map's lock is held exclusively.
 *
 * @discussion
 * NOT SAFE for general use: reads the rw-lock state without any
 * synchronization. To be used only by the kernel debugger (kdp).
 *
 * @param map map to check
 *
 * @returns TRUE if the map is acquired exclusively.
 */
boolean_t
kdp_vm_map_is_acquired_exclusive(vm_map_t map)
{
	return kdp_lck_rw_lock_is_acquired_exclusive(&map->lock);
}
563
564 /*
565 * Routines to get the page size the caller should
566 * use while inspecting the target address space.
567 * Use the "_safely" variant if the caller is dealing with a user-provided
568 * array whose size depends on the page size, to avoid any overflow or
569 * underflow of a user-allocated buffer.
570 */
571 int
vm_self_region_page_shift_safely(vm_map_t target_map)572 vm_self_region_page_shift_safely(
573 vm_map_t target_map)
574 {
575 int effective_page_shift = 0;
576
577 if (PAGE_SIZE == (4096)) {
578 /* x86_64 and 4k watches: always use 4k */
579 return PAGE_SHIFT;
580 }
581 /* did caller provide an explicit page size for this thread to use? */
582 effective_page_shift = thread_self_region_page_shift();
583 if (effective_page_shift) {
584 /* use the explicitly-provided page size */
585 return effective_page_shift;
586 }
587 /* no explicit page size: use the caller's page size... */
588 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
589 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
590 /* page size match: safe to use */
591 return effective_page_shift;
592 }
593 /* page size mismatch */
594 return -1;
595 }
596 int
vm_self_region_page_shift(vm_map_t target_map)597 vm_self_region_page_shift(
598 vm_map_t target_map)
599 {
600 int effective_page_shift;
601
602 effective_page_shift = vm_self_region_page_shift_safely(target_map);
603 if (effective_page_shift == -1) {
604 /* no safe value but OK to guess for caller */
605 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
606 VM_MAP_PAGE_SHIFT(target_map));
607 }
608 return effective_page_shift;
609 }
610
611
612 /*
613 * Decide if we want to allow processes to execute from their data or stack areas.
614 * override_nx() returns true if we do. Data/stack execution can be enabled independently
615 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
616 * or allow_stack_exec to enable data execution for that type of data area for that particular
617 * ABI (or both by or'ing the flags together). These are initialized in the architecture
618 * specific pmap files since the default behavior varies according to architecture. The
619 * main reason it varies is because of the need to provide binary compatibility with old
620 * applications that were written before these restrictions came into being. In the old
621 * days, an app could execute anything it could read, but this has slowly been tightened
622 * up over time. The default behavior is:
623 *
624 * 32-bit PPC apps may execute from both stack and data areas
 * 32-bit Intel apps may execute from data areas but not stack
626 * 64-bit PPC/Intel apps may not execute from either data or stack
627 *
628 * An application on any architecture may override these defaults by explicitly
629 * adding PROT_EXEC permission to the page in question with the mprotect(2)
630 * system call. This code here just determines what happens when an app tries to
631 * execute from a page that lacks execute permission.
632 *
633 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
634 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
635 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
636 * execution from data areas for a particular binary even if the arch normally permits it. As
637 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
638 * to support some complicated use cases, notably browsers with out-of-process plugins that
639 * are not all NX-safe.
640 */
641
642 extern int allow_data_exec, allow_stack_exec;
643
644 int
override_nx(vm_map_t map,uint32_t user_tag)645 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
646 {
647 int current_abi;
648
649 if (map->pmap == kernel_pmap) {
650 return FALSE;
651 }
652
653 /*
654 * Determine if the app is running in 32 or 64 bit mode.
655 */
656
657 if (vm_map_is_64bit(map)) {
658 current_abi = VM_ABI_64;
659 } else {
660 current_abi = VM_ABI_32;
661 }
662
663 /*
664 * Determine if we should allow the execution based on whether it's a
665 * stack or data area and the current architecture.
666 */
667
668 if (user_tag == VM_MEMORY_STACK) {
669 return allow_stack_exec & current_abi;
670 }
671
672 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
673 }
674
675
676 /*
677 * Virtual memory maps provide for the mapping, protection,
678 * and sharing of virtual memory objects. In addition,
679 * this module provides for an efficient virtual copy of
680 * memory from one map to another.
681 *
682 * Synchronization is required prior to most operations.
683 *
684 * Maps consist of an ordered doubly-linked list of simple
685 * entries; a single hint is used to speed up lookups.
686 *
687 * Sharing maps have been deleted from this version of Mach.
688 * All shared objects are now mapped directly into the respective
689 * maps. This requires a change in the copy on write strategy;
690 * the asymmetric (delayed) strategy is used for shared temporary
691 * objects instead of the symmetric (shadow) strategy. All maps
692 * are now "top level" maps (either task map, kernel map or submap
693 * of the kernel map).
694 *
 * Since portions of maps are specified by start/end addresses,
696 * which may not align with existing map entries, all
697 * routines merely "clip" entries to these start/end values.
698 * [That is, an entry is split into two, bordering at a
699 * start or end value.] Note that these clippings may not
700 * always be necessary (as the two resulting entries are then
701 * not changed); however, the clipping is done for convenience.
702 * No attempt is currently made to "glue back together" two
703 * abutting entries.
704 *
705 * The symmetric (shadow) copy strategy implements virtual copy
706 * by copying VM object references from one map to
707 * another, and then marking both regions as copy-on-write.
708 * It is important to note that only one writeable reference
709 * to a VM object region exists in any map when this strategy
710 * is used -- this means that shadow object creation can be
711 * delayed until a write operation occurs. The symmetric (delayed)
712 * strategy allows multiple maps to have writeable references to
713 * the same region of a vm object, and hence cannot delay creating
714 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
715 * Copying of permanent objects is completely different; see
716 * vm_object_copy_strategically() in vm_object.c.
717 */
718
719 ZONE_DECLARE_ID(ZONE_ID_VM_MAP_COPY, struct vm_map_copy);
720
721 #define VM_MAP_ZONE_NAME "maps"
722 #define VM_MAP_ZFLAGS ( \
723 ZC_NOENCRYPT | \
724 ZC_VM_LP64)
725
726 #define VM_MAP_ENTRY_ZONE_NAME "VM map entries"
727 #define VM_MAP_ENTRY_ZFLAGS ( \
728 ZC_NOENCRYPT | \
729 ZC_CACHING | \
730 ZC_KASAN_NOQUARANTINE | \
731 ZC_VM_LP64)
732
733 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
734 #define VM_MAP_HOLES_ZFLAGS ( \
735 ZC_NOENCRYPT | \
736 ZC_CACHING | \
737 ZC_KASAN_NOQUARANTINE | \
738 ZC_VM_LP64)
739
/*
 * Asserts that a vm_map_copy object is coming from the
 * vm_map_copy_zone to ensure that it isn't a fake constructed
 * anywhere else.
 */
void
vm_map_copy_require(struct vm_map_copy *copy)
{
	/* zone_id_require() validates the allocation's provenance */
	zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
}
750
/*
 * vm_map_require:
 *
 * Ensures that the argument is memory allocated from the genuine
 * vm map zone. (See zone_id_require.)
 */
void
vm_map_require(vm_map_t map)
{
	zone_id_require(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
}
762
763 #define VM_MAP_EARLY_COUNT_MAX 16
764 static __startup_data vm_offset_t map_data;
765 static __startup_data vm_size_t map_data_size;
766 static __startup_data vm_offset_t kentry_data;
767 static __startup_data vm_size_t kentry_data_size;
768 static __startup_data vm_offset_t map_holes_data;
769 static __startup_data vm_size_t map_holes_data_size;
770 static __startup_data vm_map_t *early_map_owners[VM_MAP_EARLY_COUNT_MAX];
771 static __startup_data uint32_t early_map_count;
772
773 #if XNU_TARGET_OS_OSX
774 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
775 #else /* XNU_TARGET_OS_OSX */
776 #define NO_COALESCE_LIMIT 0
777 #endif /* XNU_TARGET_OS_OSX */
778
779 /* Skip acquiring locks if we're in the midst of a kernel core dump */
780 unsigned int not_in_kdp = 1;
781
782 unsigned int vm_map_set_cache_attr_count = 0;
783
784 kern_return_t
vm_map_set_cache_attr(vm_map_t map,vm_map_offset_t va)785 vm_map_set_cache_attr(
786 vm_map_t map,
787 vm_map_offset_t va)
788 {
789 vm_map_entry_t map_entry;
790 vm_object_t object;
791 kern_return_t kr = KERN_SUCCESS;
792
793 vm_map_lock_read(map);
794
795 if (!vm_map_lookup_entry(map, va, &map_entry) ||
796 map_entry->is_sub_map) {
797 /*
798 * that memory is not properly mapped
799 */
800 kr = KERN_INVALID_ARGUMENT;
801 goto done;
802 }
803 object = VME_OBJECT(map_entry);
804
805 if (object == VM_OBJECT_NULL) {
806 /*
807 * there should be a VM object here at this point
808 */
809 kr = KERN_INVALID_ARGUMENT;
810 goto done;
811 }
812 vm_object_lock(object);
813 object->set_cache_attr = TRUE;
814 vm_object_unlock(object);
815
816 vm_map_set_cache_attr_count++;
817 done:
818 vm_map_unlock_read(map);
819
820 return kr;
821 }
822
823
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data
 * structures referenced in crypt_info must remain valid until
 * crypt_info->crypt_end() is called.
 *
 * Returns KERN_SUCCESS when every entry in [start, end) has been remapped,
 * KERN_INVALID_ARGUMENT for unmapped/submap/non-executable ranges, and
 * KERN_FAILURE if the protect pager could not be set up.
 */
kern_return_t
vm_map_apple_protected(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_object_offset_t crypto_backing_offset,
	struct pager_crypt_info *crypt_info,
	uint32_t cryptid)
{
	boolean_t map_locked;
	kern_return_t kr;
	vm_map_entry_t map_entry;
	struct vm_map_entry tmp_entry;
	memory_object_t unprotected_mem_obj;
	vm_object_t protected_object;
	vm_map_offset_t map_addr;
	vm_map_offset_t start_aligned, end_aligned;
	vm_object_offset_t crypto_start, crypto_end;
	int vm_flags;
	vm_map_kernel_flags_t vmk_flags;
	boolean_t cache_pager;

	vm_flags = 0;
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

	map_locked = FALSE;
	unprotected_mem_obj = MEMORY_OBJECT_NULL;

	/* align the range out to both the system and the map page size */
	start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
	end_aligned = vm_map_round_page(end, PAGE_MASK_64);
	start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
	end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));

#if __arm64__
	/*
	 * "start" and "end" might be 4K-aligned but not 16K-aligned,
	 * so we might have to loop and establish up to 3 mappings:
	 *
	 * + the first 16K-page, which might overlap with the previous
	 *   4K-aligned mapping,
	 * + the center,
	 * + the last 16K-page, which might overlap with the next
	 *   4K-aligned mapping.
	 * Each of these mapping might be backed by a vnode pager (if
	 * properly page-aligned) or a "fourk_pager", itself backed by a
	 * vnode pager (if 4K-aligned but not page-aligned).
	 */
#endif /* __arm64__ */

	/* one iteration per map entry overlapping the requested range */
	map_addr = start_aligned;
	for (map_addr = start_aligned;
	    map_addr < end;
	    map_addr = tmp_entry.vme_end) {
		vm_map_lock(map);
		map_locked = TRUE;

		/* lookup the protected VM object */
		if (!vm_map_lookup_entry(map,
		    map_addr,
		    &map_entry) ||
		    map_entry->is_sub_map ||
		    VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
			/* that memory is not properly mapped */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		/* ensure mapped memory is mapped as executable,
		 * except for the model decryption flow */
		if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
		    !(map_entry->protection & VM_PROT_EXECUTE)) {
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		/* get the protected object to be decrypted */
		protected_object = VME_OBJECT(map_entry);
		if (protected_object == VM_OBJECT_NULL) {
			/* there should be a VM object here at this point */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}
		/* ensure protected object stays alive while map is unlocked */
		vm_object_reference(protected_object);

		/* limit the map entry to the area we want to cover */
		vm_map_clip_start(map, map_entry, start_aligned);
		vm_map_clip_end(map, map_entry, end_aligned);

		tmp_entry = *map_entry;
		map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
		vm_map_unlock(map);
		map_locked = FALSE;

		/*
		 * This map entry might be only partially encrypted
		 * (if not fully "page-aligned").
		 *
		 * NOTE(review): kr is set to KERN_INVALID_ADDRESS below but
		 * the loop is not exited, so the value is later overwritten
		 * by vm_map_enter_mem_object()/KERN_SUCCESS — confirm this
		 * is intentional.
		 */
		crypto_start = 0;
		crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
		if (tmp_entry.vme_start < start) {
			if (tmp_entry.vme_start != start_aligned) {
				kr = KERN_INVALID_ADDRESS;
			}
			crypto_start += (start - tmp_entry.vme_start);
		}
		if (tmp_entry.vme_end > end) {
			if (tmp_entry.vme_end != end_aligned) {
				kr = KERN_INVALID_ADDRESS;
			}
			crypto_end -= (tmp_entry.vme_end - end);
		}

		/*
		 * This "extra backing offset" is needed to get the decryption
		 * routine to use the right key. It adjusts for the possibly
		 * relative offset of an interposed "4K" pager...
		 */
		if (crypto_backing_offset == (vm_object_offset_t) -1) {
			crypto_backing_offset = VME_OFFSET(&tmp_entry);
		}

		cache_pager = TRUE;
#if XNU_TARGET_OS_OSX
		if (vm_map_is_alien(map)) {
			/* do not cache pagers for "alien" (foreign-ABI) maps */
			cache_pager = FALSE;
		}
#endif /* XNU_TARGET_OS_OSX */

		/*
		 * Lookup (and create if necessary) the protected memory object
		 * matching that VM object.
		 * If successful, this also grabs a reference on the memory object,
		 * to guarantee that it doesn't go away before we get a chance to map
		 * it.
		 */
		unprotected_mem_obj = apple_protect_pager_setup(
			protected_object,
			VME_OFFSET(&tmp_entry),
			crypto_backing_offset,
			crypt_info,
			crypto_start,
			crypto_end,
			cache_pager);

		/* release extra ref on protected object */
		vm_object_deallocate(protected_object);

		if (unprotected_mem_obj == NULL) {
			kr = KERN_FAILURE;
			goto done;
		}

		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
		/* can overwrite an immutable mapping */
		vmk_flags.vmkf_overwrite_immutable = TRUE;
#if __arm64__
		/* 4K binary-compat escape hatch: allow write+execute for jit */
		if (tmp_entry.used_for_jit &&
		    (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
		    PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
		    fourk_binary_compatibility_unsafe &&
		    fourk_binary_compatibility_allow_wx) {
			printf("** FOURK_COMPAT [%d]: "
			    "allowing write+execute at 0x%llx\n",
			    proc_selfpid(), tmp_entry.vme_start);
			vmk_flags.vmkf_map_jit = TRUE;
		}
#endif /* __arm64__ */

		/* map this memory object in place of the current one */
		map_addr = tmp_entry.vme_start;
		kr = vm_map_enter_mem_object(map,
		    &map_addr,
		    (tmp_entry.vme_end -
		    tmp_entry.vme_start),
		    (mach_vm_offset_t) 0,
		    vm_flags,
		    vmk_flags,
		    VM_KERN_MEMORY_NONE,
		    (ipc_port_t)(uintptr_t) unprotected_mem_obj,
		    0,
		    TRUE,
		    tmp_entry.protection,
		    tmp_entry.max_protection,
		    tmp_entry.inheritance);
		assertf(kr == KERN_SUCCESS,
		    "kr = 0x%x\n", kr);
		assertf(map_addr == tmp_entry.vme_start,
		    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
		    (uint64_t)map_addr,
		    (uint64_t) tmp_entry.vme_start,
		    &tmp_entry);

#if VM_MAP_DEBUG_APPLE_PROTECT
		if (vm_map_debug_apple_protect) {
			printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
			    " backing:[object:%p,offset:0x%llx,"
			    "crypto_backing_offset:0x%llx,"
			    "crypto_start:0x%llx,crypto_end:0x%llx]\n",
			    map,
			    (uint64_t) map_addr,
			    (uint64_t) (map_addr + (tmp_entry.vme_end -
			    tmp_entry.vme_start)),
			    unprotected_mem_obj,
			    protected_object,
			    VME_OFFSET(&tmp_entry),
			    crypto_backing_offset,
			    crypto_start,
			    crypto_end);
		}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

		/*
		 * Release the reference obtained by
		 * apple_protect_pager_setup().
		 * The mapping (if it succeeded) is now holding a reference on
		 * the memory object.
		 */
		memory_object_deallocate(unprotected_mem_obj);
		unprotected_mem_obj = MEMORY_OBJECT_NULL;

		/* continue with next map entry */
		crypto_backing_offset += (tmp_entry.vme_end -
		    tmp_entry.vme_start);
		crypto_backing_offset -= crypto_start;
	}
	kr = KERN_SUCCESS;

done:
	if (map_locked) {
		vm_map_unlock(map);
	}
	return kr;
}
#endif /* CONFIG_CODE_DECRYPTION */
1068
1069
/* Lock group and lock attributes shared by all VM map locks. */
LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);

/*
 * "malloc_no_cow" boot-arg / default: when non-zero, vm_map_init() builds
 * vm_memory_malloc_no_cow_mask from the VM_MEMORY_MALLOC* tags below.
 * Defaults on for embedded targets, off for macOS.
 * NOTE(review): presumably this exempts malloc-tagged memory from
 * copy-on-write — confirm against the mask's consumers.
 */
#if XNU_TARGET_OS_OSX
int malloc_no_cow = 0;
#else /* XNU_TARGET_OS_OSX */
int malloc_no_cow = 1;
#endif /* XNU_TARGET_OS_OSX */
/* Bitmask of VM_MEMORY_* tags affected by malloc_no_cow (see vm_map_init()). */
uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
#if DEBUG
/* "vm_check_map_sanity" boot-arg: enables extra VM map consistency checks. */
int vm_check_map_sanity = 0;
#endif
1083
1084 /*
1085 * vm_map_init:
1086 *
1087 * Initialize the vm_map module. Must be called before
1088 * any other vm_map routines.
1089 *
1090 * Map and entry structures are allocated from zones -- we must
1091 * initialize those zones.
1092 *
1093 * There are three zones of interest:
1094 *
1095 * vm_map_zone: used to allocate maps.
1096 * vm_map_entry_zone: used to allocate map entries.
1097 *
1098 * LP32:
1099 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1100 *
1101 * The kernel allocates map entries from a special zone that is initially
1102 * "crammed" with memory. It would be difficult (perhaps impossible) for
 * the kernel to allocate more memory to an entry zone when it became
1104 * empty since the very act of allocating memory implies the creation
1105 * of a new entry.
1106 */
1107 __startup_func
1108 void
vm_map_init(void)1109 vm_map_init(void)
1110 {
1111
1112 #if MACH_ASSERT
1113 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1114 sizeof(debug4k_filter));
1115 #endif /* MACH_ASSERT */
1116
1117 zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1118 VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
1119
1120 /*
1121 * Don't quarantine because we always need elements available
1122 * Disallow GC on this zone... to aid the GC.
1123 */
1124 zone_create_ext(VM_MAP_ENTRY_ZONE_NAME,
1125 sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1126 ZONE_ID_VM_MAP_ENTRY, ^(zone_t z) {
1127 z->z_elems_rsv = (uint16_t)(32 *
1128 (ml_early_cpu_max_number() + 1));
1129 });
1130
1131 zone_create_ext(VM_MAP_HOLES_ZONE_NAME,
1132 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1133 ZONE_ID_VM_MAP_HOLES, ^(zone_t z) {
1134 z->z_elems_rsv = (uint16_t)(16 * 1024 / zone_elem_size(z));
1135 });
1136
1137 zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1138 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1139
1140 /*
1141 * Add the stolen memory to zones, adjust zone size and stolen counts.
1142 */
1143 zone_cram_early(vm_map_zone, map_data, map_data_size);
1144 zone_cram_early(vm_map_entry_zone, kentry_data, kentry_data_size);
1145 zone_cram_early(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1146 printf("VM boostrap: %d maps, %d entries and %d holes available\n",
1147 vm_map_zone->z_elems_free,
1148 vm_map_entry_zone->z_elems_free,
1149 vm_map_holes_zone->z_elems_free);
1150
1151 /*
1152 * Since these are covered by zones, remove them from stolen page accounting.
1153 */
1154 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1155
1156 #if VM_MAP_DEBUG_APPLE_PROTECT
1157 PE_parse_boot_argn("vm_map_debug_apple_protect",
1158 &vm_map_debug_apple_protect,
1159 sizeof(vm_map_debug_apple_protect));
1160 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1161 #if VM_MAP_DEBUG_APPLE_FOURK
1162 PE_parse_boot_argn("vm_map_debug_fourk",
1163 &vm_map_debug_fourk,
1164 sizeof(vm_map_debug_fourk));
1165 #endif /* VM_MAP_DEBUG_FOURK */
1166
1167 PE_parse_boot_argn("malloc_no_cow",
1168 &malloc_no_cow,
1169 sizeof(malloc_no_cow));
1170 if (malloc_no_cow) {
1171 vm_memory_malloc_no_cow_mask = 0ULL;
1172 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1173 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1174 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1175 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1176 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1177 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1178 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1179 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1180 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1181 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1182 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1183 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1184 &vm_memory_malloc_no_cow_mask,
1185 sizeof(vm_memory_malloc_no_cow_mask));
1186 }
1187
1188 #if CONFIG_MAP_RANGES
1189 vm_map_range_map_init();
1190 #endif /* CONFIG_MAP_RANGES */
1191
1192 #if DEBUG
1193 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1194 if (vm_check_map_sanity) {
1195 kprintf("VM sanity checking enabled\n");
1196 } else {
1197 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1198 }
1199 #endif /* DEBUG */
1200
1201 #if DEVELOPMENT || DEBUG
1202 PE_parse_boot_argn("panic_on_unsigned_execute",
1203 &panic_on_unsigned_execute,
1204 sizeof(panic_on_unsigned_execute));
1205 PE_parse_boot_argn("panic_on_mlock_failure",
1206 &panic_on_mlock_failure,
1207 sizeof(panic_on_mlock_failure));
1208 #endif /* DEVELOPMENT || DEBUG */
1209 }
1210
/*
 * Carve out, during PMAP_STEAL, the contiguous chunk of boot memory that
 * vm_map_init() will later cram into the map/entry/hole zones.
 */
__startup_func
static void
vm_map_steal_memory(void)
{
	/*
	 * We need to reserve enough memory to support bootstrapping VM maps
	 * and the zone subsystem.
	 *
	 * The VM Maps that need to function before zones can support them
	 * are the ones registered with vm_map_will_allocate_early_map(),
	 * which are:
	 * - the kernel map
	 * - the various submaps used by zones (pgz, meta, ...)
	 *
	 * We also need enough entries and holes to support them
	 * until zone_metadata_init() is called, which is when
	 * the zone allocator becomes capable of expanding dynamically.
	 *
	 * We need:
	 * - VM_MAP_EARLY_COUNT_MAX worth of VM Maps.
	 * - To allow for 3-4 entries per map, but the kernel map
	 *   needs a multiple of VM_MAP_EARLY_COUNT_MAX entries
	 *   to describe the submaps, so double it (and make it 8x too)
	 * - To allow for holes between entries,
	 *   hence needs the same budget as entries
	 */
	map_data_size = zone_get_early_alloc_size(VM_MAP_ZONE_NAME,
	    sizeof(struct _vm_map), VM_MAP_ZFLAGS,
	    VM_MAP_EARLY_COUNT_MAX);

	kentry_data_size = zone_get_early_alloc_size(VM_MAP_ENTRY_ZONE_NAME,
	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
	    8 * VM_MAP_EARLY_COUNT_MAX);

	map_holes_data_size = zone_get_early_alloc_size(VM_MAP_HOLES_ZONE_NAME,
	    sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
	    8 * VM_MAP_EARLY_COUNT_MAX);

	/*
	 * Steal a contiguous range of memory so that a simple range check
	 * can validate early addresses being freed/crammed to these
	 * zones
	 */
	map_data = zone_early_mem_init(map_data_size + kentry_data_size +
	    map_holes_data_size);
	/* the three regions are laid out back to back: maps, entries, holes */
	kentry_data = map_data + map_data_size;
	map_holes_data = kentry_data + kentry_data_size;
}
STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1260
/*
 * Log how much of the early-boot map/entry/hole reserve survived VM
 * bootstrap. (The identifier preserves a historical misspelling of
 * "bootstrapped"; renaming would churn the STARTUP registration.)
 */
__startup_func
static void
vm_kernel_boostraped(void)
{
	printf("VM bootstrap done: %d maps, %d entries and %d holes left\n",
	    vm_map_zone->z_elems_free,
	    vm_map_entry_zone->z_elems_free,
	    vm_map_holes_zone->z_elems_free);
}
STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_kernel_boostraped);
1271
/*
 * Tear down a map's hole-tracking list and fall back to the legacy
 * "first_free" hint for free-space lookups.
 *
 * The hole list is circular: walk it freeing each node until the walk
 * wraps back around to the head, then reset the related map state.
 */
void
vm_map_disable_hole_optimization(vm_map_t map)
{
	vm_map_entry_t head_entry, hole_entry, next_hole_entry;

	if (map->holelistenabled) {
		head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

		while (hole_entry != NULL) {
			next_hole_entry = hole_entry->vme_next;

			/* unlink before freeing so no dangling links remain */
			hole_entry->vme_next = NULL;
			hole_entry->vme_prev = NULL;
			zfree_id(ZONE_ID_VM_MAP_HOLES, hole_entry);

			/* wrapped around to the head: the list is drained */
			if (next_hole_entry == head_entry) {
				hole_entry = NULL;
			} else {
				hole_entry = next_hole_entry;
			}
		}

		map->holes_list = NULL;
		map->holelistenabled = FALSE;

		/* re-seed the legacy free-space hint now that holes are gone */
		map->first_free = vm_map_first_entry(map);
		SAVE_HINT_HOLE_WRITE(map, NULL);
	}
}
1301
1302 boolean_t
vm_kernel_map_is_kernel(vm_map_t map)1303 vm_kernel_map_is_kernel(vm_map_t map)
1304 {
1305 return map->pmap == kernel_pmap;
1306 }
1307
1308 /*
1309 * vm_map_create:
1310 *
1311 * Creates and returns a new empty VM map with
1312 * the given physical map structure, and having
1313 * the given lower and upper address bounds.
1314 */
1315
1316 extern vm_map_t vm_map_create_external(
1317 pmap_t pmap,
1318 vm_map_offset_t min_off,
1319 vm_map_offset_t max_off,
1320 boolean_t pageable);
1321
1322 vm_map_t
vm_map_create_external(pmap_t pmap,vm_map_offset_t min,vm_map_offset_t max,boolean_t pageable)1323 vm_map_create_external(
1324 pmap_t pmap,
1325 vm_map_offset_t min,
1326 vm_map_offset_t max,
1327 boolean_t pageable)
1328 {
1329 vm_map_create_options_t options = VM_MAP_CREATE_DEFAULT;
1330
1331 if (pageable) {
1332 options |= VM_MAP_CREATE_PAGEABLE;
1333 }
1334 return vm_map_create_options(pmap, min, max, options);
1335 }
1336
1337 __startup_func
1338 void
vm_map_will_allocate_early_map(vm_map_t * owner)1339 vm_map_will_allocate_early_map(vm_map_t *owner)
1340 {
1341 if (early_map_count >= VM_MAP_EARLY_COUNT_MAX) {
1342 panic("VM_MAP_EARLY_COUNT_MAX is too low");
1343 }
1344
1345 early_map_owners[early_map_count++] = owner;
1346 }
1347
1348 __startup_func
1349 void
vm_map_relocate_early_maps(vm_offset_t delta)1350 vm_map_relocate_early_maps(vm_offset_t delta)
1351 {
1352 for (uint32_t i = 0; i < early_map_count; i++) {
1353 vm_address_t addr = (vm_address_t)*early_map_owners[i];
1354
1355 *early_map_owners[i] = (vm_map_t)(addr + delta);
1356 }
1357
1358 early_map_count = ~0u;
1359 }
1360
1361 /*
1362 * Routine: vm_map_relocate_early_elem
1363 *
1364 * Purpose:
1365 * Early zone elements are allocated in a temporary part
1366 * of the address space.
1367 *
1368 * Once the zones live in their final place, the early
1369 * VM maps, map entries and map holes need to be relocated.
1370 *
1371 * It involves rewriting any vm_map_t, vm_map_entry_t or
1372 * pointers to vm_map_links. Other pointers to other types
1373 * are fine.
1374 *
1375 * Fortunately, pointers to those types are self-contained
1376 * in those zones, _except_ for pointers to VM maps,
1377 * which are tracked during early boot and fixed with
1378 * vm_map_relocate_early_maps().
1379 */
__startup_func
void
vm_map_relocate_early_elem(
	uint32_t zone_id,
	vm_offset_t new_addr,
	vm_offset_t delta)
{
	/*
	 * Slide the pointer stored in `field` of the element now living at
	 * `new_addr` by `delta`, leaving NULL pointers untouched.
	 */
#define relocate(type_t, field) ({ \
	typeof(((type_t)NULL)->field) *__field = &((type_t)new_addr)->field; \
	if (*__field) { \
	        *__field = (typeof(*__field))((vm_offset_t)*__field + delta); \
	} \
})

	/* Only the three early-boot zones may contain relocatable elements. */
	switch (zone_id) {
	case ZONE_ID_VM_MAP:
	case ZONE_ID_VM_MAP_ENTRY:
	case ZONE_ID_VM_MAP_HOLES:
		break;

	default:
		panic("Unexpected zone ID %d", zone_id);
	}

	if (zone_id == ZONE_ID_VM_MAP) {
		relocate(vm_map_t, hdr.links.prev);
		relocate(vm_map_t, hdr.links.next);
		/* early maps are all kernel maps; rewrite the pmap directly */
		((vm_map_t)new_addr)->pmap = kernel_pmap;
#ifdef VM_MAP_STORE_USE_RB
		relocate(vm_map_t, hdr.rb_head_store.rbh_root);
#endif /* VM_MAP_STORE_USE_RB */
		relocate(vm_map_t, hint);
		relocate(vm_map_t, hole_hint);
		relocate(vm_map_t, first_free);
		return;
	}

	/* entries and holes both begin with vm_map_links: fix prev/next */
	relocate(struct vm_map_links *, prev);
	relocate(struct vm_map_links *, next);

	if (zone_id == ZONE_ID_VM_MAP_ENTRY) {
#ifdef VM_MAP_STORE_USE_RB
		relocate(vm_map_entry_t, store.entry.rbe_left);
		relocate(vm_map_entry_t, store.entry.rbe_right);
		relocate(vm_map_entry_t, store.entry.rbe_parent);
#endif /* VM_MAP_STORE_USE_RB */
		if (((vm_map_entry_t)new_addr)->is_sub_map) {
			/* no object to relocate because we haven't made any */
			/* vme_submap stores the pointer shifted; shift delta to match */
			((vm_map_entry_t)new_addr)->vme_submap +=
			    delta >> VME_SUBMAP_SHIFT;
		}
#if MAP_ENTRY_CREATION_DEBUG
		relocate(vm_map_entry_t, vme_creation_maphdr);
#endif /* MAP_ENTRY_CREATION_DEBUG */
	}

#undef relocate
}
1438
/*
 * Allocate and initialize a new VM map covering [min, max) on `pmap`,
 * configured per `options`. Returns the map with one reference held.
 */
vm_map_t
vm_map_create_options(
	pmap_t pmap,
	vm_map_offset_t min,
	vm_map_offset_t max,
	vm_map_create_options_t options)
{
	vm_map_t result;

#if DEBUG || DEVELOPMENT
	if (__improbable(startup_phase < STARTUP_SUB_ZALLOC)) {
		/*
		 * Every early map must have been pre-registered with
		 * vm_map_will_allocate_early_map() so it can be relocated,
		 * and must live on the kernel pmap.
		 */
		if (early_map_count != ~0u && early_map_count !=
		    zone_count_allocated(vm_map_zone) + 1) {
			panic("allocating %dth early map, owner not known",
			    zone_count_allocated(vm_map_zone) + 1);
		}
		if (early_map_count != ~0u && pmap && pmap != kernel_pmap) {
			panic("allocating %dth early map for non kernel pmap",
			    early_map_count);
		}
	}
#endif /* DEBUG || DEVELOPMENT */

	result = zalloc_id(ZONE_ID_VM_MAP, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	/* an empty map's entry list points back at the map header */
	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result) = vm_map_to_entry(result);

	vm_map_store_init(&result->hdr);
	result->hdr.entries_pageable = (bool)(options & VM_MAP_CREATE_PAGEABLE);
	vm_map_set_page_shift(result, PAGE_SHIFT);

	result->size_limit = RLIM_INFINITY;             /* default unlimited */
	result->data_limit = RLIM_INFINITY;             /* default unlimited */
	result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
	os_ref_init_count_raw(&result->map_refcnt, &map_refgrp, 1);
	result->pmap = pmap;
	result->min_offset = min;
	result->max_offset = max;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);

	if (options & VM_MAP_CREATE_NEVER_FAULTS) {
		assert(pmap == kernel_pmap);
		result->never_faults = true;
	}

	/* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
	if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
		result->has_corpse_footprint = true;
	} else if (!(options & VM_MAP_CREATE_DISABLE_HOLELIST)) {
		struct vm_map_links *hole_entry;

		/* start with a single hole spanning the whole usable range */
		hole_entry = zalloc_id(ZONE_ID_VM_MAP_HOLES, Z_WAITOK | Z_NOFAIL);
		hole_entry->start = min;
#if defined(__arm64__)
		hole_entry->end = result->max_offset;
#else
		hole_entry->end = MAX(max, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
#endif
		result->holes_list = result->hole_hint = hole_entry;
		/* the hole list is circular: the single node links to itself */
		hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
		result->holelistenabled = true;
	}

	vm_map_lock_init(result);

	return result;
}
1508
1509 /*
1510 * Adjusts a submap that was made by kmem_suballoc()
1511 * before it knew where it would be mapped,
1512 * so that it has the right min/max offsets.
1513 *
1514 * We do not need to hold any locks:
1515 * only the caller knows about this map,
1516 * and it is not published on any entry yet.
1517 */
static void
vm_map_adjust_offsets(
	vm_map_t map,
	vm_map_offset_t min_off,
	vm_map_offset_t max_off)
{
	/* must still be the "floating" empty shape kmem_suballoc() created */
	assert(map->min_offset == 0);
	assert(map->max_offset == max_off - min_off);
	assert(map->hdr.nentries == 0);
	/* exactly two references are expected at this point (see header comment) */
	assert(os_ref_get_count_raw(&map->map_refcnt) == 2);

	map->min_offset = min_off;
	map->max_offset = max_off;

	if (map->holelistenabled) {
		struct vm_map_links *hole = map->holes_list;

		/* the single initial hole must track the new bounds */
		hole->start = min_off;
#if defined(__arm64__)
		hole->end = max_off;
#else
		hole->end = MAX(max_off, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
#endif
	}
}
1543
1544
1545 vm_map_size_t
vm_map_adjusted_size(vm_map_t map)1546 vm_map_adjusted_size(vm_map_t map)
1547 {
1548 struct vm_reserved_region *regions = NULL;
1549 size_t num_regions = 0;
1550 mach_vm_size_t reserved_size = 0, map_size = 0;
1551
1552 if (map == NULL || (map->size == 0)) {
1553 return 0;
1554 }
1555
1556 map_size = map->size;
1557
1558 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1559 /*
1560 * No special reserved regions or not an exotic map or the task
1561 * is terminating and these special regions might have already
1562 * been deallocated.
1563 */
1564 return map_size;
1565 }
1566
1567 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), ®ions);
1568 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1569
1570 while (num_regions) {
1571 reserved_size += regions[--num_regions].vmrr_size;
1572 }
1573
1574 /*
1575 * There are a few places where the map is being switched out due to
1576 * 'termination' without that bit being set (e.g. exec and corpse purging).
1577 * In those cases, we could have the map's regions being deallocated on
1578 * a core while some accounting process is trying to get the map's size.
1579 * So this assert can't be enabled till all those places are uniform in
1580 * their use of the 'map->terminated' bit.
1581 *
1582 * assert(map_size >= reserved_size);
1583 */
1584
1585 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1586 }
1587
1588 /*
1589 * vm_map_entry_create: [ internal use only ]
1590 *
1591 * Allocates a VM map entry for insertion in the
1592 * given map (or map copy). No fields are filled.
1593 *
1594 * The VM entry will be zero initialized, except for:
1595 * - behavior set to VM_BEHAVIOR_DEFAULT
1596 * - inheritance set to VM_INHERIT_DEFAULT
1597 */
/* Convenience wrappers: allocate an entry for a map or a map copy header. */
#define vm_map_entry_create(map) _vm_map_entry_create(&(map)->hdr)

#define vm_map_copy_entry_create(copy) _vm_map_entry_create(&(copy)->cpy_hdr)

static vm_map_entry_t
_vm_map_entry_create(
	struct vm_map_header *map_header __unused)
{
	vm_map_entry_t entry = NULL;

	/* Z_ZERO gives a fully zeroed entry; the asserts below rely on it. */
	entry = zalloc_id(ZONE_ID_VM_MAP_ENTRY, Z_WAITOK | Z_ZERO);

	/*
	 * Help the compiler with what we know to be true,
	 * so that the further bitfields inits have good codegen.
	 *
	 * See rdar://87041299
	 */
	__builtin_assume(entry->vme_object_value == 0);
	__builtin_assume(*(uint64_t *)(&entry->vme_object_value + 1) == 0);
	__builtin_assume(*(uint64_t *)(&entry->vme_object_value + 2) == 0);

	static_assert(VM_MAX_TAG_VALUE <= VME_ALIAS_MASK,
	    "VME_ALIAS_MASK covers tags");

	/* VM_BEHAVIOR_DEFAULT is zero, so Z_ZERO already initialized it */
	static_assert(VM_BEHAVIOR_DEFAULT == 0,
	    "can skip zeroing of the behavior field");
	entry->inheritance = VM_INHERIT_DEFAULT;

	/* NOTE(review): presumably store-layer bookkeeping for a fresh entry
	 * (map is deliberately NULL here) — confirm in vm_map_store_update() */
	vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);

#if MAP_ENTRY_CREATION_DEBUG
	/* record who created this entry for debugging */
	entry->vme_creation_maphdr = map_header;
	entry->vme_creation_bt = btref_get(__builtin_frame_address(0),
	    BTREF_GET_NOWAIT);
#endif
	return entry;
}
1636
1637 /*
1638 * vm_map_entry_dispose: [ internal use only ]
1639 *
1640 * Inverse of vm_map_entry_create.
1641 *
1642 * write map lock held so no need to
1643 * do anything special to insure correctness
1644 * of the stores
1645 */
static void
vm_map_entry_dispose(
	vm_map_entry_t entry)
{
	/* drop any debug backtrace references before freeing the entry */
#if MAP_ENTRY_CREATION_DEBUG
	btref_put(entry->vme_creation_bt);
#endif
#if MAP_ENTRY_INSERTION_DEBUG
	btref_put(entry->vme_insertion_bt);
#endif
	zfree(vm_map_entry_zone, entry);
}

/* Copy entries come from the same zone, so disposal is identical. */
#define vm_map_copy_entry_dispose(copy_entry) \
	vm_map_entry_dispose(copy_entry)
1661
1662 static vm_map_entry_t
vm_map_zap_first_entry(vm_map_zap_t list)1663 vm_map_zap_first_entry(
1664 vm_map_zap_t list)
1665 {
1666 return list->vmz_head;
1667 }
1668
/*
 * Return the last entry of a non-empty zap list. The tail pointer aims
 * at the last entry's vme_next field, so recover the entry with
 * __container_of.
 */
static vm_map_entry_t
vm_map_zap_last_entry(
	vm_map_zap_t list)
{
	assert(vm_map_zap_first_entry(list));
	return __container_of(list->vmz_tail, struct vm_map_entry, vme_next);
}
1676
/*
 * Append an entry to the tail of a zap list in O(1). The tail is a
 * pointer to the last entry's vme_next slot (or to vmz_head when empty),
 * so storing through it links the new entry in either case.
 */
static void
vm_map_zap_append(
	vm_map_zap_t list,
	vm_map_entry_t entry)
{
	entry->vme_next = VM_MAP_ENTRY_NULL;
	*list->vmz_tail = entry;
	list->vmz_tail = &entry->vme_next;
}
1686
/*
 * Detach and return the head of a zap list (VM_MAP_ENTRY_NULL if empty).
 */
static vm_map_entry_t
vm_map_zap_pop(
	vm_map_zap_t list)
{
	vm_map_entry_t head = list->vmz_head;

	/*
	 * Advance the head; if that empties the list, point the tail back
	 * at the head slot so future appends keep working.
	 */
	if (head != VM_MAP_ENTRY_NULL &&
	    (list->vmz_head = head->vme_next) == VM_MAP_ENTRY_NULL) {
		list->vmz_tail = &list->vmz_head;
	}

	return head;
}
1700
1701 static void
vm_map_zap_dispose(vm_map_zap_t list)1702 vm_map_zap_dispose(
1703 vm_map_zap_t list)
1704 {
1705 vm_map_entry_t entry;
1706
1707 while ((entry = vm_map_zap_pop(list))) {
1708 if (entry->is_sub_map) {
1709 vm_map_deallocate(VME_SUBMAP(entry));
1710 } else {
1711 vm_object_deallocate(VME_OBJECT(entry));
1712 }
1713
1714 vm_map_entry_dispose(entry);
1715 }
1716 }
1717
#if MACH_ASSERT
static boolean_t first_free_check = FALSE;
/*
 * Validate the map's "first_free" hint against the store layer, but
 * only when checking has been explicitly enabled; otherwise claim
 * validity unconditionally.
 */
boolean_t
first_free_is_valid(
	vm_map_t map)
{
	return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif /* MACH_ASSERT */
1731
1732
/* Link/unlink entries on a map copy's header, mirroring the map variants. */
#define vm_map_copy_entry_link(copy, after_where, entry) \
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry) \
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry), false)
1738
1739 /*
1740 * vm_map_destroy:
1741 *
1742 * Actually destroy a map.
1743 */
void
vm_map_destroy(
	vm_map_t map)
{
	/* final cleanup: this is not allowed to fail */
	vmr_flags_t flags = VM_MAP_REMOVE_NO_FLAGS;

	VM_MAP_ZAP_DECLARE(zap);

	vm_map_lock(map);

	map->terminated = true;
	/* clean up regular map entries */
	(void)vm_map_delete(map, map->min_offset, map->max_offset, flags,
	    KMEM_GUARD_NONE, &zap);
	/* clean up leftover special mappings (commpage, GPU carveout, etc...) */
	(void)vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL, flags,
	    KMEM_GUARD_NONE, &zap);

	vm_map_disable_hole_optimization(map);
	vm_map_corpse_footprint_destroy(map);

	vm_map_unlock(map);

	/* deleted entries are reaped outside the map lock, via the zap list */
	vm_map_zap_dispose(&zap);

	assert(map->hdr.nentries == 0);

	if (map->pmap) {
		pmap_destroy(map->pmap);
	}

	lck_rw_destroy(&map->lock, &vm_map_lck_grp);

	zfree_id(ZONE_ID_VM_MAP, map);
}
1780
1781 /*
1782 * Returns pid of the task with the largest number of VM map entries.
1783 * Used in the zone-map-exhaustion jetsam path.
1784 */
pid_t
find_largest_process_vm_map_entries(void)
{
	pid_t victim_pid = -1;
	int max_vm_map_entries = 0;
	task_t task = TASK_NULL;
	queue_head_t *task_list = &tasks;

	/* hold tasks_threads_lock so the task list cannot change under us */
	lck_mtx_lock(&tasks_threads_lock);
	queue_iterate(task_list, task, task_t, tasks) {
		/* the kernel task and dead tasks are not jetsam candidates */
		if (task == kernel_task || !task->active) {
			continue;
		}

		vm_map_t task_map = task->map;
		if (task_map != VM_MAP_NULL) {
			int task_vm_map_entries = task_map->hdr.nentries;
			if (task_vm_map_entries > max_vm_map_entries) {
				max_vm_map_entries = task_vm_map_entries;
				victim_pid = pid_from_task(task);
			}
		}
	}
	lck_mtx_unlock(&tasks_threads_lock);

	printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
	return victim_pid;
}
1813
1814
1815 /*
1816 * vm_map_lookup_entry: [ internal use only ]
1817 *
1818 * Calls into the vm map store layer to find the map
1819 * entry containing (or immediately preceding) the
1820 * specified address in the given map; the entry is returned
1821 * in the "entry" parameter. The boolean
1822 * result indicates whether the address is
1823 * actually contained in the map.
1824 */
boolean_t
vm_map_lookup_entry(
	vm_map_t map,
	vm_map_offset_t address,
	vm_map_entry_t *entry) /* OUT */
{
#if CONFIG_KERNEL_TBI
	/* strip any pointer tag bits so the store compares raw addresses */
	if (VM_KERNEL_ADDRESS(address)) {
		address = VM_KERNEL_STRIP_UPTR(address);
	}
#endif /* CONFIG_KERNEL_TBI */
#if CONFIG_PROB_GZALLOC
	/* PGZ-guarded kernel addresses must be unguarded before lookup */
	if (map->pmap == kernel_pmap) {
		assertf(!pgz_owned(address),
		    "it is the responsibility of callers to unguard PGZ addresses");
	}
#endif /* CONFIG_PROB_GZALLOC */
	return vm_map_store_lookup_entry( map, address, entry );
}
1844
1845 boolean_t
vm_map_lookup_entry_or_next(vm_map_t map,vm_map_offset_t address,vm_map_entry_t * entry)1846 vm_map_lookup_entry_or_next(
1847 vm_map_t map,
1848 vm_map_offset_t address,
1849 vm_map_entry_t *entry) /* OUT */
1850 {
1851 if (vm_map_lookup_entry(map, address, entry)) {
1852 return true;
1853 }
1854
1855 *entry = (*entry)->vme_next;
1856 return false;
1857 }
1858
#if CONFIG_PROB_GZALLOC
/*
 * Variant of vm_map_lookup_entry() that skips the PGZ ownership assert,
 * for callers that legitimately look up PGZ-guarded addresses.
 */
boolean_t
vm_map_lookup_entry_allow_pgz(
	vm_map_t map,
	vm_map_offset_t address,
	vm_map_entry_t *entry) /* OUT */
{
#if CONFIG_KERNEL_TBI
	/* strip any pointer tag bits so the store compares raw addresses */
	if (VM_KERNEL_ADDRESS(address)) {
		address = VM_KERNEL_STRIP_UPTR(address);
	}
#endif /* CONFIG_KERNEL_TBI */
	return vm_map_store_lookup_entry( map, address, entry );
}
#endif /* CONFIG_PROB_GZALLOC */
1874
1875 #if !ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
1876 /*
1877 * Routine: vm_map_adjust_direction
1878 * Purpose:
1879 * Overrides direction to reduce fragmentation. Allocate small
1880 * allocations from the end and large allocations from the right.
1881 */
1882 static void
vm_map_adjust_direction(vm_map_kernel_flags_t * vmk_flags,vm_map_size_t size)1883 vm_map_adjust_direction(
1884 vm_map_kernel_flags_t *vmk_flags,
1885 vm_map_size_t size)
1886 {
1887 if (size < KMEM_SMALLMAP_THRESHOLD) {
1888 vmk_flags->vmkf_last_free = true;
1889 } else {
1890 vmk_flags->vmkf_last_free = false;
1891 }
1892 }
1893 #endif /* !ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
1894
1895 /*
1896 * Routine: vm_map_range_invalid_panic
1897 * Purpose:
1898 * Panic on detection of an invalid range id.
1899 */
1900 __abortlike
1901 static void
vm_map_range_invalid_panic(vm_map_t map,vm_map_range_id_t range_id)1902 vm_map_range_invalid_panic(
1903 vm_map_t map,
1904 vm_map_range_id_t range_id)
1905 {
1906 panic("invalid range ID (%u) for map %p", range_id, map);
1907 }
1908
1909 /*
1910 * Routine: vm_map_get_range
1911 * Purpose:
1912 * Adjust bounds based on security policy.
1913 */
1914 static struct mach_vm_range
vm_map_get_range(vm_map_t map,vm_map_address_t * address,vm_map_kernel_flags_t * vmk_flags,vm_map_size_t size)1915 vm_map_get_range(
1916 vm_map_t map,
1917 vm_map_address_t *address,
1918 vm_map_kernel_flags_t *vmk_flags,
1919 vm_map_size_t size)
1920 {
1921 struct mach_vm_range effective_range = {};
1922 vm_map_range_id_t range_id = vmk_flags->vmkf_range_id;
1923
1924 if (map == kernel_map) {
1925 effective_range = kmem_ranges[range_id];
1926
1927 if (startup_phase >= STARTUP_SUB_KMEM) {
1928 /*
1929 * Hint provided by caller is zeroed as the range is restricted to a
1930 * subset of the entire kernel_map VA, which could put the hint outside
1931 * the range, causing vm_map_store_find_space to fail.
1932 */
1933 *address = 0ull;
1934 /*
1935 * Ensure that range_id passed in by the caller is within meaningful
1936 * bounds. Range id of KMEM_RANGE_ID_NONE will cause vm_map_locate_space
1937 * to fail as the corresponding range is invalid. Range id larger than
1938 * KMEM_RANGE_ID_MAX will lead to an OOB access.
1939 */
1940 if ((range_id == KMEM_RANGE_ID_NONE) ||
1941 (range_id > KMEM_RANGE_ID_MAX)) {
1942 vm_map_range_invalid_panic(map, range_id);
1943 }
1944 #if ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
1945 /*
1946 * Each allocation front looks like [ S | L | S ]
1947 * Adjust range for allocations larger than KMEM_SMALLMAP_THRESHOLD.
1948 * Allocations smaller than KMEM_SMALLMAP_THRESHOLD are allowed to
1949 * use the entire range. Two small allocations from different fronts
1950 * (left and right) can only meet when memory in the that range is
1951 * entirely exhausted.
1952 */
1953 if (size >= KMEM_SMALLMAP_THRESHOLD) {
1954 effective_range = kmem_large_ranges[range_id];
1955 }
1956 #else /* ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
1957 vm_map_adjust_direction(vmk_flags, size);
1958 #endif /* ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
1959 }
1960 #if CONFIG_MAP_RANGES
1961 } else if (map->uses_user_ranges) {
1962 if (range_id > UMEM_RANGE_ID_MAX) {
1963 vm_map_range_invalid_panic(map, range_id);
1964 }
1965
1966 effective_range = map->user_range[range_id];
1967 #endif /* CONFIG_MAP_RANGES */
1968 } else {
1969 /*
1970 * If minimum is 0, bump it up by PAGE_SIZE. We want to limit
1971 * allocations of PAGEZERO to explicit requests since its
1972 * normal use is to catch dereferences of NULL and many
1973 * applications also treat pointers with a value of 0 as
1974 * special and suddenly having address 0 contain useable
1975 * memory would tend to confuse those applications.
1976 */
1977 effective_range.min_address = MAX(map->min_offset, VM_MAP_PAGE_SIZE(map));
1978 effective_range.max_address = map->max_offset;
1979 }
1980
1981 return effective_range;
1982 }
1983
1984 /*
1985 * Routine: vm_map_locate_space
1986 * Purpose:
1987 * Finds a range in the specified virtual address map,
1988 * returning the start of that range,
1989 * as well as the entry right before it.
1990 */
kern_return_t
vm_map_locate_space(
	vm_map_t map,
	vm_map_size_t size,
	vm_map_offset_t mask,
	vm_map_kernel_flags_t vmk_flags,
	vm_map_offset_t *start_inout,
	vm_map_entry_t *entry_out)
{
	struct mach_vm_range effective_range = {};
	vm_map_size_t guard_offset;
	vm_map_offset_t hint, limit;
	vm_map_entry_t entry;

	/*
	 * Only supported by vm_map_enter() with a fixed address.
	 */
	assert(!vmk_flags.vmkf_beyond_max);

	if (__improbable(map->wait_for_space)) {
		/*
		 * support for "wait_for_space" is minimal,
		 * its only consumer is the ipc_kernel_copy_map.
		 *
		 * The asserted flags are incompatible with blocking and
		 * retrying the search (see the "again:" loop below).
		 */
		assert(!map->holelistenabled &&
		    !vmk_flags.vmkf_last_free &&
		    !vmk_flags.vmkf_keep_map_locked &&
		    !vmk_flags.vmkf_map_jit &&
		    !vmk_flags.vmkf_random_address &&
		    *start_inout <= map->min_offset);
	} else if (vmk_flags.vmkf_last_free) {
		/* top-down search excludes JIT and randomized placement */
		assert(!vmk_flags.vmkf_map_jit &&
		    !vmk_flags.vmkf_random_address);
	}

	if (vmk_flags.vmkf_guard_before) {
		/*
		 * The caller's "size" includes a leading guard page:
		 * search for "size - one page" and account for the guard
		 * separately (passed as guard_offset to the map store).
		 */
		guard_offset = VM_MAP_PAGE_SIZE(map);
		assert(size > guard_offset);
		size -= guard_offset;
	} else {
		assert(size != 0);
		guard_offset = 0;
	}

	/* constrain the search to the map's applicable address range */
	effective_range = vm_map_get_range(map, start_inout, &vmk_flags, size);
#if XNU_TARGET_OS_OSX
	if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
		/* caller wants a VA representable in 32 bits */
		assert(map != kernel_map);
		effective_range.max_address = MIN(map->max_offset, 0x00000000FFFFF000ULL);
	}
#endif /* XNU_TARGET_OS_OSX */

again:
	if (vmk_flags.vmkf_last_free) {
		/*
		 * Top-down search: start at the hint (or the range's top)
		 * and scan down to the range's minimum address.
		 */
		hint = *start_inout;

		if (hint == 0 || hint > effective_range.max_address) {
			hint = effective_range.max_address;
		}
		if (hint <= effective_range.min_address) {
			return KERN_NO_SPACE;
		}
		limit = effective_range.min_address;
	} else {
		/* bottom-up search: from the hint up to the range's maximum */
		hint = *start_inout;

		if (vmk_flags.vmkf_map_jit) {
			if (map->jit_entry_exists &&
			    !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
				return KERN_INVALID_ARGUMENT;
			}
			if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
				/* policy forces randomized JIT placement */
				vmk_flags.vmkf_random_address = true;
			}
		}

		if (vmk_flags.vmkf_random_address) {
			kern_return_t kr;

			/* pick a random page-aligned hint within the range */
			kr = vm_map_random_address_for_size(map, &hint, size, vmk_flags);
			if (kr != KERN_SUCCESS) {
				return kr;
			}
		}
#if XNU_TARGET_OS_OSX
		else if ((hint == 0 || hint == vm_map_min(map)) &&
		    !map->disable_vmentry_reuse &&
		    map->vmmap_high_start != 0) {
			/* start the search above the reserved low region */
			hint = map->vmmap_high_start;
		}
#endif /* XNU_TARGET_OS_OSX */

		if (hint < effective_range.min_address) {
			hint = effective_range.min_address;
		}
		if (effective_range.max_address <= hint) {
			return KERN_NO_SPACE;
		}

		limit = effective_range.max_address;
	}
	entry = vm_map_store_find_space(map,
	    hint, limit, vmk_flags.vmkf_last_free,
	    guard_offset, size, mask,
	    start_inout);

	if (__improbable(entry == NULL)) {
		/*
		 * No hole found.  For "wait_for_space" maps, block until
		 * someone signals a deallocation (the map lock is dropped
		 * while blocked and retaken before retrying), but only if
		 * the request could ever fit in the range at all.
		 */
		if (map->wait_for_space &&
		    guard_offset + size <=
		    effective_range.max_address - effective_range.min_address) {
			assert_wait((event_t)map, THREAD_ABORTSAFE);
			vm_map_unlock(map);
			thread_block(THREAD_CONTINUE_NULL);
			vm_map_lock(map);
			goto again;
		}
		return KERN_NO_SPACE;
	}

	/* entry_out is optional: the entry preceding the found range */
	if (entry_out) {
		*entry_out = entry;
	}
	return KERN_SUCCESS;
}
2115
2116
2117 /*
2118 * Routine: vm_map_find_space
2119 * Purpose:
2120 * Allocate a range in the specified virtual address map,
2121 * returning the entry allocated for that range.
2122 * Used by kmem_alloc, etc.
2123 *
2124 * The map must be NOT be locked. It will be returned locked
2125 * on KERN_SUCCESS, unlocked on failure.
2126 *
2127 * If an entry is allocated, the object/offset fields
2128 * are initialized to zero.
2129 */
kern_return_t
vm_map_find_space(
	vm_map_t map,
	vm_map_offset_t hint_address,
	vm_map_size_t size,
	vm_map_offset_t mask,
	vm_map_kernel_flags_t vmk_flags,
	vm_map_entry_t *o_entry) /* OUT */
{
	vm_map_entry_t new_entry, entry;
	kern_return_t kr;

	if (size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Allocate and pre-initialize the new entry before taking the
	 * map lock, so no allocation happens while the map is locked.
	 */
	new_entry = vm_map_entry_create(map);
	new_entry->use_pmap = true;
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_ALL;

	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		/* map uses a page size different from the kernel's */
		new_entry->map_aligned = true;
	}
	if (vmk_flags.vmkf_permanent) {
		new_entry->vme_permanent = true;
	}

	vm_map_lock(map);

	kr = vm_map_locate_space(map, size, mask, vmk_flags,
	    &hint_address, &entry);
	if (kr != KERN_SUCCESS) {
		/* failure path: unlock and discard the unused entry */
		vm_map_unlock(map);
		vm_map_entry_dispose(new_entry);
		return kr;
	}
	/* hint_address now holds the start of the located range */
	new_entry->vme_start = hint_address;
	new_entry->vme_end = hint_address + size;

	/*
	 * At this point,
	 *
	 * - new_entry's "vme_start" and "vme_end" should define
	 *   the endpoints of the available new range,
	 *
	 * - and "entry" should refer to the region before
	 *   the new range,
	 *
	 * - and the map should still be locked.
	 */

	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, VM_MAP_PAGE_MASK(map)));

	/*
	 * Insert the new entry into the list
	 */

	vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
	map->size += size;

	/*
	 * Update the lookup hint
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);

	/* success: map is returned locked (see routine header above) */
	*o_entry = new_entry;
	return KERN_SUCCESS;
}
2202
/* Debug knob: when set, vm_map_pmap_enter() logs each page it prefaults. */
int vm_map_pmap_enter_print = FALSE;
/* Debug knob; NOTE(review): not read within this chunk — presumably gates prefaulting elsewhere. */
int vm_map_pmap_enter_enable = FALSE;
2205
2206 /*
2207 * Routine: vm_map_pmap_enter [internal only]
2208 *
2209 * Description:
2210 * Force pages from the specified object to be entered into
2211 * the pmap at the specified address if they are present.
2212 * As soon as a page not found in the object the scan ends.
2213 *
2214 * Returns:
2215 * Nothing.
2216 *
2217 * In/out conditions:
2218 * The source map should not be locked on entry.
2219 */
__unused static void
vm_map_pmap_enter(
	vm_map_t map,
	vm_map_offset_t addr,
	vm_map_offset_t end_addr,
	vm_object_t object,
	vm_object_offset_t offset,
	vm_prot_t protection)
{
	int type_of_fault;
	kern_return_t kr;
	struct vm_object_fault_info fault_info = {};

	/* nothing to enter mappings into */
	if (map->pmap == 0) {
		return;
	}

	assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);

	/* walk the range one page at a time */
	while (addr < end_addr) {
		vm_page_t m;


		/*
		 * TODO:
		 * From vm_map_enter(), we come into this function without the map
		 * lock held or the object lock held.
		 * We haven't taken a reference on the object either.
		 * We should do a proper lookup on the map to make sure
		 * that things are sane before we go locking objects that
		 * could have been deallocated from under us.
		 */

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);

		/*
		 * Stop at the first page that is absent or not safely
		 * usable (busy, fictitious, errored, restarting, ...),
		 * per the routine header: "As soon as a page not found
		 * in the object the scan ends."
		 */
		if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
		    (m->vmp_unusual && (VMP_ERROR_GET(m) || m->vmp_restart || m->vmp_absent))) {
			vm_object_unlock(object);
			return;
		}

		if (vm_map_pmap_enter_print) {
			printf("vm_map_pmap_enter:");
			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			    map, (unsigned long long)addr, object, (unsigned long long)offset);
		}
		type_of_fault = DBG_CACHE_HIT_FAULT;
		/*
		 * Enter the page into the pmap.  The result "kr" is not
		 * examined: prefaulting here is best-effort and a failed
		 * entry will simply fault again on first access.
		 */
		kr = vm_fault_enter(m, map->pmap,
		    addr,
		    PAGE_SIZE, 0,
		    protection, protection,
		    VM_PAGE_WIRED(m),
		    FALSE, /* change_wiring */
		    VM_KERN_MEMORY_NONE, /* tag - not wiring */
		    &fault_info,
		    NULL, /* need_retry */
		    &type_of_fault);

		vm_object_unlock(object);

		offset += PAGE_SIZE_64;
		addr += PAGE_SIZE;
	}
}
2286
#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
/*
 * Pick a random page-aligned address within the map's effective range
 * at which a "size"-byte allocation would currently fit.
 * Returns KERN_NO_SPACE if the range is too small or if no suitable
 * address was found after MAX_TRIES_TO_GET_RANDOM_ADDRESS attempts.
 * NOTE(review): walks map entries, so the map lock is presumably held
 * by the caller — confirm against call sites.
 */
kern_return_t
vm_map_random_address_for_size(
	vm_map_t map,
	vm_map_offset_t *address,
	vm_map_size_t size,
	vm_map_kernel_flags_t vmk_flags)
{
	kern_return_t kr = KERN_SUCCESS;
	int tries = 0;
	vm_map_offset_t random_addr = 0;
	vm_map_offset_t hole_end;

	vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
	vm_map_size_t vm_hole_size = 0;
	vm_map_size_t addr_space_size;
	struct mach_vm_range effective_range = vm_map_get_range(map, address, &vmk_flags, size);

	/*
	 * Shrink the candidate space by "size" so that any chosen start
	 * address leaves room for the whole allocation within the range.
	 */
	addr_space_size = effective_range.max_address - effective_range.min_address;
	if (size >= addr_space_size) {
		return KERN_NO_SPACE;
	}
	addr_space_size -= size;

	assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));

	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		/* early_random() is the only entropy source before zalloc is up */
		if (startup_phase < STARTUP_SUB_ZALLOC) {
			random_addr = (vm_map_offset_t)early_random();
		} else {
			random_addr = (vm_map_offset_t)random();
		}
		/*
		 * Scale the random value to page granularity, fold it into
		 * the candidate space, and truncate to a page boundary.
		 */
		random_addr <<= VM_MAP_PAGE_SHIFT(map);
		random_addr = vm_map_trunc_page(
			effective_range.min_address + (random_addr % addr_space_size),
			VM_MAP_PAGE_MASK(map));

#if CONFIG_PROB_GZALLOC
		/* don't land on an address owned by the probabilistic guard allocator */
		if (map->pmap == kernel_pmap && pgz_owned(random_addr)) {
			continue;
		}
#endif /* CONFIG_PROB_GZALLOC */

		/*
		 * The address must fall in a hole (lookup fails) and the
		 * hole must extend at least "size" bytes past it.
		 */
		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
			if (prev_entry == vm_map_to_entry(map)) {
				next_entry = vm_map_first_entry(map);
			} else {
				next_entry = prev_entry->vme_next;
			}
			if (next_entry == vm_map_to_entry(map)) {
				hole_end = vm_map_max(map);
			} else {
				hole_end = next_entry->vme_start;
			}
			vm_hole_size = hole_end - random_addr;
			if (vm_hole_size >= size) {
				/* found a fitting hole: return its start */
				*address = random_addr;
				break;
			}
		}
		tries++;
	}

	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		kr = KERN_NO_SPACE;
	}
	return kr;
}
2356
2357 static boolean_t
vm_memory_malloc_no_cow(int alias)2358 vm_memory_malloc_no_cow(
2359 int alias)
2360 {
2361 uint64_t alias_mask;
2362
2363 if (alias > 63) {
2364 return FALSE;
2365 }
2366
2367 alias_mask = 1ULL << alias;
2368 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2369 return TRUE;
2370 }
2371 return FALSE;
2372 }
2373
/*
 * Telemetry counters; NOTE(review): not incremented within this chunk —
 * presumably bumped where vm_map_enter hits the RLIMIT_AS / RLIMIT_DATA
 * resource limits.  Confirm against the rest of this file.
 */
uint64_t vm_map_enter_RLIMIT_AS_count = 0;
uint64_t vm_map_enter_RLIMIT_DATA_count = 0;
/*
 *	Routine:	vm_map_enter
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		The resulting range will refer to memory defined by
 *		the given memory object and offset into that object.
 *
 *		Arguments are as defined in the vm_map call.
 */
/* Counters for the "restore old mappings after a failed overwrite" path. */
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
2388 kern_return_t
vm_map_enter(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t alias,vm_object_t object,vm_object_offset_t offset,boolean_t needs_copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)2389 vm_map_enter(
2390 vm_map_t map,
2391 vm_map_offset_t *address, /* IN/OUT */
2392 vm_map_size_t size,
2393 vm_map_offset_t mask,
2394 int flags,
2395 vm_map_kernel_flags_t vmk_flags,
2396 vm_tag_t alias,
2397 vm_object_t object,
2398 vm_object_offset_t offset,
2399 boolean_t needs_copy,
2400 vm_prot_t cur_protection,
2401 vm_prot_t max_protection,
2402 vm_inherit_t inheritance)
2403 {
2404 vm_map_entry_t entry, new_entry;
2405 vm_map_offset_t start, tmp_start, tmp_offset;
2406 vm_map_offset_t end, tmp_end;
2407 vm_map_offset_t tmp2_start, tmp2_end;
2408 vm_map_offset_t step;
2409 kern_return_t result = KERN_SUCCESS;
2410 boolean_t map_locked = FALSE;
2411 boolean_t pmap_empty = TRUE;
2412 boolean_t new_mapping_established = FALSE;
2413 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2414 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2415 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2416 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2417 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2418 const boolean_t is_submap = vmk_flags.vmkf_submap;
2419 boolean_t permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
2420 const boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2421 const boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2422 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2423 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2424 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2425 boolean_t entry_for_tpro = ((flags & VM_FLAGS_TPRO) != 0);
2426 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2427 vm_tag_t user_alias;
2428 kern_return_t kr;
2429 boolean_t clear_map_aligned = FALSE;
2430 vm_map_size_t chunk_size = 0;
2431 vm_object_t caller_object;
2432 VM_MAP_ZAP_DECLARE(zap_old_list);
2433 VM_MAP_ZAP_DECLARE(zap_new_list);
2434
2435 caller_object = object;
2436
2437 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2438
2439 if (flags & VM_FLAGS_4GB_CHUNK) {
2440 #if defined(__LP64__)
2441 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2442 #else /* __LP64__ */
2443 chunk_size = ANON_CHUNK_SIZE;
2444 #endif /* __LP64__ */
2445 } else {
2446 chunk_size = ANON_CHUNK_SIZE;
2447 }
2448
2449 if (superpage_size) {
2450 switch (superpage_size) {
2451 /*
2452 * Note that the current implementation only supports
2453 * a single size for superpages, SUPERPAGE_SIZE, per
2454 * architecture. As soon as more sizes are supposed
2455 * to be supported, SUPERPAGE_SIZE has to be replaced
2456 * with a lookup of the size depending on superpage_size.
2457 */
2458 #ifdef __x86_64__
2459 case SUPERPAGE_SIZE_ANY:
2460 /* handle it like 2 MB and round up to page size */
2461 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2462 OS_FALLTHROUGH;
2463 case SUPERPAGE_SIZE_2MB:
2464 break;
2465 #endif
2466 default:
2467 return KERN_INVALID_ARGUMENT;
2468 }
2469 mask = SUPERPAGE_SIZE - 1;
2470 if (size & (SUPERPAGE_SIZE - 1)) {
2471 return KERN_INVALID_ARGUMENT;
2472 }
2473 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2474 }
2475
2476
2477 if ((cur_protection & VM_PROT_WRITE) &&
2478 (cur_protection & VM_PROT_EXECUTE) &&
2479 #if XNU_TARGET_OS_OSX
2480 map->pmap != kernel_pmap &&
2481 (cs_process_global_enforcement() ||
2482 (vmk_flags.vmkf_cs_enforcement_override
2483 ? vmk_flags.vmkf_cs_enforcement
2484 : (vm_map_cs_enforcement(map)
2485 #if __arm64__
2486 || !VM_MAP_IS_EXOTIC(map)
2487 #endif /* __arm64__ */
2488 ))) &&
2489 #endif /* XNU_TARGET_OS_OSX */
2490 (VM_MAP_POLICY_WX_FAIL(map) ||
2491 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2492 !entry_for_jit) {
2493 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2494
2495 DTRACE_VM3(cs_wx,
2496 uint64_t, 0,
2497 uint64_t, 0,
2498 vm_prot_t, cur_protection);
2499 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2500 proc_selfpid(),
2501 (get_bsdtask_info(current_task())
2502 ? proc_name_address(get_bsdtask_info(current_task()))
2503 : "?"),
2504 __FUNCTION__,
2505 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2506 cur_protection &= ~VM_PROT_EXECUTE;
2507 if (vm_protect_wx_fail) {
2508 return KERN_PROTECTION_FAILURE;
2509 }
2510 }
2511
2512 /*
2513 * If the task has requested executable lockdown,
2514 * deny any new executable mapping.
2515 */
2516 if (map->map_disallow_new_exec == TRUE) {
2517 if (cur_protection & VM_PROT_EXECUTE) {
2518 return KERN_PROTECTION_FAILURE;
2519 }
2520 }
2521
2522 if (resilient_codesign) {
2523 assert(!is_submap);
2524 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
2525 if ((cur_protection | max_protection) & reject_prot) {
2526 return KERN_PROTECTION_FAILURE;
2527 }
2528 }
2529
2530 if (resilient_media) {
2531 assert(!is_submap);
2532 // assert(!needs_copy);
2533 if (object != VM_OBJECT_NULL &&
2534 !object->internal) {
2535 /*
2536 * This mapping is directly backed by an external
2537 * memory manager (e.g. a vnode pager for a file):
2538 * we would not have any safe place to inject
2539 * a zero-filled page if an actual page is not
2540 * available, without possibly impacting the actual
2541 * contents of the mapped object (e.g. the file),
2542 * so we can't provide any media resiliency here.
2543 */
2544 return KERN_INVALID_ARGUMENT;
2545 }
2546 }
2547
2548 if (is_submap) {
2549 vm_map_t submap;
2550 if (purgable) {
2551 /* submaps can not be purgeable */
2552 return KERN_INVALID_ARGUMENT;
2553 }
2554 if (object == VM_OBJECT_NULL) {
2555 /* submaps can not be created lazily */
2556 return KERN_INVALID_ARGUMENT;
2557 }
2558 submap = (vm_map_t) object;
2559 if (VM_MAP_PAGE_SHIFT(submap) != VM_MAP_PAGE_SHIFT(map)) {
2560 /* page size mismatch */
2561 return KERN_INVALID_ARGUMENT;
2562 }
2563 }
2564 if (vmk_flags.vmkf_already) {
2565 /*
2566 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2567 * is already present. For it to be meaningul, the requested
2568 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2569 * we shouldn't try and remove what was mapped there first
2570 * (!VM_FLAGS_OVERWRITE).
2571 */
2572 if ((flags & VM_FLAGS_ANYWHERE) ||
2573 (flags & VM_FLAGS_OVERWRITE)) {
2574 return KERN_INVALID_ARGUMENT;
2575 }
2576 }
2577
2578 if (size == 0 ||
2579 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2580 *address = 0;
2581 return KERN_INVALID_ARGUMENT;
2582 }
2583
2584 if (map->pmap == kernel_pmap) {
2585 user_alias = VM_KERN_MEMORY_NONE;
2586 } else {
2587 user_alias = alias;
2588 }
2589
2590 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2591 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2592 }
2593
2594 #define RETURN(value) { result = value; goto BailOut; }
2595
2596 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2597 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2598 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2599 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2600 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2601 }
2602
2603 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2604 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2605 /*
2606 * In most cases, the caller rounds the size up to the
2607 * map's page size.
2608 * If we get a size that is explicitly not map-aligned here,
2609 * we'll have to respect the caller's wish and mark the
2610 * mapping as "not map-aligned" to avoid tripping the
2611 * map alignment checks later.
2612 */
2613 clear_map_aligned = TRUE;
2614 }
2615 if (!anywhere &&
2616 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2617 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2618 /*
2619 * We've been asked to map at a fixed address and that
2620 * address is not aligned to the map's specific alignment.
2621 * The caller should know what it's doing (i.e. most likely
2622 * mapping some fragmented copy map, transferring memory from
2623 * a VM map with a different alignment), so clear map_aligned
2624 * for this new VM map entry and proceed.
2625 */
2626 clear_map_aligned = TRUE;
2627 }
2628
2629 /*
2630 * Only zero-fill objects are allowed to be purgable.
2631 * LP64todo - limit purgable objects to 32-bits for now
2632 */
2633 if (purgable &&
2634 (offset != 0 ||
2635 (object != VM_OBJECT_NULL &&
2636 (object->vo_size != size ||
2637 object->purgable == VM_PURGABLE_DENY))
2638 #if __LP64__
2639 || size > ANON_MAX_SIZE
2640 #endif
2641 )) {
2642 return KERN_INVALID_ARGUMENT;
2643 }
2644
2645 start = *address;
2646
2647 if (anywhere) {
2648 vm_map_lock(map);
2649 map_locked = TRUE;
2650
2651 if (flags & VM_FLAGS_RANDOM_ADDR) {
2652 vmk_flags.vmkf_random_address = true;
2653 }
2654
2655 result = vm_map_locate_space(map, size, mask, vmk_flags,
2656 &start, &entry);
2657 if (result != KERN_SUCCESS) {
2658 goto BailOut;
2659 }
2660
2661 *address = start;
2662 end = start + size;
2663 assert(VM_MAP_PAGE_ALIGNED(*address,
2664 VM_MAP_PAGE_MASK(map)));
2665 } else {
2666 vm_map_offset_t effective_min_offset, effective_max_offset;
2667
2668 effective_min_offset = map->min_offset;
2669 effective_max_offset = map->max_offset;
2670
2671 if (vmk_flags.vmkf_beyond_max) {
2672 /*
2673 * Allow an insertion beyond the map's max offset.
2674 */
2675 effective_max_offset = 0x00000000FFFFF000ULL;
2676 if (vm_map_is_64bit(map)) {
2677 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2678 }
2679 #if XNU_TARGET_OS_OSX
2680 } else if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2681 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2682 #endif /* XNU_TARGET_OS_OSX */
2683 }
2684
2685 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2686 !overwrite &&
2687 user_alias == VM_MEMORY_REALLOC) {
2688 /*
2689 * Force realloc() to switch to a new allocation,
2690 * to prevent 4k-fragmented virtual ranges.
2691 */
2692 // DEBUG4K_ERROR("no realloc in place");
2693 return KERN_NO_SPACE;
2694 }
2695
2696 /*
2697 * Verify that:
2698 * the address doesn't itself violate
2699 * the mask requirement.
2700 */
2701
2702 vm_map_lock(map);
2703 map_locked = TRUE;
2704 if ((start & mask) != 0) {
2705 RETURN(KERN_NO_SPACE);
2706 }
2707
2708 /*
2709 * ... the address is within bounds
2710 */
2711
2712 end = start + size;
2713
2714 if ((start < effective_min_offset) ||
2715 (end > effective_max_offset) ||
2716 (start >= end)) {
2717 RETURN(KERN_INVALID_ADDRESS);
2718 }
2719
2720 if (overwrite) {
2721 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_MAP_ALIGN;
2722 kern_return_t remove_kr;
2723
2724 /*
2725 * Fixed mapping and "overwrite" flag: attempt to
2726 * remove all existing mappings in the specified
2727 * address range, saving them in our "zap_old_list".
2728 *
2729 * This avoids releasing the VM map lock in
2730 * vm_map_entry_delete() and allows atomicity
2731 * when we want to replace some mappings with a new one.
2732 * It also allows us to restore the old VM mappings if the
2733 * new mapping fails.
2734 */
2735 remove_flags |= VM_MAP_REMOVE_NO_YIELD;
2736
2737 if (vmk_flags.vmkf_overwrite_immutable) {
2738 /* we can overwrite immutable mappings */
2739 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2740 }
2741 if (vmk_flags.vmkf_remap_prot_copy) {
2742 remove_flags |= VM_MAP_REMOVE_IMMUTABLE_CODE;
2743 }
2744 remove_kr = vm_map_delete(map, start, end, remove_flags,
2745 KMEM_GUARD_NONE, &zap_old_list).kmr_return;
2746 if (remove_kr) {
2747 /* XXX FBDP restore zap_old_list? */
2748 RETURN(remove_kr);
2749 }
2750 }
2751
2752 /*
2753 * ... the starting address isn't allocated
2754 */
2755
2756 if (vm_map_lookup_entry(map, start, &entry)) {
2757 if (!(vmk_flags.vmkf_already)) {
2758 RETURN(KERN_NO_SPACE);
2759 }
2760 /*
2761 * Check if what's already there is what we want.
2762 */
2763 tmp_start = start;
2764 tmp_offset = offset;
2765 if (entry->vme_start < start) {
2766 tmp_start -= start - entry->vme_start;
2767 tmp_offset -= start - entry->vme_start;
2768 }
2769 for (; entry->vme_start < end;
2770 entry = entry->vme_next) {
2771 /*
2772 * Check if the mapping's attributes
2773 * match the existing map entry.
2774 */
2775 if (entry == vm_map_to_entry(map) ||
2776 entry->vme_start != tmp_start ||
2777 entry->is_sub_map != is_submap ||
2778 VME_OFFSET(entry) != tmp_offset ||
2779 entry->needs_copy != needs_copy ||
2780 entry->protection != cur_protection ||
2781 entry->max_protection != max_protection ||
2782 entry->inheritance != inheritance ||
2783 entry->iokit_acct != iokit_acct ||
2784 VME_ALIAS(entry) != alias) {
2785 /* not the same mapping ! */
2786 RETURN(KERN_NO_SPACE);
2787 }
2788 /*
2789 * Check if the same object is being mapped.
2790 */
2791 if (is_submap) {
2792 if (VME_SUBMAP(entry) !=
2793 (vm_map_t) object) {
2794 /* not the same submap */
2795 RETURN(KERN_NO_SPACE);
2796 }
2797 } else {
2798 if (VME_OBJECT(entry) != object) {
2799 /* not the same VM object... */
2800 vm_object_t obj2;
2801
2802 obj2 = VME_OBJECT(entry);
2803 if ((obj2 == VM_OBJECT_NULL ||
2804 obj2->internal) &&
2805 (object == VM_OBJECT_NULL ||
2806 object->internal)) {
2807 /*
2808 * ... but both are
2809 * anonymous memory,
2810 * so equivalent.
2811 */
2812 } else {
2813 RETURN(KERN_NO_SPACE);
2814 }
2815 }
2816 }
2817
2818 tmp_offset += entry->vme_end - entry->vme_start;
2819 tmp_start += entry->vme_end - entry->vme_start;
2820 if (entry->vme_end >= end) {
2821 /* reached the end of our mapping */
2822 break;
2823 }
2824 }
2825 /* it all matches: let's use what's already there ! */
2826 RETURN(KERN_MEMORY_PRESENT);
2827 }
2828
2829 /*
2830 * ... the next region doesn't overlap the
2831 * end point.
2832 */
2833
2834 if ((entry->vme_next != vm_map_to_entry(map)) &&
2835 (entry->vme_next->vme_start < end)) {
2836 RETURN(KERN_NO_SPACE);
2837 }
2838 }
2839
2840 /*
2841 * At this point,
2842 * "start" and "end" should define the endpoints of the
2843 * available new range, and
2844 * "entry" should refer to the region before the new
2845 * range, and
2846 *
2847 * the map should be locked.
2848 */
2849
2850 /*
2851 * See whether we can avoid creating a new entry (and object) by
2852 * extending one of our neighbors. [So far, we only attempt to
2853 * extend from below.] Note that we can never extend/join
2854 * purgable objects because they need to remain distinct
2855 * entities in order to implement their "volatile object"
2856 * semantics.
2857 */
2858
2859 if (purgable ||
2860 entry_for_jit ||
2861 entry_for_tpro ||
2862 vm_memory_malloc_no_cow(user_alias)) {
2863 if (object == VM_OBJECT_NULL) {
2864 object = vm_object_allocate(size);
2865 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2866 object->true_share = FALSE;
2867 if (purgable) {
2868 task_t owner;
2869 object->purgable = VM_PURGABLE_NONVOLATILE;
2870 if (map->pmap == kernel_pmap) {
2871 /*
2872 * Purgeable mappings made in a kernel
2873 * map are "owned" by the kernel itself
2874 * rather than the current user task
2875 * because they're likely to be used by
2876 * more than this user task (see
2877 * execargs_purgeable_allocate(), for
2878 * example).
2879 */
2880 owner = kernel_task;
2881 } else {
2882 owner = current_task();
2883 }
2884 assert(object->vo_owner == NULL);
2885 assert(object->resident_page_count == 0);
2886 assert(object->wired_page_count == 0);
2887 vm_object_lock(object);
2888 vm_purgeable_nonvolatile_enqueue(object, owner);
2889 vm_object_unlock(object);
2890 }
2891 offset = (vm_object_offset_t)0;
2892 }
2893 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
2894 /* no coalescing if address space uses sub-pages */
2895 } else if ((is_submap == FALSE) &&
2896 (object == VM_OBJECT_NULL) &&
2897 (entry != vm_map_to_entry(map)) &&
2898 (entry->vme_end == start) &&
2899 (!entry->is_shared) &&
2900 (!entry->is_sub_map) &&
2901 (!entry->in_transition) &&
2902 (!entry->needs_wakeup) &&
2903 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2904 (entry->protection == cur_protection) &&
2905 (entry->max_protection == max_protection) &&
2906 (entry->inheritance == inheritance) &&
2907 ((user_alias == VM_MEMORY_REALLOC) ||
2908 (VME_ALIAS(entry) == alias)) &&
2909 (entry->no_cache == no_cache) &&
2910 (entry->vme_permanent == permanent) &&
2911 /* no coalescing for immutable executable mappings */
2912 !((entry->protection & VM_PROT_EXECUTE) &&
2913 entry->vme_permanent) &&
2914 (!entry->superpage_size && !superpage_size) &&
2915 /*
2916 * No coalescing if not map-aligned, to avoid propagating
2917 * that condition any further than needed:
2918 */
2919 (!entry->map_aligned || !clear_map_aligned) &&
2920 (!entry->zero_wired_pages) &&
2921 (!entry->used_for_jit && !entry_for_jit) &&
2922 (!entry->pmap_cs_associated) &&
2923 (entry->iokit_acct == iokit_acct) &&
2924 (!entry->vme_resilient_codesign) &&
2925 (!entry->vme_resilient_media) &&
2926 (!entry->vme_atomic) &&
2927 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2928
2929 ((entry->vme_end - entry->vme_start) + size <=
2930 (user_alias == VM_MEMORY_REALLOC ?
2931 ANON_CHUNK_SIZE :
2932 NO_COALESCE_LIMIT)) &&
2933
2934 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2935 if (vm_object_coalesce(VME_OBJECT(entry),
2936 VM_OBJECT_NULL,
2937 VME_OFFSET(entry),
2938 (vm_object_offset_t) 0,
2939 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2940 (vm_map_size_t)(end - entry->vme_end))) {
2941 /*
2942 * Coalesced the two objects - can extend
2943 * the previous map entry to include the
2944 * new range.
2945 */
2946 map->size += (end - entry->vme_end);
2947 assert(entry->vme_start < end);
2948 assert(VM_MAP_PAGE_ALIGNED(end,
2949 VM_MAP_PAGE_MASK(map)));
2950 if (__improbable(vm_debug_events)) {
2951 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2952 }
2953 entry->vme_end = end;
2954 if (map->holelistenabled) {
2955 vm_map_store_update_first_free(map, entry, TRUE);
2956 } else {
2957 vm_map_store_update_first_free(map, map->first_free, TRUE);
2958 }
2959 new_mapping_established = TRUE;
2960 RETURN(KERN_SUCCESS);
2961 }
2962 }
2963
2964 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2965 new_entry = NULL;
2966
2967 if (vmk_flags.vmkf_submap_adjust) {
2968 vm_map_adjust_offsets((vm_map_t)caller_object, start, end);
2969 offset = start;
2970 }
2971
2972 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2973 tmp2_end = tmp2_start + step;
2974 /*
2975 * Create a new entry
2976 *
2977 * XXX FBDP
2978 * The reserved "page zero" in each process's address space can
2979 * be arbitrarily large. Splitting it into separate objects and
2980 * therefore different VM map entries serves no purpose and just
2981 * slows down operations on the VM map, so let's not split the
2982 * allocation into chunks if the max protection is NONE. That
2983 * memory should never be accessible, so it will never get to the
2984 * default pager.
2985 */
2986 tmp_start = tmp2_start;
2987 if (!is_submap &&
2988 object == VM_OBJECT_NULL &&
2989 size > chunk_size &&
2990 max_protection != VM_PROT_NONE &&
2991 superpage_size == 0) {
2992 tmp_end = tmp_start + chunk_size;
2993 } else {
2994 tmp_end = tmp2_end;
2995 }
2996 do {
2997 if (!is_submap &&
2998 object != VM_OBJECT_NULL &&
2999 object->internal &&
3000 offset + (tmp_end - tmp_start) > object->vo_size) {
3001 // printf("FBDP object %p size 0x%llx overmapping offset 0x%llx size 0x%llx\n", object, object->vo_size, offset, (uint64_t)(tmp_end - tmp_start));
3002 DTRACE_VM5(vm_map_enter_overmap,
3003 vm_map_t, map,
3004 vm_map_address_t, tmp_start,
3005 vm_map_address_t, tmp_end,
3006 vm_object_offset_t, offset,
3007 vm_object_size_t, object->vo_size);
3008 }
3009 new_entry = vm_map_entry_insert(map,
3010 entry, tmp_start, tmp_end,
3011 object, offset, vmk_flags,
3012 needs_copy,
3013 cur_protection, max_protection,
3014 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3015 VM_INHERIT_NONE : inheritance),
3016 no_cache,
3017 permanent,
3018 superpage_size,
3019 clear_map_aligned,
3020 alias);
3021
3022 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3023
3024 if (resilient_codesign) {
3025 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
3026 if (!((cur_protection | max_protection) & reject_prot)) {
3027 new_entry->vme_resilient_codesign = TRUE;
3028 }
3029 }
3030
3031 if (resilient_media &&
3032 (object == VM_OBJECT_NULL ||
3033 object->internal)) {
3034 new_entry->vme_resilient_media = TRUE;
3035 }
3036
3037 assert(!new_entry->iokit_acct);
3038 if (!is_submap &&
3039 object != VM_OBJECT_NULL &&
3040 (object->purgable != VM_PURGABLE_DENY ||
3041 object->vo_ledger_tag)) {
3042 assert(new_entry->use_pmap);
3043 assert(!new_entry->iokit_acct);
3044 /*
3045 * Turn off pmap accounting since
3046 * purgeable (or tagged) objects have their
3047 * own ledgers.
3048 */
3049 new_entry->use_pmap = FALSE;
3050 } else if (!is_submap &&
3051 iokit_acct &&
3052 object != VM_OBJECT_NULL &&
3053 object->internal) {
3054 /* alternate accounting */
3055 assert(!new_entry->iokit_acct);
3056 assert(new_entry->use_pmap);
3057 new_entry->iokit_acct = TRUE;
3058 new_entry->use_pmap = FALSE;
3059 DTRACE_VM4(
3060 vm_map_iokit_mapped_region,
3061 vm_map_t, map,
3062 vm_map_offset_t, new_entry->vme_start,
3063 vm_map_offset_t, new_entry->vme_end,
3064 int, VME_ALIAS(new_entry));
3065 vm_map_iokit_mapped_region(
3066 map,
3067 (new_entry->vme_end -
3068 new_entry->vme_start));
3069 } else if (!is_submap) {
3070 assert(!new_entry->iokit_acct);
3071 assert(new_entry->use_pmap);
3072 }
3073
3074 if (is_submap) {
3075 vm_map_t submap;
3076 boolean_t submap_is_64bit;
3077 boolean_t use_pmap;
3078
3079 assert(new_entry->is_sub_map);
3080 assert(!new_entry->use_pmap);
3081 assert(!new_entry->iokit_acct);
3082 submap = (vm_map_t) object;
3083 submap_is_64bit = vm_map_is_64bit(submap);
3084 use_pmap = vmk_flags.vmkf_nested_pmap;
3085 #ifndef NO_NESTED_PMAP
3086 if (use_pmap && submap->pmap == NULL) {
3087 ledger_t ledger = map->pmap->ledger;
3088 /* we need a sub pmap to nest... */
3089 submap->pmap = pmap_create_options(ledger, 0,
3090 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3091 if (submap->pmap == NULL) {
3092 /* let's proceed without nesting... */
3093 }
3094 #if defined(__arm64__)
3095 else {
3096 pmap_set_nested(submap->pmap);
3097 }
3098 #endif
3099 }
3100 if (use_pmap && submap->pmap != NULL) {
3101 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3102 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3103 kr = KERN_FAILURE;
3104 } else {
3105 kr = pmap_nest(map->pmap,
3106 submap->pmap,
3107 tmp_start,
3108 tmp_end - tmp_start);
3109 }
3110 if (kr != KERN_SUCCESS) {
3111 printf("vm_map_enter: "
3112 "pmap_nest(0x%llx,0x%llx) "
3113 "error 0x%x\n",
3114 (long long)tmp_start,
3115 (long long)tmp_end,
3116 kr);
3117 } else {
3118 /* we're now nested ! */
3119 new_entry->use_pmap = TRUE;
3120 pmap_empty = FALSE;
3121 }
3122 }
3123 #endif /* NO_NESTED_PMAP */
3124 }
3125 entry = new_entry;
3126
3127 if (superpage_size) {
3128 vm_page_t pages, m;
3129 vm_object_t sp_object;
3130 vm_object_offset_t sp_offset;
3131
3132 VME_OFFSET_SET(entry, 0);
3133
3134 /* allocate one superpage */
3135 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3136 if (kr != KERN_SUCCESS) {
3137 /* deallocate whole range... */
3138 new_mapping_established = TRUE;
3139 /* ... but only up to "tmp_end" */
3140 size -= end - tmp_end;
3141 RETURN(kr);
3142 }
3143
3144 /* create one vm_object per superpage */
3145 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3146 sp_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3147 sp_object->phys_contiguous = TRUE;
3148 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3149 VME_OBJECT_SET(entry, sp_object, false, 0);
3150 assert(entry->use_pmap);
3151
3152 /* enter the base pages into the object */
3153 vm_object_lock(sp_object);
3154 for (sp_offset = 0;
3155 sp_offset < SUPERPAGE_SIZE;
3156 sp_offset += PAGE_SIZE) {
3157 m = pages;
3158 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3159 pages = NEXT_PAGE(m);
3160 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3161 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3162 }
3163 vm_object_unlock(sp_object);
3164 }
3165 } while (tmp_end != tmp2_end &&
3166 (tmp_start = tmp_end) &&
3167 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3168 tmp_end + chunk_size : tmp2_end));
3169 }
3170
3171 new_mapping_established = TRUE;
3172
3173 BailOut:
3174 assert(map_locked == TRUE);
3175
3176 /*
3177 * Address space limit enforcement (RLIMIT_AS and RLIMIT_DATA):
3178 * If we have identified and possibly established the new mapping(s),
3179 * make sure we did not go beyond the address space limit.
3180 */
3181 if (result == KERN_SUCCESS) {
3182 if (map->size_limit != RLIM_INFINITY &&
3183 map->size > map->size_limit) {
3184 /*
3185 * Establishing the requested mappings would exceed
3186 * the process's RLIMIT_AS limit: fail with
3187 * KERN_NO_SPACE.
3188 */
3189 result = KERN_NO_SPACE;
3190 printf("%d[%s] %s: map size 0x%llx over RLIMIT_AS 0x%llx\n",
3191 proc_selfpid(),
3192 (get_bsdtask_info(current_task())
3193 ? proc_name_address(get_bsdtask_info(current_task()))
3194 : "?"),
3195 __FUNCTION__,
3196 (uint64_t) map->size,
3197 (uint64_t) map->size_limit);
3198 DTRACE_VM2(vm_map_enter_RLIMIT_AS,
3199 vm_map_size_t, map->size,
3200 uint64_t, map->size_limit);
3201 vm_map_enter_RLIMIT_AS_count++;
3202 } else if (map->data_limit != RLIM_INFINITY &&
3203 map->size > map->data_limit) {
3204 /*
3205 * Establishing the requested mappings would exceed
3206 * the process's RLIMIT_DATA limit: fail with
3207 * KERN_NO_SPACE.
3208 */
3209 result = KERN_NO_SPACE;
3210 printf("%d[%s] %s: map size 0x%llx over RLIMIT_DATA 0x%llx\n",
3211 proc_selfpid(),
3212 (get_bsdtask_info(current_task())
3213 ? proc_name_address(get_bsdtask_info(current_task()))
3214 : "?"),
3215 __FUNCTION__,
3216 (uint64_t) map->size,
3217 (uint64_t) map->data_limit);
3218 DTRACE_VM2(vm_map_enter_RLIMIT_DATA,
3219 vm_map_size_t, map->size,
3220 uint64_t, map->data_limit);
3221 vm_map_enter_RLIMIT_DATA_count++;
3222 }
3223 }
3224
3225 if (result == KERN_SUCCESS) {
3226 vm_prot_t pager_prot;
3227 memory_object_t pager;
3228
3229 #if DEBUG
3230 if (pmap_empty &&
3231 !(vmk_flags.vmkf_no_pmap_check)) {
3232 assert(pmap_is_empty(map->pmap,
3233 *address,
3234 *address + size));
3235 }
3236 #endif /* DEBUG */
3237
3238 /*
3239 * For "named" VM objects, let the pager know that the
3240 * memory object is being mapped. Some pagers need to keep
3241 * track of this, to know when they can reclaim the memory
3242 * object, for example.
3243 * VM calls memory_object_map() for each mapping (specifying
3244 * the protection of each mapping) and calls
3245 * memory_object_last_unmap() when all the mappings are gone.
3246 */
3247 pager_prot = max_protection;
3248 if (needs_copy) {
3249 /*
3250 * Copy-On-Write mapping: won't modify
3251 * the memory object.
3252 */
3253 pager_prot &= ~VM_PROT_WRITE;
3254 }
3255 if (!is_submap &&
3256 object != VM_OBJECT_NULL &&
3257 object->named &&
3258 object->pager != MEMORY_OBJECT_NULL) {
3259 vm_object_lock(object);
3260 pager = object->pager;
3261 if (object->named &&
3262 pager != MEMORY_OBJECT_NULL) {
3263 assert(object->pager_ready);
3264 vm_object_mapping_wait(object, THREAD_UNINT);
3265 vm_object_mapping_begin(object);
3266 vm_object_unlock(object);
3267
3268 kr = memory_object_map(pager, pager_prot);
3269 assert(kr == KERN_SUCCESS);
3270
3271 vm_object_lock(object);
3272 vm_object_mapping_end(object);
3273 }
3274 vm_object_unlock(object);
3275 }
3276 }
3277
3278 assert(map_locked == TRUE);
3279
3280 if (new_mapping_established) {
3281 /*
3282 * If we release the map lock for any reason below,
3283 * another thread could deallocate our new mapping,
3284 * releasing the caller's reference on "caller_object",
3285 * which was transferred to the mapping.
3286 * If this was the only reference, the object could be
3287 * destroyed.
3288 *
3289 * We need to take an extra reference on "caller_object"
3290 * to keep it alive if we need to return the caller's
3291 * reference to the caller in case of failure.
3292 */
3293 if (is_submap) {
3294 vm_map_reference((vm_map_t)caller_object);
3295 } else {
3296 vm_object_reference(caller_object);
3297 }
3298 }
3299
3300 if (!keep_map_locked) {
3301 vm_map_unlock(map);
3302 map_locked = FALSE;
3303 entry = VM_MAP_ENTRY_NULL;
3304 new_entry = VM_MAP_ENTRY_NULL;
3305 }
3306
3307 /*
3308 * We can't hold the map lock if we enter this block.
3309 */
3310
3311 if (result == KERN_SUCCESS) {
3312 /* Wire down the new entry if the user
3313 * requested all new map entries be wired.
3314 */
3315 if ((map->wiring_required) || (superpage_size)) {
3316 assert(!keep_map_locked);
3317 pmap_empty = FALSE; /* pmap won't be empty */
3318 kr = vm_map_wire_kernel(map, start, end,
3319 cur_protection, VM_KERN_MEMORY_MLOCK,
3320 TRUE);
3321 result = kr;
3322 }
3323
3324 }
3325
3326 if (result != KERN_SUCCESS) {
3327 if (new_mapping_established) {
3328 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_FLAGS;
3329
3330 /*
3331 * We have to get rid of the new mappings since we
3332 * won't make them available to the user.
3333 * Try and do that atomically, to minimize the risk
3334 * that someone else create new mappings that range.
3335 */
3336 if (!map_locked) {
3337 vm_map_lock(map);
3338 map_locked = TRUE;
3339 }
3340 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
3341 remove_flags |= VM_MAP_REMOVE_NO_YIELD;
3342 if (permanent) {
3343 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
3344 }
3345 (void) vm_map_delete(map,
3346 *address, *address + size,
3347 remove_flags,
3348 KMEM_GUARD_NONE, &zap_new_list);
3349 }
3350
3351 if (vm_map_zap_first_entry(&zap_old_list)) {
3352 vm_map_entry_t entry1, entry2;
3353
3354 /*
3355 * The new mapping failed. Attempt to restore
3356 * the old mappings, saved in the "zap_old_map".
3357 */
3358 if (!map_locked) {
3359 vm_map_lock(map);
3360 map_locked = TRUE;
3361 }
3362
3363 /* first check if the coast is still clear */
3364 start = vm_map_zap_first_entry(&zap_old_list)->vme_start;
3365 end = vm_map_zap_last_entry(&zap_old_list)->vme_end;
3366
3367 if (vm_map_lookup_entry(map, start, &entry1) ||
3368 vm_map_lookup_entry(map, end, &entry2) ||
3369 entry1 != entry2) {
3370 /*
3371 * Part of that range has already been
3372 * re-mapped: we can't restore the old
3373 * mappings...
3374 */
3375 vm_map_enter_restore_failures++;
3376 } else {
3377 /*
3378 * Transfer the saved map entries from
3379 * "zap_old_map" to the original "map",
3380 * inserting them all after "entry1".
3381 */
3382 while ((entry2 = vm_map_zap_pop(&zap_old_list))) {
3383 vm_map_size_t entry_size;
3384
3385 entry_size = (entry2->vme_end -
3386 entry2->vme_start);
3387 vm_map_store_entry_link(map, entry1, entry2,
3388 VM_MAP_KERNEL_FLAGS_NONE);
3389 map->size += entry_size;
3390 entry1 = entry2;
3391 }
3392 if (map->wiring_required) {
3393 /*
3394 * XXX TODO: we should rewire the
3395 * old pages here...
3396 */
3397 }
3398 vm_map_enter_restore_successes++;
3399 }
3400 }
3401 }
3402
3403 /*
3404 * The caller is responsible for releasing the lock if it requested to
3405 * keep the map locked.
3406 */
3407 if (map_locked && !keep_map_locked) {
3408 vm_map_unlock(map);
3409 }
3410
3411 vm_map_zap_dispose(&zap_old_list);
3412 vm_map_zap_dispose(&zap_new_list);
3413
3414 if (new_mapping_established) {
3415 /*
3416 * The caller had a reference on "caller_object" and we
3417 * transferred that reference to the mapping.
3418 * We also took an extra reference on "caller_object" to keep
3419 * it alive while the map was unlocked.
3420 */
3421 if (result == KERN_SUCCESS) {
3422 /*
3423 * On success, the caller's reference on the object gets
3424 * tranferred to the mapping.
3425 * Release our extra reference.
3426 */
3427 if (is_submap) {
3428 vm_map_deallocate((vm_map_t)caller_object);
3429 } else {
3430 vm_object_deallocate(caller_object);
3431 }
3432 } else {
3433 /*
3434 * On error, the caller expects to still have a
3435 * reference on the object it gave us.
3436 * Let's use our extra reference for that.
3437 */
3438 }
3439 }
3440
3441 return result;
3442
3443 #undef RETURN
3444 }
3445
3446 #if __arm64__
3447 extern const struct memory_object_pager_ops fourk_pager_ops;
3448 kern_return_t
vm_map_enter_fourk(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t alias,vm_object_t object,vm_object_offset_t offset,boolean_t needs_copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)3449 vm_map_enter_fourk(
3450 vm_map_t map,
3451 vm_map_offset_t *address, /* IN/OUT */
3452 vm_map_size_t size,
3453 vm_map_offset_t mask,
3454 int flags,
3455 vm_map_kernel_flags_t vmk_flags,
3456 vm_tag_t alias,
3457 vm_object_t object,
3458 vm_object_offset_t offset,
3459 boolean_t needs_copy,
3460 vm_prot_t cur_protection,
3461 vm_prot_t max_protection,
3462 vm_inherit_t inheritance)
3463 {
3464 vm_map_entry_t entry, new_entry;
3465 vm_map_offset_t start, fourk_start;
3466 vm_map_offset_t end, fourk_end;
3467 vm_map_size_t fourk_size;
3468 kern_return_t result = KERN_SUCCESS;
3469 boolean_t map_locked = FALSE;
3470 boolean_t pmap_empty = TRUE;
3471 boolean_t new_mapping_established = FALSE;
3472 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3473 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3474 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3475 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3476 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3477 const boolean_t is_submap = vmk_flags.vmkf_submap;
3478 boolean_t permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
3479 const boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3480 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3481 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3482 vm_map_offset_t effective_min_offset, effective_max_offset;
3483 kern_return_t kr;
3484 boolean_t clear_map_aligned = FALSE;
3485 memory_object_t fourk_mem_obj;
3486 vm_object_t fourk_object;
3487 vm_map_offset_t fourk_pager_offset;
3488 int fourk_pager_index_start, fourk_pager_index_num;
3489 int cur_idx;
3490 boolean_t fourk_copy;
3491 vm_object_t copy_object;
3492 vm_object_offset_t copy_offset;
3493 VM_MAP_ZAP_DECLARE(zap_list);
3494
3495 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3496 panic("%s:%d", __FUNCTION__, __LINE__);
3497 }
3498 fourk_mem_obj = MEMORY_OBJECT_NULL;
3499 fourk_object = VM_OBJECT_NULL;
3500
3501 if (superpage_size) {
3502 return KERN_NOT_SUPPORTED;
3503 }
3504
3505 if ((cur_protection & VM_PROT_WRITE) &&
3506 (cur_protection & VM_PROT_EXECUTE) &&
3507 #if XNU_TARGET_OS_OSX
3508 map->pmap != kernel_pmap &&
3509 (vm_map_cs_enforcement(map)
3510 #if __arm64__
3511 || !VM_MAP_IS_EXOTIC(map)
3512 #endif /* __arm64__ */
3513 ) &&
3514 #endif /* XNU_TARGET_OS_OSX */
3515 !entry_for_jit) {
3516 DTRACE_VM3(cs_wx,
3517 uint64_t, 0,
3518 uint64_t, 0,
3519 vm_prot_t, cur_protection);
3520 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3521 "turning off execute\n",
3522 proc_selfpid(),
3523 (get_bsdtask_info(current_task())
3524 ? proc_name_address(get_bsdtask_info(current_task()))
3525 : "?"),
3526 __FUNCTION__);
3527 cur_protection &= ~VM_PROT_EXECUTE;
3528 }
3529
3530 /*
3531 * If the task has requested executable lockdown,
3532 * deny any new executable mapping.
3533 */
3534 if (map->map_disallow_new_exec == TRUE) {
3535 if (cur_protection & VM_PROT_EXECUTE) {
3536 return KERN_PROTECTION_FAILURE;
3537 }
3538 }
3539
3540 if (is_submap) {
3541 return KERN_NOT_SUPPORTED;
3542 }
3543 if (vmk_flags.vmkf_already) {
3544 return KERN_NOT_SUPPORTED;
3545 }
3546 if (purgable || entry_for_jit) {
3547 return KERN_NOT_SUPPORTED;
3548 }
3549
3550 effective_min_offset = map->min_offset;
3551
3552 if (vmk_flags.vmkf_beyond_max) {
3553 return KERN_NOT_SUPPORTED;
3554 } else {
3555 effective_max_offset = map->max_offset;
3556 }
3557
3558 if (size == 0 ||
3559 (offset & FOURK_PAGE_MASK) != 0) {
3560 *address = 0;
3561 return KERN_INVALID_ARGUMENT;
3562 }
3563
3564 #define RETURN(value) { result = value; goto BailOut; }
3565
3566 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3567 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3568
3569 if (!anywhere && overwrite) {
3570 return KERN_NOT_SUPPORTED;
3571 }
3572
3573 fourk_start = *address;
3574 fourk_size = size;
3575 fourk_end = fourk_start + fourk_size;
3576
3577 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3578 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3579 size = end - start;
3580
3581 if (anywhere) {
3582 return KERN_NOT_SUPPORTED;
3583 } else {
3584 /*
3585 * Verify that:
3586 * the address doesn't itself violate
3587 * the mask requirement.
3588 */
3589
3590 vm_map_lock(map);
3591 map_locked = TRUE;
3592 if ((start & mask) != 0) {
3593 RETURN(KERN_NO_SPACE);
3594 }
3595
3596 /*
3597 * ... the address is within bounds
3598 */
3599
3600 end = start + size;
3601
3602 if ((start < effective_min_offset) ||
3603 (end > effective_max_offset) ||
3604 (start >= end)) {
3605 RETURN(KERN_INVALID_ADDRESS);
3606 }
3607
3608 /*
3609 * ... the starting address isn't allocated
3610 */
3611 if (vm_map_lookup_entry(map, start, &entry)) {
3612 vm_object_t cur_object, shadow_object;
3613
3614 /*
3615 * We might already some 4K mappings
3616 * in a 16K page here.
3617 */
3618
3619 if (entry->vme_end - entry->vme_start
3620 != SIXTEENK_PAGE_SIZE) {
3621 RETURN(KERN_NO_SPACE);
3622 }
3623 if (entry->is_sub_map) {
3624 RETURN(KERN_NO_SPACE);
3625 }
3626 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3627 RETURN(KERN_NO_SPACE);
3628 }
3629
3630 /* go all the way down the shadow chain */
3631 cur_object = VME_OBJECT(entry);
3632 vm_object_lock(cur_object);
3633 while (cur_object->shadow != VM_OBJECT_NULL) {
3634 shadow_object = cur_object->shadow;
3635 vm_object_lock(shadow_object);
3636 vm_object_unlock(cur_object);
3637 cur_object = shadow_object;
3638 shadow_object = VM_OBJECT_NULL;
3639 }
3640 if (cur_object->internal ||
3641 cur_object->pager == NULL) {
3642 vm_object_unlock(cur_object);
3643 RETURN(KERN_NO_SPACE);
3644 }
3645 if (cur_object->pager->mo_pager_ops
3646 != &fourk_pager_ops) {
3647 vm_object_unlock(cur_object);
3648 RETURN(KERN_NO_SPACE);
3649 }
3650 fourk_object = cur_object;
3651 fourk_mem_obj = fourk_object->pager;
3652
3653 /* keep the "4K" object alive */
3654 vm_object_reference_locked(fourk_object);
3655 memory_object_reference(fourk_mem_obj);
3656 vm_object_unlock(fourk_object);
3657
3658 /* merge permissions */
3659 entry->protection |= cur_protection;
3660 entry->max_protection |= max_protection;
3661
3662 if ((entry->protection & VM_PROT_WRITE) &&
3663 (entry->protection & VM_PROT_ALLEXEC) &&
3664 fourk_binary_compatibility_unsafe &&
3665 fourk_binary_compatibility_allow_wx) {
3666 /* write+execute: need to be "jit" */
3667 entry->used_for_jit = TRUE;
3668 }
3669 goto map_in_fourk_pager;
3670 }
3671
3672 /*
3673 * ... the next region doesn't overlap the
3674 * end point.
3675 */
3676
3677 if ((entry->vme_next != vm_map_to_entry(map)) &&
3678 (entry->vme_next->vme_start < end)) {
3679 RETURN(KERN_NO_SPACE);
3680 }
3681 }
3682
3683 /*
3684 * At this point,
3685 * "start" and "end" should define the endpoints of the
3686 * available new range, and
3687 * "entry" should refer to the region before the new
3688 * range, and
3689 *
3690 * the map should be locked.
3691 */
3692
3693 /* create a new "4K" pager */
3694 fourk_mem_obj = fourk_pager_create();
3695 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3696 assert(fourk_object);
3697
3698 /* keep the "4" object alive */
3699 vm_object_reference(fourk_object);
3700
3701 /* create a "copy" object, to map the "4K" object copy-on-write */
3702 fourk_copy = TRUE;
3703 result = vm_object_copy_strategically(fourk_object,
3704 0,
3705 end - start,
3706 ©_object,
3707 ©_offset,
3708 &fourk_copy);
3709 assert(result == KERN_SUCCESS);
3710 assert(copy_object != VM_OBJECT_NULL);
3711 assert(copy_offset == 0);
3712
3713 /* map the "4K" pager's copy object */
3714 new_entry = vm_map_entry_insert(map,
3715 entry,
3716 vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)),
3717 vm_map_round_page(end, VM_MAP_PAGE_MASK(map)),
3718 copy_object,
3719 0, /* offset */
3720 vmk_flags,
3721 FALSE, /* needs_copy */
3722 cur_protection, max_protection,
3723 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3724 VM_INHERIT_NONE : inheritance),
3725 no_cache,
3726 permanent,
3727 superpage_size,
3728 clear_map_aligned,
3729 alias);
3730 entry = new_entry;
3731
3732 #if VM_MAP_DEBUG_FOURK
3733 if (vm_map_debug_fourk) {
3734 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3735 map,
3736 (uint64_t) entry->vme_start,
3737 (uint64_t) entry->vme_end,
3738 fourk_mem_obj);
3739 }
3740 #endif /* VM_MAP_DEBUG_FOURK */
3741
3742 new_mapping_established = TRUE;
3743
3744 map_in_fourk_pager:
3745 /* "map" the original "object" where it belongs in the "4K" pager */
3746 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3747 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3748 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3749 fourk_pager_index_num = 4;
3750 } else {
3751 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3752 }
3753 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3754 fourk_pager_index_num = 4 - fourk_pager_index_start;
3755 }
3756 for (cur_idx = 0;
3757 cur_idx < fourk_pager_index_num;
3758 cur_idx++) {
3759 vm_object_t old_object;
3760 vm_object_offset_t old_offset;
3761
3762 kr = fourk_pager_populate(fourk_mem_obj,
3763 TRUE, /* overwrite */
3764 fourk_pager_index_start + cur_idx,
3765 object,
3766 (object
3767 ? (offset +
3768 (cur_idx * FOURK_PAGE_SIZE))
3769 : 0),
3770 &old_object,
3771 &old_offset);
3772 #if VM_MAP_DEBUG_FOURK
3773 if (vm_map_debug_fourk) {
3774 if (old_object == (vm_object_t) -1 &&
3775 old_offset == (vm_object_offset_t) -1) {
3776 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3777 "pager [%p:0x%llx] "
3778 "populate[%d] "
3779 "[object:%p,offset:0x%llx]\n",
3780 map,
3781 (uint64_t) entry->vme_start,
3782 (uint64_t) entry->vme_end,
3783 fourk_mem_obj,
3784 VME_OFFSET(entry),
3785 fourk_pager_index_start + cur_idx,
3786 object,
3787 (object
3788 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3789 : 0));
3790 } else {
3791 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3792 "pager [%p:0x%llx] "
3793 "populate[%d] [object:%p,offset:0x%llx] "
3794 "old [%p:0x%llx]\n",
3795 map,
3796 (uint64_t) entry->vme_start,
3797 (uint64_t) entry->vme_end,
3798 fourk_mem_obj,
3799 VME_OFFSET(entry),
3800 fourk_pager_index_start + cur_idx,
3801 object,
3802 (object
3803 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3804 : 0),
3805 old_object,
3806 old_offset);
3807 }
3808 }
3809 #endif /* VM_MAP_DEBUG_FOURK */
3810
3811 assert(kr == KERN_SUCCESS);
3812 if (object != old_object &&
3813 object != VM_OBJECT_NULL &&
3814 object != (vm_object_t) -1) {
3815 vm_object_reference(object);
3816 }
3817 if (object != old_object &&
3818 old_object != VM_OBJECT_NULL &&
3819 old_object != (vm_object_t) -1) {
3820 vm_object_deallocate(old_object);
3821 }
3822 }
3823
3824 BailOut:
3825 assert(map_locked == TRUE);
3826
3827 if (result == KERN_SUCCESS) {
3828 vm_prot_t pager_prot;
3829 memory_object_t pager;
3830
3831 #if DEBUG
3832 if (pmap_empty &&
3833 !(vmk_flags.vmkf_no_pmap_check)) {
3834 assert(pmap_is_empty(map->pmap,
3835 *address,
3836 *address + size));
3837 }
3838 #endif /* DEBUG */
3839
3840 /*
3841 * For "named" VM objects, let the pager know that the
3842 * memory object is being mapped. Some pagers need to keep
3843 * track of this, to know when they can reclaim the memory
3844 * object, for example.
3845 * VM calls memory_object_map() for each mapping (specifying
3846 * the protection of each mapping) and calls
3847 * memory_object_last_unmap() when all the mappings are gone.
3848 */
3849 pager_prot = max_protection;
3850 if (needs_copy) {
3851 /*
3852 * Copy-On-Write mapping: won't modify
3853 * the memory object.
3854 */
3855 pager_prot &= ~VM_PROT_WRITE;
3856 }
3857 if (!is_submap &&
3858 object != VM_OBJECT_NULL &&
3859 object->named &&
3860 object->pager != MEMORY_OBJECT_NULL) {
3861 vm_object_lock(object);
3862 pager = object->pager;
3863 if (object->named &&
3864 pager != MEMORY_OBJECT_NULL) {
3865 assert(object->pager_ready);
3866 vm_object_mapping_wait(object, THREAD_UNINT);
3867 vm_object_mapping_begin(object);
3868 vm_object_unlock(object);
3869
3870 kr = memory_object_map(pager, pager_prot);
3871 assert(kr == KERN_SUCCESS);
3872
3873 vm_object_lock(object);
3874 vm_object_mapping_end(object);
3875 }
3876 vm_object_unlock(object);
3877 }
3878 if (!is_submap &&
3879 fourk_object != VM_OBJECT_NULL &&
3880 fourk_object->named &&
3881 fourk_object->pager != MEMORY_OBJECT_NULL) {
3882 vm_object_lock(fourk_object);
3883 pager = fourk_object->pager;
3884 if (fourk_object->named &&
3885 pager != MEMORY_OBJECT_NULL) {
3886 assert(fourk_object->pager_ready);
3887 vm_object_mapping_wait(fourk_object,
3888 THREAD_UNINT);
3889 vm_object_mapping_begin(fourk_object);
3890 vm_object_unlock(fourk_object);
3891
3892 kr = memory_object_map(pager, VM_PROT_READ);
3893 assert(kr == KERN_SUCCESS);
3894
3895 vm_object_lock(fourk_object);
3896 vm_object_mapping_end(fourk_object);
3897 }
3898 vm_object_unlock(fourk_object);
3899 }
3900 }
3901
3902 if (fourk_object != VM_OBJECT_NULL) {
3903 vm_object_deallocate(fourk_object);
3904 fourk_object = VM_OBJECT_NULL;
3905 memory_object_deallocate(fourk_mem_obj);
3906 fourk_mem_obj = MEMORY_OBJECT_NULL;
3907 }
3908
3909 assert(map_locked == TRUE);
3910
3911 if (!keep_map_locked) {
3912 vm_map_unlock(map);
3913 map_locked = FALSE;
3914 }
3915
3916 /*
3917 * We can't hold the map lock if we enter this block.
3918 */
3919
3920 if (result == KERN_SUCCESS) {
3921 /* Wire down the new entry if the user
3922 * requested all new map entries be wired.
3923 */
3924 if ((map->wiring_required) || (superpage_size)) {
3925 assert(!keep_map_locked);
3926 pmap_empty = FALSE; /* pmap won't be empty */
3927 kr = vm_map_wire_kernel(map, start, end,
3928 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3929 TRUE);
3930 result = kr;
3931 }
3932
3933 }
3934
3935 if (result != KERN_SUCCESS) {
3936 if (new_mapping_established) {
3937 /*
3938 * We have to get rid of the new mappings since we
3939 * won't make them available to the user.
3940 * Try and do that atomically, to minimize the risk
3941 * that someone else create new mappings that range.
3942 */
3943
3944 if (!map_locked) {
3945 vm_map_lock(map);
3946 map_locked = TRUE;
3947 }
3948 (void)vm_map_delete(map, *address, *address + size,
3949 VM_MAP_REMOVE_NO_MAP_ALIGN | VM_MAP_REMOVE_NO_YIELD,
3950 KMEM_GUARD_NONE, &zap_list);
3951 }
3952 }
3953
3954 /*
3955 * The caller is responsible for releasing the lock if it requested to
3956 * keep the map locked.
3957 */
3958 if (map_locked && !keep_map_locked) {
3959 vm_map_unlock(map);
3960 }
3961
3962 vm_map_zap_dispose(&zap_list);
3963
3964 return result;
3965
3966 #undef RETURN
3967 }
3968 #endif /* __arm64__ */
3969
/*
 * Counters for the prefault optimization.
 * vm_prefault_nb_pages:   pages entered via the prefault path.
 * vm_prefault_nb_bailout: times the prefault loop bailed out early.
 */
int64_t vm_prefault_nb_pages = 0;
int64_t vm_prefault_nb_bailout = 0;
3975
3976 static kern_return_t
vm_map_enter_mem_object_helper(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance,upl_page_list_ptr_t page_list,unsigned int page_list_count)3977 vm_map_enter_mem_object_helper(
3978 vm_map_t target_map,
3979 vm_map_offset_t *address,
3980 vm_map_size_t initial_size,
3981 vm_map_offset_t mask,
3982 int flags,
3983 vm_map_kernel_flags_t vmk_flags,
3984 vm_tag_t tag,
3985 ipc_port_t port,
3986 vm_object_offset_t offset,
3987 boolean_t copy,
3988 vm_prot_t cur_protection,
3989 vm_prot_t max_protection,
3990 vm_inherit_t inheritance,
3991 upl_page_list_ptr_t page_list,
3992 unsigned int page_list_count)
3993 {
3994 vm_map_address_t map_addr;
3995 vm_map_size_t map_size;
3996 vm_object_t object;
3997 vm_object_size_t size;
3998 kern_return_t result;
3999 boolean_t mask_cur_protection, mask_max_protection;
4000 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4001 vm_map_offset_t offset_in_mapping = 0;
4002 #if __arm64__
4003 boolean_t fourk = vmk_flags.vmkf_fourk;
4004 #endif /* __arm64__ */
4005
4006 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4007 /* XXX TODO4K prefaulting depends on page size... */
4008 try_prefault = FALSE;
4009 }
4010
4011 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4012
4013 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4014 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4015 cur_protection &= ~VM_PROT_IS_MASK;
4016 max_protection &= ~VM_PROT_IS_MASK;
4017
4018 /*
4019 * Check arguments for validity
4020 */
4021 if ((target_map == VM_MAP_NULL) ||
4022 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4023 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4024 (inheritance > VM_INHERIT_LAST_VALID) ||
4025 (try_prefault && (copy || !page_list)) ||
4026 initial_size == 0) {
4027 return KERN_INVALID_ARGUMENT;
4028 }
4029
4030 #if __arm64__
4031 if (cur_protection & VM_PROT_EXECUTE) {
4032 cur_protection |= VM_PROT_READ;
4033 }
4034
4035 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4036 /* no "fourk" if map is using a sub-page page size */
4037 fourk = FALSE;
4038 }
4039 if (fourk) {
4040 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4041 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4042 } else
4043 #endif /* __arm64__ */
4044 {
4045 map_addr = vm_map_trunc_page(*address,
4046 VM_MAP_PAGE_MASK(target_map));
4047 map_size = vm_map_round_page(initial_size,
4048 VM_MAP_PAGE_MASK(target_map));
4049 }
4050 size = vm_object_round_page(initial_size);
4051
4052 /*
4053 * Find the vm object (if any) corresponding to this port.
4054 */
4055 if (!IP_VALID(port)) {
4056 object = VM_OBJECT_NULL;
4057 offset = 0;
4058 copy = FALSE;
4059 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4060 vm_named_entry_t named_entry;
4061 vm_object_offset_t data_offset;
4062
4063 named_entry = mach_memory_entry_from_port(port);
4064
4065 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4066 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4067 data_offset = named_entry->data_offset;
4068 offset += named_entry->data_offset;
4069 } else {
4070 data_offset = 0;
4071 }
4072
4073 /* a few checks to make sure user is obeying rules */
4074 if (size == 0) {
4075 if (offset >= named_entry->size) {
4076 return KERN_INVALID_RIGHT;
4077 }
4078 size = named_entry->size - offset;
4079 }
4080 if (mask_max_protection) {
4081 max_protection &= named_entry->protection;
4082 }
4083 if (mask_cur_protection) {
4084 cur_protection &= named_entry->protection;
4085 }
4086 if ((named_entry->protection & max_protection) !=
4087 max_protection) {
4088 return KERN_INVALID_RIGHT;
4089 }
4090 if ((named_entry->protection & cur_protection) !=
4091 cur_protection) {
4092 return KERN_INVALID_RIGHT;
4093 }
4094 if (offset + size < offset) {
4095 /* overflow */
4096 return KERN_INVALID_ARGUMENT;
4097 }
4098 if (named_entry->size < (offset + initial_size)) {
4099 return KERN_INVALID_ARGUMENT;
4100 }
4101
4102 if (named_entry->is_copy) {
4103 /* for a vm_map_copy, we can only map it whole */
4104 if ((size != named_entry->size) &&
4105 (vm_map_round_page(size,
4106 VM_MAP_PAGE_MASK(target_map)) ==
4107 named_entry->size)) {
4108 /* XXX FBDP use the rounded size... */
4109 size = vm_map_round_page(
4110 size,
4111 VM_MAP_PAGE_MASK(target_map));
4112 }
4113 }
4114
4115 /* the callers parameter offset is defined to be the */
4116 /* offset from beginning of named entry offset in object */
4117 offset = offset + named_entry->offset;
4118
4119 if (!VM_MAP_PAGE_ALIGNED(size,
4120 VM_MAP_PAGE_MASK(target_map))) {
4121 /*
4122 * Let's not map more than requested;
4123 * vm_map_enter() will handle this "not map-aligned"
4124 * case.
4125 */
4126 map_size = size;
4127 }
4128
4129 named_entry_lock(named_entry);
4130 if (named_entry->is_sub_map) {
4131 vm_map_t submap;
4132
4133 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4134 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4135 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4136 }
4137
4138 submap = named_entry->backing.map;
4139 vm_map_reference(submap);
4140 named_entry_unlock(named_entry);
4141
4142 vmk_flags.vmkf_submap = TRUE;
4143
4144 result = vm_map_enter(target_map,
4145 &map_addr,
4146 map_size,
4147 mask,
4148 flags,
4149 vmk_flags,
4150 tag,
4151 (vm_object_t)(uintptr_t) submap,
4152 offset,
4153 copy,
4154 cur_protection,
4155 max_protection,
4156 inheritance);
4157 if (result != KERN_SUCCESS) {
4158 vm_map_deallocate(submap);
4159 } else {
4160 /*
4161 * No need to lock "submap" just to check its
4162 * "mapped" flag: that flag is never reset
4163 * once it's been set and if we race, we'll
4164 * just end up setting it twice, which is OK.
4165 */
4166 if (submap->mapped_in_other_pmaps == FALSE &&
4167 vm_map_pmap(submap) != PMAP_NULL &&
4168 vm_map_pmap(submap) !=
4169 vm_map_pmap(target_map)) {
4170 /*
4171 * This submap is being mapped in a map
4172 * that uses a different pmap.
4173 * Set its "mapped_in_other_pmaps" flag
4174 * to indicate that we now need to
4175 * remove mappings from all pmaps rather
4176 * than just the submap's pmap.
4177 */
4178 vm_map_lock(submap);
4179 submap->mapped_in_other_pmaps = TRUE;
4180 vm_map_unlock(submap);
4181 }
4182 *address = map_addr;
4183 }
4184 return result;
4185 } else if (named_entry->is_copy) {
4186 kern_return_t kr;
4187 vm_map_copy_t copy_map;
4188 vm_map_entry_t copy_entry;
4189 vm_map_offset_t copy_addr;
4190 vm_map_copy_t target_copy_map;
4191 vm_map_offset_t overmap_start, overmap_end;
4192 vm_map_offset_t trimmed_start;
4193 vm_map_size_t target_size;
4194
4195 if (flags & ~(VM_FLAGS_FIXED |
4196 VM_FLAGS_ANYWHERE |
4197 VM_FLAGS_OVERWRITE |
4198 VM_FLAGS_RETURN_4K_DATA_ADDR |
4199 VM_FLAGS_RETURN_DATA_ADDR |
4200 VM_FLAGS_ALIAS_MASK)) {
4201 named_entry_unlock(named_entry);
4202 return KERN_INVALID_ARGUMENT;
4203 }
4204
4205 copy_map = named_entry->backing.copy;
4206 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4207 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4208 /* unsupported type; should not happen */
4209 printf("vm_map_enter_mem_object: "
4210 "memory_entry->backing.copy "
4211 "unsupported type 0x%x\n",
4212 copy_map->type);
4213 named_entry_unlock(named_entry);
4214 return KERN_INVALID_ARGUMENT;
4215 }
4216
4217 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4218 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4219 }
4220
4221 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4222 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4223 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4224 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4225 offset_in_mapping &= ~((signed)(0xFFF));
4226 }
4227 }
4228
4229 target_copy_map = VM_MAP_COPY_NULL;
4230 target_size = copy_map->size;
4231 overmap_start = 0;
4232 overmap_end = 0;
4233 trimmed_start = 0;
4234 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4235 DEBUG4K_ADJUST("adjusting...\n");
4236 kr = vm_map_copy_adjust_to_target(
4237 copy_map,
4238 offset /* includes data_offset */,
4239 initial_size,
4240 target_map,
4241 copy,
4242 &target_copy_map,
4243 &overmap_start,
4244 &overmap_end,
4245 &trimmed_start);
4246 if (kr != KERN_SUCCESS) {
4247 named_entry_unlock(named_entry);
4248 return kr;
4249 }
4250 target_size = target_copy_map->size;
4251 if (trimmed_start >= data_offset) {
4252 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4253 } else {
4254 data_offset -= trimmed_start;
4255 }
4256 } else {
4257 /*
4258 * Assert that the vm_map_copy is coming from the right
4259 * zone and hasn't been forged
4260 */
4261 vm_map_copy_require(copy_map);
4262 target_copy_map = copy_map;
4263 }
4264
4265 /* reserve a contiguous range */
4266 kr = vm_map_enter(target_map,
4267 &map_addr,
4268 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4269 mask,
4270 flags & (VM_FLAGS_ANYWHERE |
4271 VM_FLAGS_OVERWRITE |
4272 VM_FLAGS_RETURN_4K_DATA_ADDR |
4273 VM_FLAGS_RETURN_DATA_ADDR),
4274 vmk_flags,
4275 tag,
4276 VM_OBJECT_NULL,
4277 0,
4278 FALSE, /* copy */
4279 cur_protection,
4280 max_protection,
4281 inheritance);
4282 if (kr != KERN_SUCCESS) {
4283 DEBUG4K_ERROR("kr 0x%x\n", kr);
4284 if (target_copy_map != copy_map) {
4285 vm_map_copy_discard(target_copy_map);
4286 target_copy_map = VM_MAP_COPY_NULL;
4287 }
4288 named_entry_unlock(named_entry);
4289 return kr;
4290 }
4291
4292 copy_addr = map_addr;
4293
4294 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4295 copy_entry != vm_map_copy_to_entry(target_copy_map);
4296 copy_entry = copy_entry->vme_next) {
4297 int remap_flags;
4298 vm_map_kernel_flags_t vmk_remap_flags;
4299 vm_map_t copy_submap = VM_MAP_NULL;
4300 vm_object_t copy_object = VM_OBJECT_NULL;
4301 vm_map_size_t copy_size;
4302 vm_object_offset_t copy_offset;
4303 int copy_vm_alias;
4304 boolean_t do_copy;
4305
4306 do_copy = FALSE;
4307 remap_flags = 0;
4308 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4309
4310 if (copy_entry->is_sub_map) {
4311 copy_submap = VME_SUBMAP(copy_entry);
4312 copy_object = (vm_object_t)copy_submap;
4313 } else {
4314 copy_object = VME_OBJECT(copy_entry);
4315 }
4316 copy_offset = VME_OFFSET(copy_entry);
4317 copy_size = (copy_entry->vme_end -
4318 copy_entry->vme_start);
4319 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4320 if (copy_vm_alias == 0) {
4321 /*
4322 * Caller does not want a specific
4323 * alias for this new mapping: use
4324 * the alias of the original mapping.
4325 */
4326 copy_vm_alias = VME_ALIAS(copy_entry);
4327 }
4328
4329 /* sanity check */
4330 if ((copy_addr + copy_size) >
4331 (map_addr +
4332 overmap_start + overmap_end +
4333 named_entry->size /* XXX full size */)) {
4334 /* over-mapping too much !? */
4335 kr = KERN_INVALID_ARGUMENT;
4336 DEBUG4K_ERROR("kr 0x%x\n", kr);
4337 /* abort */
4338 break;
4339 }
4340
4341 /* take a reference on the object */
4342 if (copy_entry->is_sub_map) {
4343 vmk_remap_flags.vmkf_submap = TRUE;
4344 vm_map_reference(copy_submap);
4345 } else {
4346 if (!copy &&
4347 copy_object != VM_OBJECT_NULL &&
4348 copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4349 /*
4350 * We need to resolve our side of this
4351 * "symmetric" copy-on-write now; we
4352 * need a new object to map and share,
4353 * instead of the current one which
4354 * might still be shared with the
4355 * original mapping.
4356 *
4357 * Note: A "vm_map_copy_t" does not
4358 * have a lock but we're protected by
4359 * the named entry's lock here.
4360 */
4361 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4362 VME_OBJECT_SHADOW(copy_entry, copy_size, TRUE);
4363 assert(copy_object != VME_OBJECT(copy_entry));
4364 if (!copy_entry->needs_copy &&
4365 copy_entry->protection & VM_PROT_WRITE) {
4366 vm_prot_t prot;
4367
4368 prot = copy_entry->protection & ~VM_PROT_WRITE;
4369 vm_object_pmap_protect(copy_object,
4370 copy_offset,
4371 copy_size,
4372 PMAP_NULL,
4373 PAGE_SIZE,
4374 0,
4375 prot);
4376 }
4377 copy_entry->needs_copy = FALSE;
4378 copy_entry->is_shared = TRUE;
4379 copy_object = VME_OBJECT(copy_entry);
4380 copy_offset = VME_OFFSET(copy_entry);
4381 vm_object_lock(copy_object);
4382 /* we're about to make a shared mapping of this object */
4383 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4384 copy_object->true_share = TRUE;
4385 vm_object_unlock(copy_object);
4386 }
4387
4388 if (copy_object != VM_OBJECT_NULL &&
4389 copy_object->named &&
4390 copy_object->pager != MEMORY_OBJECT_NULL &&
4391 copy_object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4392 memory_object_t pager;
4393 vm_prot_t pager_prot;
4394
4395 /*
4396 * For "named" VM objects, let the pager know that the
4397 * memory object is being mapped. Some pagers need to keep
4398 * track of this, to know when they can reclaim the memory
4399 * object, for example.
4400 * VM calls memory_object_map() for each mapping (specifying
4401 * the protection of each mapping) and calls
4402 * memory_object_last_unmap() when all the mappings are gone.
4403 */
4404 pager_prot = max_protection;
4405 if (copy) {
4406 /*
4407 * Copy-On-Write mapping: won't modify the
4408 * memory object.
4409 */
4410 pager_prot &= ~VM_PROT_WRITE;
4411 }
4412 vm_object_lock(copy_object);
4413 pager = copy_object->pager;
4414 if (copy_object->named &&
4415 pager != MEMORY_OBJECT_NULL &&
4416 copy_object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4417 assert(copy_object->pager_ready);
4418 vm_object_mapping_wait(copy_object, THREAD_UNINT);
4419 vm_object_mapping_begin(copy_object);
4420 vm_object_unlock(copy_object);
4421
4422 kr = memory_object_map(pager, pager_prot);
4423 assert(kr == KERN_SUCCESS);
4424
4425 vm_object_lock(copy_object);
4426 vm_object_mapping_end(copy_object);
4427 }
4428 vm_object_unlock(copy_object);
4429 }
4430
4431 /*
4432 * Perform the copy if requested
4433 */
4434
4435 if (copy && copy_object != VM_OBJECT_NULL) {
4436 vm_object_t new_object;
4437 vm_object_offset_t new_offset;
4438
4439 result = vm_object_copy_strategically(copy_object, copy_offset,
4440 copy_size,
4441 &new_object, &new_offset,
4442 &do_copy);
4443
4444
4445 if (result == KERN_MEMORY_RESTART_COPY) {
4446 boolean_t success;
4447 boolean_t src_needs_copy;
4448
4449 /*
4450 * XXX
4451 * We currently ignore src_needs_copy.
4452 * This really is the issue of how to make
4453 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4454 * non-kernel users to use. Solution forthcoming.
4455 * In the meantime, since we don't allow non-kernel
4456 * memory managers to specify symmetric copy,
4457 * we won't run into problems here.
4458 */
4459 new_object = copy_object;
4460 new_offset = copy_offset;
4461 success = vm_object_copy_quickly(new_object,
4462 new_offset,
4463 copy_size,
4464 &src_needs_copy,
4465 &do_copy);
4466 assert(success);
4467 result = KERN_SUCCESS;
4468 }
4469 if (result != KERN_SUCCESS) {
4470 kr = result;
4471 break;
4472 }
4473
4474 copy_object = new_object;
4475 copy_offset = new_offset;
4476 /*
4477 * No extra object reference for the mapping:
4478 * the mapping should be the only thing keeping
4479 * this new object alive.
4480 */
4481 } else {
4482 /*
4483 * We already have the right object
4484 * to map.
4485 */
4486 copy_object = VME_OBJECT(copy_entry);
4487 /* take an extra ref for the mapping below */
4488 vm_object_reference(copy_object);
4489 }
4490 }
4491
4492 /* over-map the object into destination */
4493 remap_flags |= flags;
4494 remap_flags |= VM_FLAGS_FIXED;
4495 remap_flags |= VM_FLAGS_OVERWRITE;
4496 remap_flags &= ~VM_FLAGS_ANYWHERE;
4497 if (!copy && !copy_entry->is_sub_map) {
4498 /*
4499 * copy-on-write should have been
4500 * resolved at this point, or we would
4501 * end up sharing instead of copying.
4502 */
4503 assert(!copy_entry->needs_copy);
4504 }
4505 #if XNU_TARGET_OS_OSX
4506 if (copy_entry->used_for_jit) {
4507 vmk_remap_flags.vmkf_map_jit = TRUE;
4508 }
4509 #endif /* XNU_TARGET_OS_OSX */
4510
4511 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4512 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4513 kr = vm_map_enter(target_map,
4514 ©_addr,
4515 copy_size,
4516 (vm_map_offset_t) 0,
4517 remap_flags,
4518 vmk_remap_flags,
4519 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4520 copy_object,
4521 copy_offset,
4522 ((copy_object == NULL)
4523 ? FALSE
4524 : (copy || copy_entry->needs_copy)),
4525 cur_protection,
4526 max_protection,
4527 inheritance);
4528 if (kr != KERN_SUCCESS) {
4529 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4530 if (copy_entry->is_sub_map) {
4531 vm_map_deallocate(copy_submap);
4532 } else {
4533 vm_object_deallocate(copy_object);
4534 }
4535 /* abort */
4536 break;
4537 }
4538
4539 /* next mapping */
4540 copy_addr += copy_size;
4541 }
4542
4543 if (kr == KERN_SUCCESS) {
4544 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4545 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4546 *address = map_addr + offset_in_mapping;
4547 } else {
4548 *address = map_addr;
4549 }
4550 if (overmap_start) {
4551 *address += overmap_start;
4552 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4553 }
4554 }
4555 named_entry_unlock(named_entry);
4556 if (target_copy_map != copy_map) {
4557 vm_map_copy_discard(target_copy_map);
4558 target_copy_map = VM_MAP_COPY_NULL;
4559 }
4560
4561 if (kr != KERN_SUCCESS) {
4562 if (!(flags & VM_FLAGS_OVERWRITE)) {
4563 /* deallocate the contiguous range */
4564 (void) vm_deallocate(target_map,
4565 map_addr,
4566 map_size);
4567 }
4568 }
4569
4570 return kr;
4571 }
4572
4573 if (named_entry->is_object) {
4574 unsigned int access;
4575 vm_prot_t protections;
4576 unsigned int wimg_mode;
4577
4578 /* we are mapping a VM object */
4579
4580 protections = named_entry->protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
4581 access = GET_MAP_MEM(named_entry->protection);
4582
4583 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4584 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4585 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4586 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4587 offset_in_mapping &= ~((signed)(0xFFF));
4588 }
4589 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4590 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4591 }
4592
4593 object = vm_named_entry_to_vm_object(named_entry);
4594 assert(object != VM_OBJECT_NULL);
4595 vm_object_lock(object);
4596 named_entry_unlock(named_entry);
4597
4598 vm_object_reference_locked(object);
4599
4600 wimg_mode = object->wimg_bits;
4601 vm_prot_to_wimg(access, &wimg_mode);
4602 if (object->wimg_bits != wimg_mode) {
4603 vm_object_change_wimg_mode(object, wimg_mode);
4604 }
4605
4606 vm_object_unlock(object);
4607 } else {
4608 panic("invalid VM named entry %p", named_entry);
4609 }
4610 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4611 /*
4612 * JMM - This is temporary until we unify named entries
4613 * and raw memory objects.
4614 *
4615 * Detected fake ip_kotype for a memory object. In
4616 * this case, the port isn't really a port at all, but
4617 * instead is just a raw memory object.
4618 */
4619 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4620 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4621 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4622 }
4623
4624 object = memory_object_to_vm_object((memory_object_t)port);
4625 if (object == VM_OBJECT_NULL) {
4626 return KERN_INVALID_OBJECT;
4627 }
4628 vm_object_reference(object);
4629
4630 /* wait for object (if any) to be ready */
4631 if (object != VM_OBJECT_NULL) {
4632 if (object == kernel_object) {
4633 printf("Warning: Attempt to map kernel object"
4634 " by a non-private kernel entity\n");
4635 return KERN_INVALID_OBJECT;
4636 }
4637 if (!object->pager_ready) {
4638 vm_object_lock(object);
4639
4640 while (!object->pager_ready) {
4641 vm_object_wait(object,
4642 VM_OBJECT_EVENT_PAGER_READY,
4643 THREAD_UNINT);
4644 vm_object_lock(object);
4645 }
4646 vm_object_unlock(object);
4647 }
4648 }
4649 } else {
4650 return KERN_INVALID_OBJECT;
4651 }
4652
4653 if (object != VM_OBJECT_NULL &&
4654 object->named &&
4655 object->pager != MEMORY_OBJECT_NULL &&
4656 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4657 memory_object_t pager;
4658 vm_prot_t pager_prot;
4659 kern_return_t kr;
4660
4661 /*
4662 * For "named" VM objects, let the pager know that the
4663 * memory object is being mapped. Some pagers need to keep
4664 * track of this, to know when they can reclaim the memory
4665 * object, for example.
4666 * VM calls memory_object_map() for each mapping (specifying
4667 * the protection of each mapping) and calls
4668 * memory_object_last_unmap() when all the mappings are gone.
4669 */
4670 pager_prot = max_protection;
4671 if (copy) {
4672 /*
4673 * Copy-On-Write mapping: won't modify the
4674 * memory object.
4675 */
4676 pager_prot &= ~VM_PROT_WRITE;
4677 }
4678 vm_object_lock(object);
4679 pager = object->pager;
4680 if (object->named &&
4681 pager != MEMORY_OBJECT_NULL &&
4682 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4683 assert(object->pager_ready);
4684 vm_object_mapping_wait(object, THREAD_UNINT);
4685 vm_object_mapping_begin(object);
4686 vm_object_unlock(object);
4687
4688 kr = memory_object_map(pager, pager_prot);
4689 assert(kr == KERN_SUCCESS);
4690
4691 vm_object_lock(object);
4692 vm_object_mapping_end(object);
4693 }
4694 vm_object_unlock(object);
4695 }
4696
4697 /*
4698 * Perform the copy if requested
4699 */
4700
4701 if (copy) {
4702 vm_object_t new_object;
4703 vm_object_offset_t new_offset;
4704
4705 result = vm_object_copy_strategically(object, offset,
4706 map_size,
4707 &new_object, &new_offset,
4708 ©);
4709
4710
4711 if (result == KERN_MEMORY_RESTART_COPY) {
4712 boolean_t success;
4713 boolean_t src_needs_copy;
4714
4715 /*
4716 * XXX
4717 * We currently ignore src_needs_copy.
4718 * This really is the issue of how to make
4719 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4720 * non-kernel users to use. Solution forthcoming.
4721 * In the meantime, since we don't allow non-kernel
4722 * memory managers to specify symmetric copy,
4723 * we won't run into problems here.
4724 */
4725 new_object = object;
4726 new_offset = offset;
4727 success = vm_object_copy_quickly(new_object,
4728 new_offset,
4729 map_size,
4730 &src_needs_copy,
4731 ©);
4732 assert(success);
4733 result = KERN_SUCCESS;
4734 }
4735 /*
4736 * Throw away the reference to the
4737 * original object, as it won't be mapped.
4738 */
4739
4740 vm_object_deallocate(object);
4741
4742 if (result != KERN_SUCCESS) {
4743 return result;
4744 }
4745
4746 object = new_object;
4747 offset = new_offset;
4748 }
4749
4750 /*
4751 * If non-kernel users want to try to prefault pages, the mapping and prefault
4752 * needs to be atomic.
4753 */
4754 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4755 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4756
4757 #if __arm64__
4758 if (fourk) {
4759 /* map this object in a "4K" pager */
4760 result = vm_map_enter_fourk(target_map,
4761 &map_addr,
4762 map_size,
4763 (vm_map_offset_t) mask,
4764 flags,
4765 vmk_flags,
4766 tag,
4767 object,
4768 offset,
4769 copy,
4770 cur_protection,
4771 max_protection,
4772 inheritance);
4773 } else
4774 #endif /* __arm64__ */
4775 {
4776 result = vm_map_enter(target_map,
4777 &map_addr, map_size,
4778 (vm_map_offset_t)mask,
4779 flags,
4780 vmk_flags,
4781 tag,
4782 object, offset,
4783 copy,
4784 cur_protection, max_protection,
4785 inheritance);
4786 }
4787 if (result != KERN_SUCCESS) {
4788 vm_object_deallocate(object);
4789 }
4790
4791 /*
4792 * Try to prefault, and do not forget to release the vm map lock.
4793 */
4794 if (result == KERN_SUCCESS && try_prefault) {
4795 mach_vm_address_t va = map_addr;
4796 kern_return_t kr = KERN_SUCCESS;
4797 unsigned int i = 0;
4798 int pmap_options;
4799
4800 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4801 if (object->internal) {
4802 pmap_options |= PMAP_OPTIONS_INTERNAL;
4803 }
4804
4805 for (i = 0; i < page_list_count; ++i) {
4806 if (!UPL_VALID_PAGE(page_list, i)) {
4807 if (kernel_prefault) {
4808 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4809 result = KERN_MEMORY_ERROR;
4810 break;
4811 }
4812 } else {
4813 /*
4814 * If this function call failed, we should stop
4815 * trying to optimize, other calls are likely
4816 * going to fail too.
4817 *
4818 * We are not gonna report an error for such
4819 * failure though. That's an optimization, not
4820 * something critical.
4821 */
4822 kr = pmap_enter_options(target_map->pmap,
4823 va, UPL_PHYS_PAGE(page_list, i),
4824 cur_protection, VM_PROT_NONE,
4825 0, TRUE, pmap_options, NULL);
4826 if (kr != KERN_SUCCESS) {
4827 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4828 if (kernel_prefault) {
4829 result = kr;
4830 }
4831 break;
4832 }
4833 OSIncrementAtomic64(&vm_prefault_nb_pages);
4834 }
4835
4836 /* Next virtual address */
4837 va += PAGE_SIZE;
4838 }
4839 if (vmk_flags.vmkf_keep_map_locked) {
4840 vm_map_unlock(target_map);
4841 }
4842 }
4843
4844 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4845 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4846 *address = map_addr + offset_in_mapping;
4847 } else {
4848 *address = map_addr;
4849 }
4850 return result;
4851 }
4852
4853 kern_return_t
vm_map_enter_mem_object(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)4854 vm_map_enter_mem_object(
4855 vm_map_t target_map,
4856 vm_map_offset_t *address,
4857 vm_map_size_t initial_size,
4858 vm_map_offset_t mask,
4859 int flags,
4860 vm_map_kernel_flags_t vmk_flags,
4861 vm_tag_t tag,
4862 ipc_port_t port,
4863 vm_object_offset_t offset,
4864 boolean_t copy,
4865 vm_prot_t cur_protection,
4866 vm_prot_t max_protection,
4867 vm_inherit_t inheritance)
4868 {
4869 kern_return_t ret;
4870
4871 ret = vm_map_enter_mem_object_helper(target_map,
4872 address,
4873 initial_size,
4874 mask,
4875 flags,
4876 vmk_flags,
4877 tag,
4878 port,
4879 offset,
4880 copy,
4881 cur_protection,
4882 max_protection,
4883 inheritance,
4884 NULL,
4885 0);
4886
4887 #if KASAN
4888 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4889 kasan_notify_address(*address, initial_size);
4890 }
4891 #endif
4892
4893 return ret;
4894 }
4895
4896 kern_return_t
vm_map_enter_mem_object_prefault(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,vm_prot_t cur_protection,vm_prot_t max_protection,upl_page_list_ptr_t page_list,unsigned int page_list_count)4897 vm_map_enter_mem_object_prefault(
4898 vm_map_t target_map,
4899 vm_map_offset_t *address,
4900 vm_map_size_t initial_size,
4901 vm_map_offset_t mask,
4902 int flags,
4903 vm_map_kernel_flags_t vmk_flags,
4904 vm_tag_t tag,
4905 ipc_port_t port,
4906 vm_object_offset_t offset,
4907 vm_prot_t cur_protection,
4908 vm_prot_t max_protection,
4909 upl_page_list_ptr_t page_list,
4910 unsigned int page_list_count)
4911 {
4912 kern_return_t ret;
4913
4914 ret = vm_map_enter_mem_object_helper(target_map,
4915 address,
4916 initial_size,
4917 mask,
4918 flags,
4919 vmk_flags,
4920 tag,
4921 port,
4922 offset,
4923 FALSE,
4924 cur_protection,
4925 max_protection,
4926 VM_INHERIT_DEFAULT,
4927 page_list,
4928 page_list_count);
4929
4930 #if KASAN
4931 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4932 kasan_notify_address(*address, initial_size);
4933 }
4934 #endif
4935
4936 return ret;
4937 }
4938
4939
4940 kern_return_t
vm_map_enter_mem_object_control(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,memory_object_control_t control,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)4941 vm_map_enter_mem_object_control(
4942 vm_map_t target_map,
4943 vm_map_offset_t *address,
4944 vm_map_size_t initial_size,
4945 vm_map_offset_t mask,
4946 int flags,
4947 vm_map_kernel_flags_t vmk_flags,
4948 vm_tag_t tag,
4949 memory_object_control_t control,
4950 vm_object_offset_t offset,
4951 boolean_t copy,
4952 vm_prot_t cur_protection,
4953 vm_prot_t max_protection,
4954 vm_inherit_t inheritance)
4955 {
4956 vm_map_address_t map_addr;
4957 vm_map_size_t map_size;
4958 vm_object_t object;
4959 vm_object_size_t size;
4960 kern_return_t result;
4961 memory_object_t pager;
4962 vm_prot_t pager_prot;
4963 kern_return_t kr;
4964 #if __arm64__
4965 boolean_t fourk = vmk_flags.vmkf_fourk;
4966 #endif /* __arm64__ */
4967
4968 /*
4969 * Check arguments for validity
4970 */
4971 if ((target_map == VM_MAP_NULL) ||
4972 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4973 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4974 (inheritance > VM_INHERIT_LAST_VALID) ||
4975 initial_size == 0) {
4976 return KERN_INVALID_ARGUMENT;
4977 }
4978
4979 #if __arm64__
4980 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
4981 fourk = FALSE;
4982 }
4983
4984 if (fourk) {
4985 map_addr = vm_map_trunc_page(*address,
4986 FOURK_PAGE_MASK);
4987 map_size = vm_map_round_page(initial_size,
4988 FOURK_PAGE_MASK);
4989 } else
4990 #endif /* __arm64__ */
4991 {
4992 map_addr = vm_map_trunc_page(*address,
4993 VM_MAP_PAGE_MASK(target_map));
4994 map_size = vm_map_round_page(initial_size,
4995 VM_MAP_PAGE_MASK(target_map));
4996 }
4997 size = vm_object_round_page(initial_size);
4998
4999 object = memory_object_control_to_vm_object(control);
5000
5001 if (object == VM_OBJECT_NULL) {
5002 return KERN_INVALID_OBJECT;
5003 }
5004
5005 if (object == kernel_object) {
5006 printf("Warning: Attempt to map kernel object"
5007 " by a non-private kernel entity\n");
5008 return KERN_INVALID_OBJECT;
5009 }
5010
5011 vm_object_lock(object);
5012 object->ref_count++;
5013
5014 /*
5015 * For "named" VM objects, let the pager know that the
5016 * memory object is being mapped. Some pagers need to keep
5017 * track of this, to know when they can reclaim the memory
5018 * object, for example.
5019 * VM calls memory_object_map() for each mapping (specifying
5020 * the protection of each mapping) and calls
5021 * memory_object_last_unmap() when all the mappings are gone.
5022 */
5023 pager_prot = max_protection;
5024 if (copy) {
5025 pager_prot &= ~VM_PROT_WRITE;
5026 }
5027 pager = object->pager;
5028 if (object->named &&
5029 pager != MEMORY_OBJECT_NULL &&
5030 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
5031 assert(object->pager_ready);
5032 vm_object_mapping_wait(object, THREAD_UNINT);
5033 vm_object_mapping_begin(object);
5034 vm_object_unlock(object);
5035
5036 kr = memory_object_map(pager, pager_prot);
5037 assert(kr == KERN_SUCCESS);
5038
5039 vm_object_lock(object);
5040 vm_object_mapping_end(object);
5041 }
5042 vm_object_unlock(object);
5043
5044 /*
5045 * Perform the copy if requested
5046 */
5047
5048 if (copy) {
5049 vm_object_t new_object;
5050 vm_object_offset_t new_offset;
5051
5052 result = vm_object_copy_strategically(object, offset, size,
5053 &new_object, &new_offset,
5054 ©);
5055
5056
5057 if (result == KERN_MEMORY_RESTART_COPY) {
5058 boolean_t success;
5059 boolean_t src_needs_copy;
5060
5061 /*
5062 * XXX
5063 * We currently ignore src_needs_copy.
5064 * This really is the issue of how to make
5065 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5066 * non-kernel users to use. Solution forthcoming.
5067 * In the meantime, since we don't allow non-kernel
5068 * memory managers to specify symmetric copy,
5069 * we won't run into problems here.
5070 */
5071 new_object = object;
5072 new_offset = offset;
5073 success = vm_object_copy_quickly(new_object,
5074 new_offset, size,
5075 &src_needs_copy,
5076 ©);
5077 assert(success);
5078 result = KERN_SUCCESS;
5079 }
5080 /*
5081 * Throw away the reference to the
5082 * original object, as it won't be mapped.
5083 */
5084
5085 vm_object_deallocate(object);
5086
5087 if (result != KERN_SUCCESS) {
5088 return result;
5089 }
5090
5091 object = new_object;
5092 offset = new_offset;
5093 }
5094
5095 #if __arm64__
5096 if (fourk) {
5097 result = vm_map_enter_fourk(target_map,
5098 &map_addr,
5099 map_size,
5100 (vm_map_offset_t)mask,
5101 flags,
5102 vmk_flags,
5103 tag,
5104 object, offset,
5105 copy,
5106 cur_protection, max_protection,
5107 inheritance);
5108 } else
5109 #endif /* __arm64__ */
5110 {
5111 result = vm_map_enter(target_map,
5112 &map_addr, map_size,
5113 (vm_map_offset_t)mask,
5114 flags,
5115 vmk_flags,
5116 tag,
5117 object, offset,
5118 copy,
5119 cur_protection, max_protection,
5120 inheritance);
5121 }
5122 if (result != KERN_SUCCESS) {
5123 vm_object_deallocate(object);
5124 }
5125 *address = map_addr;
5126
5127 return result;
5128 }
5129
5130
5131 #if VM_CPM
5132
5133 #ifdef MACH_ASSERT
5134 extern pmap_paddr_t avail_start, avail_end;
5135 #endif
5136
5137 /*
5138 * Allocate memory in the specified map, with the caveat that
5139 * the memory is physically contiguous. This call may fail
5140 * if the system can't find sufficient contiguous memory.
5141 * This call may cause or lead to heart-stopping amounts of
5142 * paging activity.
5143 *
5144 * Memory obtained from this call should be freed in the
5145 * normal way, viz., via vm_deallocate.
5146 */
5147 kern_return_t
vm_map_enter_cpm(vm_map_t map,vm_map_offset_t * addr,vm_map_size_t size,int flags,vm_map_kernel_flags_t vmk_flags)5148 vm_map_enter_cpm(
5149 vm_map_t map,
5150 vm_map_offset_t *addr,
5151 vm_map_size_t size,
5152 int flags,
5153 vm_map_kernel_flags_t vmk_flags)
5154 {
5155 vm_object_t cpm_obj;
5156 pmap_t pmap;
5157 vm_page_t m, pages;
5158 kern_return_t kr;
5159 vm_map_offset_t va, start, end, offset;
5160 #if MACH_ASSERT
5161 vm_map_offset_t prev_addr = 0;
5162 #endif /* MACH_ASSERT */
5163
5164 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5165 vm_tag_t tag;
5166
5167 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5168 /* XXX TODO4K do we need to support this? */
5169 *addr = 0;
5170 return KERN_NOT_SUPPORTED;
5171 }
5172
5173 VM_GET_FLAGS_ALIAS(flags, tag);
5174
5175 if (size == 0) {
5176 *addr = 0;
5177 return KERN_SUCCESS;
5178 }
5179 if (anywhere) {
5180 *addr = vm_map_min(map);
5181 } else {
5182 *addr = vm_map_trunc_page(*addr,
5183 VM_MAP_PAGE_MASK(map));
5184 }
5185 size = vm_map_round_page(size,
5186 VM_MAP_PAGE_MASK(map));
5187
5188 /*
5189 * LP64todo - cpm_allocate should probably allow
5190 * allocations of >4GB, but not with the current
5191 * algorithm, so just cast down the size for now.
5192 */
5193 if (size > VM_MAX_ADDRESS) {
5194 return KERN_RESOURCE_SHORTAGE;
5195 }
5196 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5197 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5198 return kr;
5199 }
5200
5201 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5202 assert(cpm_obj != VM_OBJECT_NULL);
5203 assert(cpm_obj->internal);
5204 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5205 assert(cpm_obj->can_persist == FALSE);
5206 assert(cpm_obj->pager_created == FALSE);
5207 assert(cpm_obj->pageout == FALSE);
5208 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5209
5210 /*
5211 * Insert pages into object.
5212 */
5213
5214 vm_object_lock(cpm_obj);
5215 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5216 m = pages;
5217 pages = NEXT_PAGE(m);
5218 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5219
5220 assert(!m->vmp_gobbled);
5221 assert(!m->vmp_wanted);
5222 assert(!m->vmp_pageout);
5223 assert(!m->vmp_tabled);
5224 assert(VM_PAGE_WIRED(m));
5225 assert(m->vmp_busy);
5226 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5227
5228 m->vmp_busy = FALSE;
5229 vm_page_insert(m, cpm_obj, offset);
5230 }
5231 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5232 vm_object_unlock(cpm_obj);
5233
5234 /*
5235 * Hang onto a reference on the object in case a
5236 * multi-threaded application for some reason decides
5237 * to deallocate the portion of the address space into
5238 * which we will insert this object.
5239 *
5240 * Unfortunately, we must insert the object now before
5241 * we can talk to the pmap module about which addresses
5242 * must be wired down. Hence, the race with a multi-
5243 * threaded app.
5244 */
5245 vm_object_reference(cpm_obj);
5246
5247 /*
5248 * Insert object into map.
5249 */
5250
5251 kr = vm_map_enter(
5252 map,
5253 addr,
5254 size,
5255 (vm_map_offset_t)0,
5256 flags,
5257 vmk_flags,
5258 cpm_obj,
5259 (vm_object_offset_t)0,
5260 FALSE,
5261 VM_PROT_ALL,
5262 VM_PROT_ALL,
5263 VM_INHERIT_DEFAULT);
5264
5265 if (kr != KERN_SUCCESS) {
5266 /*
5267 * A CPM object doesn't have can_persist set,
5268 * so all we have to do is deallocate it to
5269 * free up these pages.
5270 */
5271 assert(cpm_obj->pager_created == FALSE);
5272 assert(cpm_obj->can_persist == FALSE);
5273 assert(cpm_obj->pageout == FALSE);
5274 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5275 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5276 vm_object_deallocate(cpm_obj); /* kill creation ref */
5277 }
5278
5279 /*
5280 * Inform the physical mapping system that the
5281 * range of addresses may not fault, so that
5282 * page tables and such can be locked down as well.
5283 */
5284 start = *addr;
5285 end = start + size;
5286 pmap = vm_map_pmap(map);
5287 pmap_pageable(pmap, start, end, FALSE);
5288
5289 /*
5290 * Enter each page into the pmap, to avoid faults.
5291 * Note that this loop could be coded more efficiently,
5292 * if the need arose, rather than looking up each page
5293 * again.
5294 */
5295 for (offset = 0, va = start; offset < size;
5296 va += PAGE_SIZE, offset += PAGE_SIZE) {
5297 int type_of_fault;
5298
5299 vm_object_lock(cpm_obj);
5300 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5301 assert(m != VM_PAGE_NULL);
5302
5303 vm_page_zero_fill(m);
5304
5305 type_of_fault = DBG_ZERO_FILL_FAULT;
5306
5307 vm_fault_enter(m, pmap, va,
5308 PAGE_SIZE, 0,
5309 VM_PROT_ALL, VM_PROT_WRITE,
5310 VM_PAGE_WIRED(m),
5311 FALSE, /* change_wiring */
5312 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5313 FALSE, /* no_cache */
5314 FALSE, /* cs_bypass */
5315 0, /* user_tag */
5316 0, /* pmap_options */
5317 NULL, /* need_retry */
5318 &type_of_fault);
5319
5320 vm_object_unlock(cpm_obj);
5321 }
5322
5323 #if MACH_ASSERT
5324 /*
5325 * Verify ordering in address space.
5326 */
5327 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5328 vm_object_lock(cpm_obj);
5329 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5330 vm_object_unlock(cpm_obj);
5331 if (m == VM_PAGE_NULL) {
5332 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5333 cpm_obj, (uint64_t)offset);
5334 }
5335 assert(m->vmp_tabled);
5336 assert(!m->vmp_busy);
5337 assert(!m->vmp_wanted);
5338 assert(!m->vmp_fictitious);
5339 assert(!m->vmp_private);
5340 assert(!m->vmp_absent);
5341 assert(!m->vmp_cleaning);
5342 assert(!m->vmp_laundry);
5343 assert(!m->vmp_precious);
5344 assert(!m->vmp_clustered);
5345 if (offset != 0) {
5346 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5347 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5348 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5349 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5350 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5351 panic("vm_allocate_cpm: pages not contig!");
5352 }
5353 }
5354 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5355 }
5356 #endif /* MACH_ASSERT */
5357
5358 vm_object_deallocate(cpm_obj); /* kill extra ref */
5359
5360 return kr;
5361 }
5362
5363
5364 #else /* VM_CPM */
5365
5366 /*
5367 * Interface is defined in all cases, but unless the kernel
5368 * is built explicitly for this option, the interface does
5369 * nothing.
5370 */
5371
/*
 * vm_map_enter_cpm:
 *	Stub used when the kernel is built without VM_CPM: the
 *	contiguous-physical-memory allocation interface is exported
 *	but always fails.  All parameters are ignored.
 */
kern_return_t
vm_map_enter_cpm(
	__unused vm_map_t map,
	__unused vm_map_offset_t *addr,
	__unused vm_map_size_t size,
	__unused int flags,
	__unused vm_map_kernel_flags_t vmk_flags)
{
	return KERN_FAILURE;
}
5382 #endif /* VM_CPM */
5383
5384 /* Not used without nested pmaps */
5385 #ifndef NO_NESTED_PMAP
5386 /*
5387 * Clip and unnest a portion of a nested submap mapping.
5388 */
5389
5390
/*
 * vm_map_clip_unnest:
 *	Clip "entry" (which must be a nested, pmap-sharing submap entry)
 *	to the range [start_unnest, end_unnest) and stop sharing the
 *	submap's page tables for that range.
 *	The map is expected to be locked by the caller.
 */
static void
vm_map_clip_unnest(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t start_unnest,
	vm_map_offset_t end_unnest)
{
	/* Remember the caller's range so we can log if the pmap widens it. */
	vm_map_offset_t old_start_unnest = start_unnest;
	vm_map_offset_t old_end_unnest = end_unnest;

	/* Only meaningful for a nested (pmap-sharing) submap entry. */
	assert(entry->is_sub_map);
	assert(VME_SUBMAP(entry) != NULL);
	assert(entry->use_pmap);

	/*
	 * Query the platform for the optimal unnest range.
	 * DRK: There's some duplication of effort here, since
	 * callers may have adjusted the range to some extent.  This
	 * routine was introduced to support 1GiB subtree nesting
	 * for x86 platforms, which can also nest on 2MiB boundaries
	 * depending on size/alignment.
	 */
	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
		assert(VME_SUBMAP(entry)->is_nested_map);
		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
		/* The platform adjusted the range: record the event. */
		log_unnest_badness(map,
		    old_start_unnest,
		    old_end_unnest,
		    VME_SUBMAP(entry)->is_nested_map,
		    (entry->vme_start +
		    VME_SUBMAP(entry)->lowest_unnestable_start -
		    VME_OFFSET(entry)));
	}

	/* The (possibly adjusted) unnest range must lie within the entry. */
	if (entry->vme_start > start_unnest ||
	    entry->vme_end < end_unnest) {
		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
		    "bad nested entry: start=0x%llx end=0x%llx\n",
		    (long long)start_unnest, (long long)end_unnest,
		    (long long)entry->vme_start, (long long)entry->vme_end);
	}

	/* Split off any portion of the entry before the unnest range. */
	if (start_unnest > entry->vme_start) {
		_vm_map_clip_start(&map->hdr,
		    entry,
		    start_unnest);
		/* The entry list changed: refresh the free-space hint. */
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
	/* Split off any portion of the entry after the unnest range. */
	if (entry->vme_end > end_unnest) {
		_vm_map_clip_end(&map->hdr,
		    entry,
		    end_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}

	/* Detach the shared page tables for what is now exactly the entry. */
	pmap_unnest(map->pmap,
	    entry->vme_start,
	    entry->vme_end - entry->vme_start);
	if ((map->mapped_in_other_pmaps) && os_ref_get_count_raw(&map->map_refcnt) != 0) {
		/* clean up parent map/maps */
		vm_map_submap_pmap_clean(
			map, entry->vme_start,
			entry->vme_end,
			VME_SUBMAP(entry),
			VME_OFFSET(entry));
	}
	/* This entry no longer shares the submap's pmap. */
	entry->use_pmap = FALSE;
	if ((map->pmap != kernel_pmap) &&
	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
		/* Reflect the unnesting in the entry's memory tag. */
		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
	}
}
5471 #endif /* NO_NESTED_PMAP */
5472
/*
 * Panic helper for the clipping paths: called when a clip operation
 * would split a VM map entry marked atomic (vme_atomic), which must
 * never be subdivided.  Never returns.
 */
__abortlike
static void
__vm_map_clip_atomic_entry_panic(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t where)
{
	panic("vm_map_clip(%p): Attempting to clip an atomic VM map entry "
	    "%p [0x%llx:0x%llx] at 0x%llx", map, entry,
	    (uint64_t)entry->vme_start,
	    (uint64_t)entry->vme_end,
	    (uint64_t)where);
}
5486
5487 /*
5488 * vm_map_clip_start: [ internal use only ]
5489 *
5490 * Asserts that the given entry begins at or after
5491 * the specified address; if necessary,
5492 * it splits the entry into two.
5493 */
void
vm_map_clip_start(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t startaddr)
{
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map &&
	    entry->use_pmap &&
	    startaddr >= entry->vme_start) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure "startaddr" is no longer in a nested range
		 * before we clip.  Unnest only the minimum range the platform
		 * can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
		end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (startaddr > entry->vme_start) {
		if (!entry->is_sub_map &&
		    VME_OBJECT(entry) &&
		    VME_OBJECT(entry)->phys_contiguous) {
			/*
			 * Remove the entire pmap range of a physically
			 * contiguous object before splitting the entry.
			 * NOTE(review): presumably phys_contiguous mappings
			 * must be entered/removed as a unit — confirm.
			 */
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			/* Atomic entries must never be split. */
			__vm_map_clip_atomic_entry_panic(map, entry, startaddr);
		}

		DTRACE_VM5(
			vm_map_clip_start,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, startaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_start(&map->hdr, entry, startaddr);
		/* The entry list changed: refresh the free-space hint. */
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
5546
5547
/*
 * vm_map_copy_clip_start:	[ internal use only ]
 *	Clip "entry" in a vm_map_copy's entry list so that it starts at
 *	"startaddr" (no-op if it already starts at or after it).
 *	Unlike vm_map_clip_start(), no unnesting or store-hint update
 *	is needed for a copy list.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END
5553
5554 /*
5555 * This routine is called only when it is known that
5556 * the entry must be split.
5557 */
/*
 * _vm_map_clip_start:
 *	Split "entry" at "start": a new entry covering
 *	[vme_start, start) is inserted before it, and "entry" is
 *	shrunk to [start, vme_end).  Called only when the split is
 *	known to be required (vme_start < start < vme_end).
 */
static void
_vm_map_clip_start(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         start)
{
	vm_map_entry_t  new_entry;

	/*
	 * Split off the front portion --
	 * note that we must insert the new
	 * entry BEFORE this one, so that
	 * this entry has the specified starting
	 * address.
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header);
	vm_map_entry_copy_full(new_entry, entry);

	new_entry->vme_end = start;
	assert(new_entry->vme_start < new_entry->vme_end);
	/* Advance the object offset to match the entry's new start. */
	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
	assert(start < entry->vme_end);
	entry->vme_start = start;

	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

	/*
	 * Both entries now reference the same submap/object, so take
	 * an additional reference for the new entry.
	 */
	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
5596
5597
5598 /*
5599 * vm_map_clip_end: [ internal use only ]
5600 *
5601 * Asserts that the given entry ends at or before
5602 * the specified address; if necessary,
5603 * it splits the entry into two.
5604 */
void
vm_map_clip_end(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t endaddr)
{
	if (endaddr > entry->vme_end) {
		/*
		 * Within the scope of this clipping, limit "endaddr" to
		 * the end of this map entry...
		 */
		endaddr = entry->vme_end;
	}
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map && entry->use_pmap) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure the range between the start of this entry and
		 * the new "endaddr" is no longer nested before we clip.
		 * Unnest only the minimum range the platform can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = entry->vme_start;
		end_unnest =
		    (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
		    ~(pmap_shared_region_size_min(map->pmap) - 1);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (endaddr < entry->vme_end) {
		if (!entry->is_sub_map &&
		    VME_OBJECT(entry) &&
		    VME_OBJECT(entry)->phys_contiguous) {
			/*
			 * Remove the entire pmap range of a physically
			 * contiguous object before splitting the entry.
			 * NOTE(review): presumably phys_contiguous mappings
			 * must be entered/removed as a unit — confirm.
			 */
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			/* Atomic entries must never be split. */
			__vm_map_clip_atomic_entry_panic(map, entry, endaddr);
		}
		DTRACE_VM5(
			vm_map_clip_end,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, endaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_end(&map->hdr, entry, endaddr);
		/* The entry list changed: refresh the free-space hint. */
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
5663
5664
/*
 * vm_map_copy_clip_end:	[ internal use only ]
 *	Clip "entry" in a vm_map_copy's entry list so that it ends at
 *	"endaddr" (no-op if it already ends at or before it).
 */
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END
5670
5671 /*
5672 * This routine is called only when it is known that
5673 * the entry must be split.
5674 */
/*
 * _vm_map_clip_end:
 *	Split "entry" at "end": "entry" is shrunk to [vme_start, end)
 *	and a new entry covering [end, vme_end) is inserted after it.
 *	Called only when the split is known to be required
 *	(vme_start < end < vme_end).
 */
static void
_vm_map_clip_end(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         end)
{
	vm_map_entry_t  new_entry;

	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(end,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header);
	vm_map_entry_copy_full(new_entry, entry);

	assert(entry->vme_start < end);
	new_entry->vme_start = entry->vme_end = end;
	/* The new (trailing) entry's object offset starts at "end". */
	VME_OFFSET_SET(new_entry,
	    VME_OFFSET(new_entry) + (end - entry->vme_start));
	assert(new_entry->vme_start < new_entry->vme_end);

	_vm_map_store_entry_link(map_header, entry, new_entry);

	/*
	 * Both entries now reference the same submap/object, so take
	 * an additional reference for the new entry.
	 */
	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
5710
5711
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 *	Note: rather than failing, this clamps "start" and "end"
 *	in place; an inverted range collapses to start == end.
 */
#define VM_MAP_RANGE_CHECK(map, start, end) \
	MACRO_BEGIN \
	if (start < vm_map_min(map)) \
	        start = vm_map_min(map); \
	if (end > vm_map_max(map)) \
	        end = vm_map_max(map); \
	if (start > end) \
	        start = end; \
	MACRO_END
5727
5728 /*
5729 * vm_map_range_check: [ internal use only ]
5730 *
5731 * Check that the region defined by the specified start and
5732 * end addresses are wholly contained within a single map
5733 * entry or set of adjacent map entries of the spacified map,
5734 * i.e. the specified region contains no unmapped space.
5735 * If any or all of the region is unmapped, FALSE is returned.
5736 * Otherwise, TRUE is returned and if the output argument 'entry'
5737 * is not NULL it points to the map entry containing the start
5738 * of the region.
5739 *
5740 * The map is locked for reading on entry and is left locked.
5741 */
5742 static boolean_t
vm_map_range_check(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_map_entry_t * entry)5743 vm_map_range_check(
5744 vm_map_t map,
5745 vm_map_offset_t start,
5746 vm_map_offset_t end,
5747 vm_map_entry_t *entry)
5748 {
5749 vm_map_entry_t cur;
5750 vm_map_offset_t prev;
5751
5752 /*
5753 * Basic sanity checks first
5754 */
5755 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5756 return FALSE;
5757 }
5758
5759 /*
5760 * Check first if the region starts within a valid
5761 * mapping for the map.
5762 */
5763 if (!vm_map_lookup_entry(map, start, &cur)) {
5764 return FALSE;
5765 }
5766
5767 /*
5768 * Optimize for the case that the region is contained
5769 * in a single map entry.
5770 */
5771 if (entry != (vm_map_entry_t *) NULL) {
5772 *entry = cur;
5773 }
5774 if (end <= cur->vme_end) {
5775 return TRUE;
5776 }
5777
5778 /*
5779 * If the region is not wholly contained within a
5780 * single entry, walk the entries looking for holes.
5781 */
5782 prev = cur->vme_end;
5783 cur = cur->vme_next;
5784 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5785 if (end <= cur->vme_end) {
5786 return TRUE;
5787 }
5788 prev = cur->vme_end;
5789 cur = cur->vme_next;
5790 }
5791 return FALSE;
5792 }
5793
5794 /*
5795 * vm_map_protect:
5796 *
5797 * Sets the protection of the specified address
5798 * region in the target map. If "set_max" is
5799 * specified, the maximum protection is to be set;
5800 * otherwise, only the current protection is affected.
5801 */
kern_return_t
vm_map_protect(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       new_prot,
	boolean_t       set_max)
{
	vm_map_entry_t  current;
	vm_map_offset_t prev;           /* end of the previous entry (hole detection) */
	vm_map_entry_t  entry;          /* first entry in the range */
	vm_prot_t       new_max;
	int             pmap_options = 0;
	kern_return_t   kr;

	/*
	 * VM_PROT_COPY requests a copy-on-write remapping of the range
	 * (with VM_PROT_WRITE added to the max protections) before the
	 * actual protection change is applied below.
	 */
	if (new_prot & VM_PROT_COPY) {
		vm_map_offset_t         new_start;
		vm_prot_t               cur_prot, max_prot;
		vm_map_kernel_flags_t   kflags;

		/* LP64todo - see below */
		if (start >= map->max_offset) {
			return KERN_INVALID_ADDRESS;
		}

		/* Refuse W+X up front when the map's policy is to fail hard. */
		if ((new_prot & VM_PROT_ALLEXEC) &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if XNU_TARGET_OS_OSX && __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* XNU_TARGET_OS_OSX && __arm64__ */
		    ) &&
		    VM_MAP_POLICY_WX_FAIL(map)) {
			DTRACE_VM3(cs_wx,
			    uint64_t, (uint64_t) start,
			    uint64_t, (uint64_t) end,
			    vm_prot_t, new_prot);
			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
			    proc_selfpid(),
			    (get_bsdtask_info(current_task())
			    ? proc_name_address(get_bsdtask_info(current_task()))
			    : "?"),
			    __FUNCTION__);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * Let vm_map_remap_extract() know that it will need to:
		 * + make a copy of the mapping
		 * + add VM_PROT_WRITE to the max protections
		 * + remove any protections that are no longer allowed from the
		 *   max protections (to avoid any WRITE/EXECUTE conflict, for
		 *   example).
		 * Note that "max_prot" is an IN/OUT parameter only for this
		 * specific (VM_PROT_COPY) case.  It's usually an OUT parameter
		 * only.
		 */
		max_prot = new_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC);
		cur_prot = VM_PROT_NONE;
		kflags = VM_MAP_KERNEL_FLAGS_NONE;
		kflags.vmkf_remap_prot_copy = TRUE;
		new_start = start;
		kr = vm_map_remap(map,
		    &new_start,
		    end - start,
		    0, /* mask */
		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
		    kflags,
		    0,
		    map,
		    start,
		    TRUE, /* copy-on-write remapping! */
		    &cur_prot, /* IN/OUT */
		    &max_prot, /* IN/OUT */
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		/* The copy is in place; proceed with the plain protections. */
		new_prot &= ~VM_PROT_COPY;
	}

	vm_map_lock(map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (start >= map->max_offset) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	while (1) {
		/*
		 * Lookup the entry.  If it doesn't start in a valid
		 * entry, return an error.
		 */
		if (!vm_map_lookup_entry(map, start, &entry)) {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
			/* Superpages change protection as a whole; retry lookup. */
			start = SUPERPAGE_ROUND_DOWN(start);
			continue;
		}
		break;
	}
	if (entry->superpage_size) {
		end = SUPERPAGE_ROUND_UP(end);
	}

	/*
	 * Make a first pass to check for protection and address
	 * violations.
	 */

	current = entry;
	prev = current->vme_start;
	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start < end)) {
		/*
		 * If there is a hole, return an error.
		 */
		if (current->vme_start != prev) {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		new_max = current->max_protection;

#if defined(__x86_64__)
		/* Allow max mask to include execute prot bits if this map doesn't enforce CS */
		if (set_max && (new_prot & VM_PROT_ALLEXEC) && !vm_map_cs_enforcement(map)) {
			new_max = (new_max & ~VM_PROT_ALLEXEC) | (new_prot & VM_PROT_ALLEXEC);
		}
#endif
		/* The request must be within the entry's maximum protections. */
		if ((new_prot & new_max) != new_prot) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}

		if (current->used_for_jit &&
		    pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}

#if __arm64e__
		/* Disallow remapping hw assisted TPRO mappings */
		if (current->used_for_tpro) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}
#endif /* __arm64e__ */


		/*
		 * W+X requests (outside JIT regions) are either stripped of
		 * their exec bits or rejected, depending on map policy.
		 */
		if ((new_prot & VM_PROT_WRITE) &&
		    (new_prot & VM_PROT_ALLEXEC) &&
#if XNU_TARGET_OS_OSX
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    ) &&
#endif /* XNU_TARGET_OS_OSX */
		    !(current->used_for_jit)) {
			DTRACE_VM3(cs_wx,
			    uint64_t, (uint64_t) current->vme_start,
			    uint64_t, (uint64_t) current->vme_end,
			    vm_prot_t, new_prot);
			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
			    proc_selfpid(),
			    (get_bsdtask_info(current_task())
			    ? proc_name_address(get_bsdtask_info(current_task()))
			    : "?"),
			    __FUNCTION__);
			new_prot &= ~VM_PROT_ALLEXEC;
			if (VM_MAP_POLICY_WX_FAIL(map)) {
				vm_map_unlock(map);
				return KERN_PROTECTION_FAILURE;
			}
		}

		/*
		 * If the task has requested executable lockdown,
		 * deny both:
		 * - adding executable protections OR
		 * - adding write protections to an existing executable mapping.
		 */
		if (map->map_disallow_new_exec == TRUE) {
			if ((new_prot & VM_PROT_ALLEXEC) ||
			    ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
				vm_map_unlock(map);
				return KERN_PROTECTION_FAILURE;
			}
		}

		prev = current->vme_end;
		current = current->vme_next;
	}

#if __arm64__
	if (end > prev &&
	    end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
		vm_map_entry_t prev_entry;

		prev_entry = current->vme_prev;
		if (prev_entry != vm_map_to_entry(map) &&
		    !prev_entry->map_aligned &&
		    (vm_map_round_page(prev_entry->vme_end,
		    VM_MAP_PAGE_MASK(map))
		    == end)) {
			/*
			 * The last entry in our range is not "map-aligned"
			 * but it would have reached all the way to "end"
			 * if it had been map-aligned, so this is not really
			 * a hole in the range and we can proceed.
			 */
			prev = end;
		}
	}
#endif /* __arm64__ */

	/* A trailing hole (range extends past the last entry) is an error. */
	if (end > prev) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Go back and fix up protections.
	 * Clip to start here if the range starts within
	 * the entry.
	 */

	current = entry;
	if (current != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, current, start);
	}

	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start < end)) {
		vm_prot_t old_prot;

		vm_map_clip_end(map, current, end);

		if (current->is_sub_map) {
			/* clipping did unnest if needed */
			assert(!current->use_pmap);
		}

		old_prot = current->protection;

		if (set_max) {
			current->max_protection = new_prot;
			/* Consider either EXECUTE or UEXEC as EXECUTE for this masking */
			current->protection = (new_prot & old_prot);
		} else {
			current->protection = new_prot;
		}

		/*
		 * Update physical map if necessary.
		 * If the request is to turn off write protection,
		 * we won't do it for real (in pmap). This is because
		 * it would cause copy-on-write to fail.  We've already
		 * set the new protection in the map, so if a
		 * write-protect fault occurred, it will be fixed up
		 * properly, COW or not.
		 */
		if (current->protection != old_prot) {
			/* Look one level in we support nested pmaps */
			/* from mapped submaps which are direct entries */
			/* in our map */

			vm_prot_t prot;

			prot = current->protection;
			if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
				/* See COW note above: don't grant write in pmap. */
				prot &= ~VM_PROT_WRITE;
			} else {
				assert(!VME_OBJECT(current)->code_signed);
				assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
				if (prot & VM_PROT_WRITE) {
					/*
					 * For write requests on the
					 * compressor, we will ask the
					 * pmap layer to prevent us from
					 * taking a write fault when we
					 * attempt to access the mapping
					 * next.
					 */
					pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
				}
			}

			if (override_nx(map, VME_ALIAS(current)) && prot) {
				prot |= VM_PROT_EXECUTE;
			}

#if DEVELOPMENT || DEBUG
			if (!(old_prot & VM_PROT_EXECUTE) &&
			    (prot & VM_PROT_EXECUTE) &&
			    panic_on_unsigned_execute &&
			    (proc_selfcsflags() & CS_KILL)) {
				panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
			}
#endif /* DEVELOPMENT || DEBUG */

			if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
				if (current->wired_count) {
					panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x",
					    map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
				}

				/* If the pmap layer cares about this
				 * protection type, force a fault for
				 * each page so that vm_fault will
				 * repopulate the page with the full
				 * set of protections.
				 */
				/*
				 * TODO: We don't seem to need this,
				 * but this is due to an internal
				 * implementation detail of
				 * pmap_protect.  Do we want to rely
				 * on this?
				 */
				prot = VM_PROT_NONE;
			}

			if (current->is_sub_map && current->use_pmap) {
				/* Nested submap: protect in the submap's own pmap. */
				pmap_protect(VME_SUBMAP(current)->pmap,
				    current->vme_start,
				    current->vme_end,
				    prot);
			} else {
				pmap_protect_options(map->pmap,
				    current->vme_start,
				    current->vme_end,
				    prot,
				    pmap_options,
				    NULL);
			}
		}
		current = current->vme_next;
	}

	/* Coalesce any entries the clipping and updates made mergeable. */
	current = entry;
	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start <= end)) {
		vm_map_simplify_entry(map, current);
		current = current->vme_next;
	}

	vm_map_unlock(map);
	return KERN_SUCCESS;
}
6162
6163 /*
6164 * vm_map_inherit:
6165 *
6166 * Sets the inheritance of the specified address
6167 * range in the target map. Inheritance
6168 * affects how the map will be shared with
6169 * child maps at the time of vm_map_fork.
6170 */
6171 kern_return_t
vm_map_inherit(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_inherit_t new_inheritance)6172 vm_map_inherit(
6173 vm_map_t map,
6174 vm_map_offset_t start,
6175 vm_map_offset_t end,
6176 vm_inherit_t new_inheritance)
6177 {
6178 vm_map_entry_t entry;
6179 vm_map_entry_t temp_entry;
6180
6181 vm_map_lock(map);
6182
6183 VM_MAP_RANGE_CHECK(map, start, end);
6184
6185 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6186 entry = temp_entry;
6187 } else {
6188 temp_entry = temp_entry->vme_next;
6189 entry = temp_entry;
6190 }
6191
6192 /* first check entire range for submaps which can't support the */
6193 /* given inheritance. */
6194 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6195 if (entry->is_sub_map) {
6196 if (new_inheritance == VM_INHERIT_COPY) {
6197 vm_map_unlock(map);
6198 return KERN_INVALID_ARGUMENT;
6199 }
6200 }
6201
6202 entry = entry->vme_next;
6203 }
6204
6205 entry = temp_entry;
6206 if (entry != vm_map_to_entry(map)) {
6207 /* clip and unnest if necessary */
6208 vm_map_clip_start(map, entry, start);
6209 }
6210
6211 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6212 vm_map_clip_end(map, entry, end);
6213 if (entry->is_sub_map) {
6214 /* clip did unnest if needed */
6215 assert(!entry->use_pmap);
6216 }
6217
6218 entry->inheritance = new_inheritance;
6219
6220 entry = entry->vme_next;
6221 }
6222
6223 vm_map_unlock(map);
6224 return KERN_SUCCESS;
6225 }
6226
6227 /*
6228 * Update the accounting for the amount of wired memory in this map. If the user has
6229 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6230 */
6231
/*
 * add_wire_counts:
 *	Account for one more wiring of "entry".  For user wirings the
 *	per-map and global user wire limits are enforced and the map's
 *	user_wire_size is updated; kernel wirings never fail (they
 *	panic on wired_count overflow instead).
 */
static kern_return_t
add_wire_counts(
	vm_map_t        map,
	vm_map_entry_t  entry,
	boolean_t       user_wire)
{
	vm_map_size_t   size;

	if (user_wire) {
		unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

		/*
		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
		 * this map entry.
		 */

		if (entry->user_wired_count == 0) {
			size = entry->vme_end - entry->vme_start;

			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per map limit which is the smaller of either
			 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value.  There is also
			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
			 * limit, then we fail.
			 */

			if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
			    size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
				/* Record which limit was hit (diagnostics). */
				if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
#if DEVELOPMENT || DEBUG
					if (panic_on_mlock_failure) {
						panic("mlock: Over global wire limit. %llu bytes wired and requested to wire %llu bytes more", ptoa_64(total_wire_count), (uint64_t) size);
					}
#endif /* DEVELOPMENT || DEBUG */
					os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
				} else {
					os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
#if DEVELOPMENT || DEBUG
					if (panic_on_mlock_failure) {
						panic("mlock: Over process wire limit. %llu bytes wired and requested to wire %llu bytes more", (uint64_t) map->user_wire_size, (uint64_t) size);
					}
#endif /* DEVELOPMENT || DEBUG */
				}
				return KERN_RESOURCE_SHORTAGE;
			}

			/*
			 * The first time the user wires an entry, we also increment the wired_count and add this to
			 * the total that has been wired in the map.
			 */

			if (entry->wired_count >= MAX_WIRE_COUNT) {
				return KERN_FAILURE;
			}

			entry->wired_count++;
			map->user_wire_size += size;
		}

		if (entry->user_wired_count >= MAX_WIRE_COUNT) {
			return KERN_FAILURE;
		}

		entry->user_wired_count++;
	} else {
		/*
		 * The kernel's wiring the memory.  Just bump the count and continue.
		 */

		if (entry->wired_count >= MAX_WIRE_COUNT) {
			panic("vm_map_wire: too many wirings");
		}

		entry->wired_count++;
	}

	return KERN_SUCCESS;
}
6311
6312 /*
6313 * Update the memory wiring accounting now that the given map entry is being unwired.
6314 */
6315
6316 static void
subtract_wire_counts(vm_map_t map,vm_map_entry_t entry,boolean_t user_wire)6317 subtract_wire_counts(
6318 vm_map_t map,
6319 vm_map_entry_t entry,
6320 boolean_t user_wire)
6321 {
6322 if (user_wire) {
6323 /*
6324 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6325 */
6326
6327 if (entry->user_wired_count == 1) {
6328 /*
6329 * We're removing the last user wire reference. Decrement the wired_count and the total
6330 * user wired memory for this map.
6331 */
6332
6333 assert(entry->wired_count >= 1);
6334 entry->wired_count--;
6335 map->user_wire_size -= entry->vme_end - entry->vme_start;
6336 }
6337
6338 assert(entry->user_wired_count >= 1);
6339 entry->user_wired_count--;
6340 } else {
6341 /*
6342 * The kernel is unwiring the memory. Just update the count.
6343 */
6344
6345 assert(entry->wired_count >= 1);
6346 entry->wired_count--;
6347 }
6348 }
6349
/*
 * Count of wire requests rejected because they targeted an executable
 * mapping (see the code-signing check in vm_map_wire_nested()).
 */
int cs_executable_wire = 0;
6351
6352 /*
6353 * vm_map_wire:
6354 *
6355 * Sets the pageability of the specified address range in the
6356 * target map as wired. Regions specified as not pageable require
6357 * locked-down physical memory and physical page maps. The
6358 * access_type variable indicates types of accesses that must not
6359 * generate page faults. This is checked against protection of
6360 * memory being locked-down.
6361 *
6362 * The map must not be locked, but a reference must remain to the
6363 * map throughout the call.
6364 */
/*
 * Wire down [start, end) in "map".
 *
 * map_pmap/pmap_addr: non-NULL when called recursively for a submap, in
 *     which case the physical mappings are entered in the parent's pmap
 *     at pmap_addr instead of the submap's own pmap.
 * physpage_p: if non-NULL, the caller wants the physical page number of
 *     the (single) wired page back; the range must then be exactly one
 *     page.
 * Returns KERN_SUCCESS or an error; on error, everything wired so far
 * by this call is unwired again before returning.
 */
static kern_return_t
vm_map_wire_nested(
	vm_map_t                map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_prot_t               caller_prot,
	vm_tag_t                tag,
	boolean_t               user_wire,
	pmap_t                  map_pmap,
	vm_map_offset_t         pmap_addr,
	ppnum_t                 *physpage_p)
{
	vm_map_entry_t          entry;
	vm_prot_t               access_type;
	struct vm_map_entry     *first_entry, tmp_entry;
	vm_map_t                real_map;
	vm_map_offset_t         s, e;
	kern_return_t           rc;
	boolean_t               need_wakeup;
	boolean_t               main_map = FALSE;
	wait_interrupt_t        interruptible_state;
	thread_t                cur_thread;
	unsigned int            last_timestamp;
	vm_map_size_t           size;
	boolean_t               wire_and_extract;
	vm_prot_t               extra_prots;

	/*
	 * Extra protection bits passed to vm_map_lookup_and_lock_object()
	 * below: ask for a copy, and (when code-signing is enforced) fail
	 * rather than copy executable pages.
	 */
	extra_prots = VM_PROT_COPY;
	extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
#if XNU_TARGET_OS_OSX
	if (map->pmap == kernel_pmap ||
	    !vm_map_cs_enforcement(map)) {
		extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
	}
#endif /* XNU_TARGET_OS_OSX */

	access_type = (caller_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC));

	wire_and_extract = FALSE;
	if (physpage_p != NULL) {
		/*
		 * The caller wants the physical page number of the
		 * wired page.  We return only one physical page number
		 * so this works for only one page at a time.
		 */
		if ((end - start) != PAGE_SIZE) {
			return KERN_INVALID_ARGUMENT;
		}
		wire_and_extract = TRUE;
		*physpage_p = 0;
	}

	vm_map_lock(map);
	if (map_pmap == NULL) {
		main_map = TRUE;
	}
	/* snapshot used to detect map changes across unlock/relock cycles */
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We wired what the caller asked for, zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	need_wakeup = FALSE;
	cur_thread = current_thread();

	/* "s" tracks how far we have wired so far; see loop invariant below */
	s = start;
	rc = KERN_SUCCESS;

	if (vm_map_lookup_entry(map, s, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested submaps here !
		 */
	} else {
		/* Start address is not in map */
		rc = KERN_INVALID_ADDRESS;
		goto done;
	}

	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have wired from "start" to "s".
		 * We still need to wire from "s" to "end".
		 *
		 * "entry" hasn't been clipped, so it could start before "s"
		 * and/or end after "end".
		 */

		/* "e" is how far we want to wire in this entry */
		e = entry->vme_end;
		if (e > end) {
			e = end;
		}

		/*
		 * If another thread is wiring/unwiring this entry then
		 * block after informing other thread to wake us up.
		 */
		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * We have not clipped the entry.  Make sure that
			 * the start address is in range so that the lookup
			 * below will succeed.
			 * "s" is the current starting point: we've already
			 * wired from "start" to "s" and we still have
			 * to wire from "s" to "end".
			 */

			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already wired.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}
			/*
			 * User wiring is interruptible
			 */
			wait_result = vm_map_entry_wait(map,
			    (user_wire) ? THREAD_ABORTSAFE :
			    THREAD_UNINT);
			if (user_wire && wait_result == THREAD_INTERRUPTED) {
				/*
				 * undo the wirings we have done so far
				 * We do not clear the needs_wakeup flag,
				 * because we cannot tell if we were the
				 * only one waiting.
				 */
				rc = KERN_FAILURE;
				goto done;
			}

			/*
			 * Cannot avoid a lookup here. reset timestamp.
			 */
			last_timestamp = map->timestamp;

			/*
			 * The entry could have been clipped, look it up again.
			 * Worse that can happen is, it may not exist anymore.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: undo everything upto the previous
				 * entry.  let vm_map_unwire worry about
				 * checking the validity of the range.
				 */
				rc = KERN_FAILURE;
				goto done;
			}
			entry = first_entry;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_start;
			vm_map_offset_t local_end;
			pmap_t          pmap;

			if (wire_and_extract) {
				/*
				 * Wiring would result in copy-on-write
				 * which would not be compatible with
				 * the sharing we have with the original
				 * provider of this memory.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			/* translate [s, e) into the submap's address space */
			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end;
			sub_end += VME_OFFSET(entry) - entry->vme_start;

			local_end = entry->vme_end;
			if (map_pmap == NULL) {
				vm_object_t             object;
				vm_object_offset_t      offset;
				vm_prot_t               prot;
				boolean_t               wired;
				vm_map_entry_t          local_entry;
				vm_map_version_t         version;
				vm_map_t                lookup_map;

				if (entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					/* ppc implementation requires that */
					/* submaps pmap address ranges line */
					/* up with parent map */
#ifdef notdef
					pmap_addr = sub_start;
#endif
					pmap_addr = s;
				} else {
					pmap = map->pmap;
					pmap_addr = s;
				}

				if (entry->wired_count) {
					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
						goto done;
					}

					/*
					 * The map was not unlocked:
					 * no need to goto re-lookup.
					 * Just go directly to next entry.
					 */
					entry = entry->vme_next;
					s = entry->vme_start;
					continue;
				}

				/* call vm_map_lookup_and_lock_object to */
				/* cause any needs copy to be */
				/* evaluated */
				local_start = entry->vme_start;
				lookup_map = map;
				vm_map_lock_write_to_read(map);
				rc = vm_map_lookup_and_lock_object(
					&lookup_map, local_start,
					(access_type | extra_prots),
					OBJECT_LOCK_EXCLUSIVE,
					&version, &object,
					&offset, &prot, &wired,
					NULL,
					&real_map, NULL);
				if (rc != KERN_SUCCESS) {
					vm_map_unlock_read(lookup_map);
					assert(map_pmap == NULL);
					vm_map_unwire(map, start,
					    s, user_wire);
					return rc;
				}
				vm_object_unlock(object);
				if (real_map != lookup_map) {
					vm_map_unlock(real_map);
				}
				vm_map_unlock_read(lookup_map);
				vm_map_lock(map);

				/* we unlocked, so must re-lookup */
				if (!vm_map_lookup_entry(map,
				    local_start,
				    &local_entry)) {
					rc = KERN_FAILURE;
					goto done;
				}

				/*
				 * entry could have been "simplified",
				 * so re-clip
				 */
				entry = local_entry;
				assert(s == local_start);
				vm_map_clip_start(map, entry, s);
				vm_map_clip_end(map, entry, end);
				/* re-compute "e" */
				e = entry->vme_end;
				if (e > end) {
					e = end;
				}

				/* did we have a change of type? */
				if (!entry->is_sub_map) {
					last_timestamp = map->timestamp;
					continue;
				}
			} else {
				local_start = entry->vme_start;
				pmap = map_pmap;
			}

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
				goto done;
			}

			entry->in_transition = TRUE;

			/* recurse into the submap with the map unlocked */
			vm_map_unlock(map);
			rc = vm_map_wire_nested(VME_SUBMAP(entry),
			    sub_start, sub_end,
			    caller_prot, tag,
			    user_wire, pmap, pmap_addr,
			    NULL);
			vm_map_lock(map);

			/*
			 * Find the entry again.  It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, local_start,
			    &first_entry)) {
				panic("vm_map_wire: re-lookup failed");
			}
			entry = first_entry;

			assert(local_start == s);
			/* re-compute "e" */
			e = entry->vme_end;
			if (e > end) {
				e = end;
			}

			last_timestamp = map->timestamp;
			while ((entry != vm_map_to_entry(map)) &&
			    (entry->vme_start < e)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
					subtract_wire_counts(map, entry, user_wire);
				}
				entry = entry->vme_next;
			}
			if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
				goto done;
			}

			/* no need to relookup again */
			s = entry->vme_start;
			continue;
		}

		/*
		 * If this entry is already wired then increment
		 * the appropriate wire reference count.
		 */
		if (entry->wired_count) {
			if ((entry->protection & access_type) != access_type) {
				/* found a protection problem */

				/*
				 * XXX FBDP
				 * We should always return an error
				 * in this case but since we didn't
				 * enforce it before, let's do
				 * it only for the new "wire_and_extract"
				 * code path for now...
				 */
				if (wire_and_extract) {
					rc = KERN_PROTECTION_FAILURE;
					goto done;
				}
			}

			/*
			 * entry is already wired down, get our reference
			 * after clipping to our range.
			 */
			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
				goto done;
			}

			if (wire_and_extract) {
				vm_object_t             object;
				vm_object_offset_t      offset;
				vm_page_t               m;

				/*
				 * We don't have to "wire" the page again
				 * bit we still have to "extract" its
				 * physical page number, after some sanity
				 * checks.
				 */
				assert((entry->vme_end - entry->vme_start)
				    == PAGE_SIZE);
				assert(!entry->needs_copy);
				assert(!entry->is_sub_map);
				assert(VME_OBJECT(entry));
				if (((entry->vme_end - entry->vme_start)
				    != PAGE_SIZE) ||
				    entry->needs_copy ||
				    entry->is_sub_map ||
				    VME_OBJECT(entry) == VM_OBJECT_NULL) {
					rc = KERN_INVALID_ARGUMENT;
					goto done;
				}

				object = VME_OBJECT(entry);
				offset = VME_OFFSET(entry);
				/* need exclusive lock to update m->dirty */
				if (entry->protection & VM_PROT_WRITE) {
					vm_object_lock(object);
				} else {
					vm_object_lock_shared(object);
				}
				m = vm_page_lookup(object, offset);
				assert(m != VM_PAGE_NULL);
				assert(VM_PAGE_WIRED(m));
				if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
					*physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
					if (entry->protection & VM_PROT_WRITE) {
						vm_object_lock_assert_exclusive(
							object);
						m->vmp_dirty = TRUE;
					}
				} else {
					/* not already wired !? */
					*physpage_p = 0;
				}
				vm_object_unlock(object);
			}

			/* map was not unlocked: no need to relookup */
			entry = entry->vme_next;
			s = entry->vme_start;
			continue;
		}

		/*
		 * Unwired entry or wire request transmitted via submap
		 */

		/*
		 * Wiring would copy the pages to the shadow object.
		 * The shadow object would not be code-signed so
		 * attempting to execute code from these copied pages
		 * would trigger a code-signing violation.
		 */

		if ((entry->protection & VM_PROT_EXECUTE)
#if XNU_TARGET_OS_OSX
		    &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    )
#endif /* XNU_TARGET_OS_OSX */
		    ) {
#if MACH_ASSERT
			printf("pid %d[%s] wiring executable range from "
			    "0x%llx to 0x%llx: rejected to preserve "
			    "code-signing\n",
			    proc_selfpid(),
			    (get_bsdtask_info(current_task())
			    ? proc_name_address(get_bsdtask_info(current_task()))
			    : "?"),
			    (uint64_t) entry->vme_start,
			    (uint64_t) entry->vme_end);
#endif /* MACH_ASSERT */
			DTRACE_VM2(cs_executable_wire,
			    uint64_t, (uint64_t)entry->vme_start,
			    uint64_t, (uint64_t)entry->vme_end);
			cs_executable_wire++;
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		/*
		 * Perform actions of vm_map_lookup that need the write
		 * lock on the map: create a shadow object for a
		 * copy-on-write region, or an object for a zero-fill
		 * region.
		 */
		size = entry->vme_end - entry->vme_start;
		/*
		 * If wiring a copy-on-write page, we need to copy it now
		 * even if we're only (currently) requesting read access.
		 * This is aggressive, but once it's wired we can't move it.
		 */
		if (entry->needs_copy) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should not be "needs_copy"
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			VME_OBJECT_SHADOW(entry, size,
			    vm_map_always_shadow(map));
			entry->needs_copy = FALSE;
		} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should already have an object.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}
			/* zero-fill region: allocate a backing object now */
			VME_OBJECT_SET(entry, vm_object_allocate(size), false, 0);
			VME_OFFSET_SET(entry, (vm_object_offset_t)0);
			assert(entry->use_pmap);
		} else if (VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should not be COPY_SYMMETRIC.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}
			/*
			 * Force an unrequested "copy-on-write" but only for
			 * the range we're wiring.
			 */
//			printf("FBDP %s:%d map %p entry %p [ 0x%llx 0x%llx ] s 0x%llx end 0x%llx wire&extract=%d\n", __FUNCTION__, __LINE__, map, entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, (uint64_t)s, (uint64_t)end, wire_and_extract);
			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);
			/* recompute "size" */
			size = entry->vme_end - entry->vme_start;
			/* make a shadow object */
			vm_object_t orig_object;
			vm_object_offset_t orig_offset;
			orig_object = VME_OBJECT(entry);
			orig_offset = VME_OFFSET(entry);
			VME_OBJECT_SHADOW(entry, size, vm_map_always_shadow(map));
			if (VME_OBJECT(entry) != orig_object) {
				/*
				 * This mapping has not been shared (or it would be
				 * COPY_DELAY instead of COPY_SYMMETRIC) and it has
				 * not been copied-on-write (or it would be marked
				 * as "needs_copy" and would have been handled above
				 * and also already write-protected).
				 * We still need to write-protect here to prevent
				 * other threads from modifying these pages while
				 * we're in the process of copying and wiring
				 * the copied pages.
				 * Since the mapping is neither shared nor COWed,
				 * we only need to write-protect the PTEs for this
				 * mapping.
				 */
				vm_object_pmap_protect(orig_object,
				    orig_offset,
				    size,
				    map->pmap,
				    VM_MAP_PAGE_SIZE(map),
				    entry->vme_start,
				    entry->protection & ~VM_PROT_WRITE);
			}
		}
		if (VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			/*
			 * Make the object COPY_DELAY to get a stable object
			 * to wire.
			 * That should avoid creating long shadow chains while
			 * wiring/unwiring the same range repeatedly.
			 * That also prevents part of the object from being
			 * wired while another part is "needs_copy", which
			 * could result in conflicting rules wrt copy-on-write.
			 */
			vm_object_t object;

			object = VME_OBJECT(entry);
			vm_object_lock(object);
			if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
				assertf(vm_object_round_page(VME_OFFSET(entry) + size) - vm_object_trunc_page(VME_OFFSET(entry)) == object->vo_size,
				    "object %p size 0x%llx entry %p [0x%llx:0x%llx:0x%llx] size 0x%llx\n",
				    object, (uint64_t)object->vo_size,
				    entry,
				    (uint64_t)entry->vme_start,
				    (uint64_t)entry->vme_end,
				    (uint64_t)VME_OFFSET(entry),
				    (uint64_t)size);
				assertf(object->ref_count == 1,
				    "object %p ref_count %d\n",
				    object, object->ref_count);
				assertf(!entry->needs_copy,
				    "entry %p\n", entry);
				object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
				object->true_share = TRUE;
			}
			vm_object_unlock(object);
		}

		vm_map_clip_start(map, entry, s);
		vm_map_clip_end(map, entry, end);

		/* re-compute "e" */
		e = entry->vme_end;
		if (e > end) {
			e = end;
		}

		/*
		 * Check for holes and protection mismatch.
		 * Holes: Next entry should be contiguous unless this
		 * is the end of the region.
		 * Protection: Access requested must be allowed, unless
		 * wiring is by protection class
		 */
		if ((entry->vme_end < end) &&
		    ((entry->vme_next == vm_map_to_entry(map)) ||
		    (entry->vme_next->vme_start > entry->vme_end))) {
			/* found a hole */
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}
		if ((entry->protection & access_type) != access_type) {
			/* found a protection problem */
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		assert(entry->wired_count == 0 && entry->user_wired_count == 0);

		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
			goto done;
		}

		entry->in_transition = TRUE;

		/*
		 * This entry might get split once we unlock the map.
		 * In vm_fault_wire(), we need the current range as
		 * defined by this entry.  In order for this to work
		 * along with a simultaneous clip operation, we make a
		 * temporary copy of this entry and use that for the
		 * wiring.  Note that the underlying objects do not
		 * change during a clip.
		 */
		tmp_entry = *entry;

		/*
		 * The in_transition state guarentees that the entry
		 * (or entries for this range, if split occured) will be
		 * there when the map lock is acquired for the second time.
		 */
		vm_map_unlock(map);

		if (!user_wire && cur_thread != THREAD_NULL) {
			interruptible_state = thread_interrupt_level(THREAD_UNINT);
		} else {
			/*
			 * dummy value: only consumed below when the matching
			 * condition re-enables the previous interrupt level
			 */
			interruptible_state = THREAD_UNINT;
		}

		if (map_pmap) {
			rc = vm_fault_wire(map,
			    &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
			    physpage_p);
		} else {
			rc = vm_fault_wire(map,
			    &tmp_entry, caller_prot, tag, map->pmap,
			    tmp_entry.vme_start,
			    physpage_p);
		}

		if (!user_wire && cur_thread != THREAD_NULL) {
			thread_interrupt_level(interruptible_state);
		}

		vm_map_lock(map);

		/* timestamp changed beyond our own unlock/lock: map was modified */
		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
			    &first_entry)) {
				panic("vm_map_wire: re-lookup failed");
			}

			entry = first_entry;
		}

		last_timestamp = map->timestamp;

		while ((entry != vm_map_to_entry(map)) &&
		    (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
				subtract_wire_counts(map, entry, user_wire);
			}
			entry = entry->vme_next;
		}

		if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
			goto done;
		}

		if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
		    (tmp_entry.vme_end != end) &&      /* AND, we are not at the end of the requested range */
		    (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
			/* found a "new" hole */
			s = tmp_entry.vme_end;
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}

		s = entry->vme_start;
	} /* end while loop through map entries */

done:
	if (rc == KERN_SUCCESS) {
		/* repair any damage we may have made to the VM map */
		vm_map_simplify_range(map, start, end);
	}

	vm_map_unlock(map);

	/*
	 * wake up anybody waiting on entries we wired.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}

	if (rc != KERN_SUCCESS) {
		/* undo what has been wired so far */
		vm_map_unwire_nested(map, start, s, user_wire,
		    map_pmap, pmap_addr);
		if (physpage_p) {
			*physpage_p = 0;
		}
	}

	return rc;
}
7107
7108 kern_return_t
vm_map_wire_external(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,boolean_t user_wire)7109 vm_map_wire_external(
7110 vm_map_t map,
7111 vm_map_offset_t start,
7112 vm_map_offset_t end,
7113 vm_prot_t caller_prot,
7114 boolean_t user_wire)
7115 {
7116 kern_return_t kret;
7117
7118 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7119 user_wire, (pmap_t)NULL, 0, NULL);
7120 return kret;
7121 }
7122
7123 kern_return_t
vm_map_wire_kernel(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire)7124 vm_map_wire_kernel(
7125 vm_map_t map,
7126 vm_map_offset_t start,
7127 vm_map_offset_t end,
7128 vm_prot_t caller_prot,
7129 vm_tag_t tag,
7130 boolean_t user_wire)
7131 {
7132 kern_return_t kret;
7133
7134 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7135 user_wire, (pmap_t)NULL, 0, NULL);
7136 return kret;
7137 }
7138
7139 kern_return_t
vm_map_wire_and_extract_external(vm_map_t map,vm_map_offset_t start,vm_prot_t caller_prot,boolean_t user_wire,ppnum_t * physpage_p)7140 vm_map_wire_and_extract_external(
7141 vm_map_t map,
7142 vm_map_offset_t start,
7143 vm_prot_t caller_prot,
7144 boolean_t user_wire,
7145 ppnum_t *physpage_p)
7146 {
7147 kern_return_t kret;
7148
7149 kret = vm_map_wire_nested(map,
7150 start,
7151 start + VM_MAP_PAGE_SIZE(map),
7152 caller_prot,
7153 vm_tag_bt(),
7154 user_wire,
7155 (pmap_t)NULL,
7156 0,
7157 physpage_p);
7158 if (kret != KERN_SUCCESS &&
7159 physpage_p != NULL) {
7160 *physpage_p = 0;
7161 }
7162 return kret;
7163 }
7164
7165 kern_return_t
vm_map_wire_and_extract_kernel(vm_map_t map,vm_map_offset_t start,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire,ppnum_t * physpage_p)7166 vm_map_wire_and_extract_kernel(
7167 vm_map_t map,
7168 vm_map_offset_t start,
7169 vm_prot_t caller_prot,
7170 vm_tag_t tag,
7171 boolean_t user_wire,
7172 ppnum_t *physpage_p)
7173 {
7174 kern_return_t kret;
7175
7176 kret = vm_map_wire_nested(map,
7177 start,
7178 start + VM_MAP_PAGE_SIZE(map),
7179 caller_prot,
7180 tag,
7181 user_wire,
7182 (pmap_t)NULL,
7183 0,
7184 physpage_p);
7185 if (kret != KERN_SUCCESS &&
7186 physpage_p != NULL) {
7187 *physpage_p = 0;
7188 }
7189 return kret;
7190 }
7191
7192 /*
7193 * vm_map_unwire:
7194 *
7195 * Sets the pageability of the specified address range in the target
7196 * as pageable. Regions specified must have been wired previously.
7197 *
7198 * The map must not be locked, but a reference must remain to the map
7199 * throughout the call.
7200 *
7201 * Kernel will panic on failures. User unwire ignores holes and
7202 * unwired and intransition entries to avoid losing memory by leaving
7203 * it unwired.
7204 */
7205 static kern_return_t
vm_map_unwire_nested(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t user_wire,pmap_t map_pmap,vm_map_offset_t pmap_addr)7206 vm_map_unwire_nested(
7207 vm_map_t map,
7208 vm_map_offset_t start,
7209 vm_map_offset_t end,
7210 boolean_t user_wire,
7211 pmap_t map_pmap,
7212 vm_map_offset_t pmap_addr)
7213 {
7214 vm_map_entry_t entry;
7215 struct vm_map_entry *first_entry, tmp_entry;
7216 boolean_t need_wakeup;
7217 boolean_t main_map = FALSE;
7218 unsigned int last_timestamp;
7219
7220 vm_map_lock(map);
7221 if (map_pmap == NULL) {
7222 main_map = TRUE;
7223 }
7224 last_timestamp = map->timestamp;
7225
7226 VM_MAP_RANGE_CHECK(map, start, end);
7227 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7228 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7229
7230 if (start == end) {
7231 /* We unwired what the caller asked for: zero pages */
7232 vm_map_unlock(map);
7233 return KERN_SUCCESS;
7234 }
7235
7236 if (vm_map_lookup_entry(map, start, &first_entry)) {
7237 entry = first_entry;
7238 /*
7239 * vm_map_clip_start will be done later.
7240 * We don't want to unnest any nested sub maps here !
7241 */
7242 } else {
7243 if (!user_wire) {
7244 panic("vm_map_unwire: start not found");
7245 }
7246 /* Start address is not in map. */
7247 vm_map_unlock(map);
7248 return KERN_INVALID_ADDRESS;
7249 }
7250
7251 if (entry->superpage_size) {
7252 /* superpages are always wired */
7253 vm_map_unlock(map);
7254 return KERN_INVALID_ADDRESS;
7255 }
7256
7257 need_wakeup = FALSE;
7258 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7259 if (entry->in_transition) {
7260 /*
7261 * 1)
7262 * Another thread is wiring down this entry. Note
7263 * that if it is not for the other thread we would
7264 * be unwiring an unwired entry. This is not
7265 * permitted. If we wait, we will be unwiring memory
7266 * we did not wire.
7267 *
7268 * 2)
7269 * Another thread is unwiring this entry. We did not
7270 * have a reference to it, because if we did, this
7271 * entry will not be getting unwired now.
7272 */
7273 if (!user_wire) {
7274 /*
7275 * XXX FBDP
7276 * This could happen: there could be some
7277 * overlapping vslock/vsunlock operations
7278 * going on.
7279 * We should probably just wait and retry,
7280 * but then we have to be careful that this
7281 * entry could get "simplified" after
7282 * "in_transition" gets unset and before
7283 * we re-lookup the entry, so we would
7284 * have to re-clip the entry to avoid
7285 * re-unwiring what we have already unwired...
7286 * See vm_map_wire_nested().
7287 *
7288 * Or we could just ignore "in_transition"
7289 * here and proceed to decement the wired
7290 * count(s) on this entry. That should be fine
7291 * as long as "wired_count" doesn't drop all
7292 * the way to 0 (and we should panic if THAT
7293 * happens).
7294 */
7295 panic("vm_map_unwire: in_transition entry");
7296 }
7297
7298 entry = entry->vme_next;
7299 continue;
7300 }
7301
7302 if (entry->is_sub_map) {
7303 vm_map_offset_t sub_start;
7304 vm_map_offset_t sub_end;
7305 vm_map_offset_t local_end;
7306 pmap_t pmap;
7307
7308 vm_map_clip_start(map, entry, start);
7309 vm_map_clip_end(map, entry, end);
7310
7311 sub_start = VME_OFFSET(entry);
7312 sub_end = entry->vme_end - entry->vme_start;
7313 sub_end += VME_OFFSET(entry);
7314 local_end = entry->vme_end;
7315 if (map_pmap == NULL) {
7316 if (entry->use_pmap) {
7317 pmap = VME_SUBMAP(entry)->pmap;
7318 pmap_addr = sub_start;
7319 } else {
7320 pmap = map->pmap;
7321 pmap_addr = start;
7322 }
7323 if (entry->wired_count == 0 ||
7324 (user_wire && entry->user_wired_count == 0)) {
7325 if (!user_wire) {
7326 panic("vm_map_unwire: entry is unwired");
7327 }
7328 entry = entry->vme_next;
7329 continue;
7330 }
7331
7332 /*
7333 * Check for holes
7334 * Holes: Next entry should be contiguous unless
7335 * this is the end of the region.
7336 */
7337 if (((entry->vme_end < end) &&
7338 ((entry->vme_next == vm_map_to_entry(map)) ||
7339 (entry->vme_next->vme_start
7340 > entry->vme_end)))) {
7341 if (!user_wire) {
7342 panic("vm_map_unwire: non-contiguous region");
7343 }
7344 /*
7345 * entry = entry->vme_next;
7346 * continue;
7347 */
7348 }
7349
7350 subtract_wire_counts(map, entry, user_wire);
7351
7352 if (entry->wired_count != 0) {
7353 entry = entry->vme_next;
7354 continue;
7355 }
7356
7357 entry->in_transition = TRUE;
7358 tmp_entry = *entry;/* see comment in vm_map_wire() */
7359
7360 /*
7361 * We can unlock the map now. The in_transition state
7362 * guarantees existance of the entry.
7363 */
7364 vm_map_unlock(map);
7365 vm_map_unwire_nested(VME_SUBMAP(entry),
7366 sub_start, sub_end, user_wire, pmap, pmap_addr);
7367 vm_map_lock(map);
7368
7369 if (last_timestamp + 1 != map->timestamp) {
7370 /*
7371 * Find the entry again. It could have been
7372 * clipped or deleted after we unlocked the map.
7373 */
7374 if (!vm_map_lookup_entry(map,
7375 tmp_entry.vme_start,
7376 &first_entry)) {
7377 if (!user_wire) {
7378 panic("vm_map_unwire: re-lookup failed");
7379 }
7380 entry = first_entry->vme_next;
7381 } else {
7382 entry = first_entry;
7383 }
7384 }
7385 last_timestamp = map->timestamp;
7386
7387 /*
7388 * clear transition bit for all constituent entries
7389 * that were in the original entry (saved in
7390 * tmp_entry). Also check for waiters.
7391 */
7392 while ((entry != vm_map_to_entry(map)) &&
7393 (entry->vme_start < tmp_entry.vme_end)) {
7394 assert(entry->in_transition);
7395 entry->in_transition = FALSE;
7396 if (entry->needs_wakeup) {
7397 entry->needs_wakeup = FALSE;
7398 need_wakeup = TRUE;
7399 }
7400 entry = entry->vme_next;
7401 }
7402 continue;
7403 } else {
7404 tmp_entry = *entry;
7405 vm_map_unlock(map);
7406 vm_map_unwire_nested(VME_SUBMAP(entry),
7407 sub_start, sub_end, user_wire, map_pmap,
7408 pmap_addr);
7409 vm_map_lock(map);
7410
7411 if (last_timestamp + 1 != map->timestamp) {
7412 /*
7413 * Find the entry again. It could have been
7414 * clipped or deleted after we unlocked the map.
7415 */
7416 if (!vm_map_lookup_entry(map,
7417 tmp_entry.vme_start,
7418 &first_entry)) {
7419 if (!user_wire) {
7420 panic("vm_map_unwire: re-lookup failed");
7421 }
7422 entry = first_entry->vme_next;
7423 } else {
7424 entry = first_entry;
7425 }
7426 }
7427 last_timestamp = map->timestamp;
7428 }
7429 }
7430
7431
7432 if ((entry->wired_count == 0) ||
7433 (user_wire && entry->user_wired_count == 0)) {
7434 if (!user_wire) {
7435 panic("vm_map_unwire: entry is unwired");
7436 }
7437
7438 entry = entry->vme_next;
7439 continue;
7440 }
7441
7442 assert(entry->wired_count > 0 &&
7443 (!user_wire || entry->user_wired_count > 0));
7444
7445 vm_map_clip_start(map, entry, start);
7446 vm_map_clip_end(map, entry, end);
7447
7448 /*
7449 * Check for holes
7450 * Holes: Next entry should be contiguous unless
7451 * this is the end of the region.
7452 */
7453 if (((entry->vme_end < end) &&
7454 ((entry->vme_next == vm_map_to_entry(map)) ||
7455 (entry->vme_next->vme_start > entry->vme_end)))) {
7456 if (!user_wire) {
7457 panic("vm_map_unwire: non-contiguous region");
7458 }
7459 entry = entry->vme_next;
7460 continue;
7461 }
7462
7463 subtract_wire_counts(map, entry, user_wire);
7464
7465 if (entry->wired_count != 0) {
7466 entry = entry->vme_next;
7467 continue;
7468 }
7469
7470 if (entry->zero_wired_pages) {
7471 entry->zero_wired_pages = FALSE;
7472 }
7473
7474 entry->in_transition = TRUE;
7475 tmp_entry = *entry; /* see comment in vm_map_wire() */
7476
7477 /*
7478 * We can unlock the map now. The in_transition state
	 * guarantees existence of the entry.
7480 */
7481 vm_map_unlock(map);
7482 if (map_pmap) {
7483 vm_fault_unwire(map,
7484 &tmp_entry, FALSE, map_pmap, pmap_addr);
7485 } else {
7486 vm_fault_unwire(map,
7487 &tmp_entry, FALSE, map->pmap,
7488 tmp_entry.vme_start);
7489 }
7490 vm_map_lock(map);
7491
7492 if (last_timestamp + 1 != map->timestamp) {
7493 /*
7494 * Find the entry again. It could have been clipped
7495 * or deleted after we unlocked the map.
7496 */
7497 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7498 &first_entry)) {
7499 if (!user_wire) {
7500 panic("vm_map_unwire: re-lookup failed");
7501 }
7502 entry = first_entry->vme_next;
7503 } else {
7504 entry = first_entry;
7505 }
7506 }
7507 last_timestamp = map->timestamp;
7508
7509 /*
7510 * clear transition bit for all constituent entries that
7511 * were in the original entry (saved in tmp_entry). Also
7512 * check for waiters.
7513 */
7514 while ((entry != vm_map_to_entry(map)) &&
7515 (entry->vme_start < tmp_entry.vme_end)) {
7516 assert(entry->in_transition);
7517 entry->in_transition = FALSE;
7518 if (entry->needs_wakeup) {
7519 entry->needs_wakeup = FALSE;
7520 need_wakeup = TRUE;
7521 }
7522 entry = entry->vme_next;
7523 }
7524 }
7525
7526 /*
7527 * We might have fragmented the address space when we wired this
7528 * range of addresses. Attempt to re-coalesce these VM map entries
7529 * with their neighbors now that they're no longer wired.
7530 * Under some circumstances, address space fragmentation can
7531 * prevent VM object shadow chain collapsing, which can cause
7532 * swap space leaks.
7533 */
7534 vm_map_simplify_range(map, start, end);
7535
7536 vm_map_unlock(map);
7537 /*
7538 * wake up anybody waiting on entries that we have unwired.
7539 */
7540 if (need_wakeup) {
7541 vm_map_entry_wakeup(map);
7542 }
7543 return KERN_SUCCESS;
7544 }
7545
7546 kern_return_t
vm_map_unwire(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t user_wire)7547 vm_map_unwire(
7548 vm_map_t map,
7549 vm_map_offset_t start,
7550 vm_map_offset_t end,
7551 boolean_t user_wire)
7552 {
7553 return vm_map_unwire_nested(map, start, end,
7554 user_wire, (pmap_t)NULL, 0);
7555 }
7556
7557
7558 /*
7559 * vm_map_entry_zap: [ internal use only ]
7560 *
7561 * Remove the entry from the target map
7562 * and put it on a zap list.
7563 */
7564 static void
vm_map_entry_zap(vm_map_t map,vm_map_entry_t entry,vm_map_zap_t zap)7565 vm_map_entry_zap(
7566 vm_map_t map,
7567 vm_map_entry_t entry,
7568 vm_map_zap_t zap)
7569 {
7570 vm_map_offset_t s, e;
7571
7572 s = entry->vme_start;
7573 e = entry->vme_end;
7574 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7575 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7576 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7577 assert(page_aligned(s));
7578 assert(page_aligned(e));
7579 }
7580 if (entry->map_aligned == TRUE) {
7581 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7582 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7583 }
7584 assert(entry->wired_count == 0);
7585 assert(entry->user_wired_count == 0);
7586 assert(!entry->vme_permanent);
7587
7588 vm_map_store_entry_unlink(map, entry, false);
7589 map->size -= e - s;
7590
7591 vm_map_zap_append(zap, entry);
7592 }
7593
/*
 *	vm_map_submap_pmap_clean:	[ internal use only ]
 *
 *	Remove the physical mappings established for the portion of the
 *	parent "map" range [start, end) that is backed by "sub_map"
 *	starting at "offset" within that submap.  Recurses through
 *	nested submaps.  Takes the submap lock for reading.
 */
static void
vm_map_submap_pmap_clean(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t sub_map,
	vm_map_offset_t offset)
{
	vm_map_offset_t submap_start;
	vm_map_offset_t submap_end;
	vm_map_size_t remove_size;
	vm_map_entry_t entry;

	/* translate the parent-map range into submap address space */
	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if (vm_map_lookup_entry(sub_map, offset, &entry)) {
		/*
		 * The first entry may only partially overlap the range:
		 * trim the size to the overlapping portion.
		 */
		remove_size = (entry->vme_end - entry->vme_start);
		if (offset > entry->vme_start) {
			remove_size -= offset - entry->vme_start;
		}


		if (submap_end < entry->vme_end) {
			remove_size -=
			    entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			/* nested submap: recurse one level deeper */
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count_raw(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				/*
				 * The parent map may be mapped into other pmaps:
				 * remove the translations through the backing VM
				 * object so every pmap is cleaned, not just ours.
				 */
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					(VME_OFFSET(entry) +
					offset -
					entry->vme_start),
					remove_size,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				/* only visible via this map's pmap: remove directly */
				pmap_remove(map->pmap,
				    (addr64_t)start,
				    (addr64_t)(start + remove_size));
			}
		}
	}

	/*
	 * If the lookup failed, "entry" is the entry preceding "offset",
	 * so advancing lands on the first entry in the range (if any).
	 */
	entry = entry->vme_next;

	/* walk the remaining entries that overlap [submap_start, submap_end) */
	while ((entry != vm_map_to_entry(sub_map))
	    && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (submap_end < entry->vme_end) {
			/* last entry may extend past the range: trim */
			remove_size -= entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count_raw(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				/* shared with other pmaps: clean via the VM object */
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					VME_OFFSET(entry),
					remove_size,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				/* translate back to parent-map addresses for pmap_remove */
				pmap_remove(map->pmap,
				    (addr64_t)((start + entry->vme_start)
				    - offset),
				    (addr64_t)(((start + entry->vme_start)
				    - offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
	return;
}
7693
7694 /*
7695 * virt_memory_guard_ast:
7696 *
7697 * Handle the AST callout for a virtual memory guard.
7698 * raise an EXC_GUARD exception and terminate the task
7699 * if configured to do so.
7700 */
void
virt_memory_guard_ast(
	thread_t thread,
	mach_exception_data_type_t code,
	mach_exception_data_type_t subcode)
{
	task_t task = get_threadtask(thread);
	assert(task != kernel_task);
	assert(task == current_task());
	kern_return_t sync_exception_result;
	uint32_t behavior;

	behavior = task->task_exc_guard;

	/* Is delivery enabled */
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return;
	}

	/* If only once, make sure we're that once */
	while (behavior & TASK_EXC_GUARD_VM_ONCE) {
		/* clear the deliver bit so subsequent guard violations are ignored */
		uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;

		if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
			break;
		}
		/*
		 * CAS lost a race with another thread updating task_exc_guard:
		 * reload and re-check that delivery is still enabled.
		 */
		behavior = task->task_exc_guard;
		if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
			return;
		}
	}

	/* Raise exception synchronously and see if handler claimed it */
	sync_exception_result = task_exception_notify(EXC_GUARD, code, subcode);

	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		/*
		 * If Synchronous EXC_GUARD delivery was successful then
		 * kill the process and return, else kill the process
		 * and deliver the exception via EXC_CORPSE_NOTIFY.
		 */
		if (sync_exception_result == KERN_SUCCESS) {
			task_bsdtask_kill(current_task());
		} else {
			exit_with_guard_exception(current_proc(), code, subcode);
		}
	} else if (task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) {
		/*
		 * If the synchronous EXC_GUARD delivery was not successful,
		 * raise a simulated crash.
		 */
		if (sync_exception_result != KERN_SUCCESS) {
			task_violated_guard(code, subcode, NULL, FALSE);
		}
	}
}
7757
7758 /*
7759 * vm_map_guard_exception:
7760 *
7761 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7762 *
7763 * Right now, we do this when we find nothing mapped, or a
7764 * gap in the mapping when a user address space deallocate
7765 * was requested. We report the address of the first gap found.
7766 */
7767 static void
vm_map_guard_exception(vm_map_offset_t gap_start,unsigned reason)7768 vm_map_guard_exception(
7769 vm_map_offset_t gap_start,
7770 unsigned reason)
7771 {
7772 mach_exception_code_t code = 0;
7773 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7774 unsigned int target = 0; /* should we pass in pid associated with map? */
7775 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7776 boolean_t fatal = FALSE;
7777
7778 task_t task = current_task_early();
7779
7780 /* Can't deliver exceptions to a NULL task (early boot) or kernel task */
7781 if (task == NULL || task == kernel_task) {
7782 return;
7783 }
7784
7785 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7786 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7787 EXC_GUARD_ENCODE_TARGET(code, target);
7788
7789 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7790 fatal = TRUE;
7791 }
7792 thread_guard_violation(current_thread(), code, subcode, fatal);
7793 }
7794
7795 static kern_return_t
vm_map_delete_submap_recurse(vm_map_t submap,vm_map_offset_t submap_start,vm_map_offset_t submap_end)7796 vm_map_delete_submap_recurse(
7797 vm_map_t submap,
7798 vm_map_offset_t submap_start,
7799 vm_map_offset_t submap_end)
7800 {
7801 vm_map_entry_t submap_entry;
7802
7803 /*
7804 * Verify that the submap does not contain any "permanent" entries
7805 * within the specified range.
7806 * We do not care about gaps.
7807 */
7808
7809 vm_map_lock(submap);
7810
7811 if (!vm_map_lookup_entry(submap, submap_start, &submap_entry)) {
7812 submap_entry = submap_entry->vme_next;
7813 }
7814
7815 for (;
7816 submap_entry != vm_map_to_entry(submap) &&
7817 submap_entry->vme_start < submap_end;
7818 submap_entry = submap_entry->vme_next) {
7819 if (submap_entry->vme_permanent) {
7820 /* "permanent" entry -> fail */
7821 vm_map_unlock(submap);
7822 return KERN_PROTECTION_FAILURE;
7823 }
7824 }
7825 /* no "permanent" entries in the range -> success */
7826 vm_map_unlock(submap);
7827 return KERN_SUCCESS;
7828 }
7829
/* Panic helper: "start" was not aligned to the map's page size. */
__abortlike
static void
__vm_map_delete_misaligned_panic(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	panic("vm_map_delete(%p,0x%llx,0x%llx): start is not aligned to 0x%x",
	    map, (uint64_t)start, (uint64_t)end, VM_MAP_PAGE_SIZE(map));
}
7840
/* Panic helper: a deletion that must succeed (kernel map) returned "kr". */
__abortlike
static void
__vm_map_delete_failed_panic(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	kern_return_t   kr)
{
	panic("vm_map_delete(%p,0x%llx,0x%llx): failed unexpected with %d",
	    map, (uint64_t)start, (uint64_t)end, kr);
}
7852
/* Panic helper: a gap was found at "where" while deleting [start, end). */
__abortlike
static void
__vm_map_delete_gap_panic(
	vm_map_t        map,
	vm_map_offset_t where,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	panic("vm_map_delete(%p,0x%llx,0x%llx): no map entry at 0x%llx",
	    map, (uint64_t)start, (uint64_t)end, (uint64_t)where);
}
7864
/* Panic helper: attempted removal of a "permanent" entry in a kernel map. */
__abortlike
static void
__vm_map_delete_permanent_panic(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_entry_t  entry)
{
	panic("vm_map_delete(%p,0x%llx,0x%llx): "
	    "Attempting to remove permanent VM map entry %p [0x%llx:0x%llx]",
	    map, (uint64_t)start, (uint64_t)end, entry,
	    (uint64_t)entry->vme_start,
	    (uint64_t)entry->vme_end);
}
7879
/*
 * State bits tracked across the main loop of vm_map_delete().
 */
__options_decl(vm_map_delete_state_t, uint32_t, {
	VMDS_NONE = 0x0000,

	VMDS_FOUND_GAP = 0x0001,     /* a gap was found in the range */
	VMDS_GAPS_OK = 0x0002,       /* gaps tolerated (map terminated / unreferenced) */

	VMDS_KERNEL_PMAP = 0x0004,   /* map uses the kernel pmap: strict checks */
	VMDS_NEEDS_LOOKUP = 0x0008,  /* map lock was dropped: re-lookup the entry */
	VMDS_NEEDS_WAKEUP = 0x0010,  /* wake up threads waiting on map entries */
});
7890
7891 /*
7892 * vm_map_delete: [ internal use only ]
7893 *
7894 * Deallocates the given address range from the target map.
7895 * Removes all user wirings. Unwires one kernel wiring if
7896 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7897 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7898 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7899 *
7900 *
7901 * When the map is a kernel map, then any error in removing mappings
7902 * will lead to a panic so that clients do not have to repeat the panic
7903 * code at each call site. If VM_MAP_REMOVE_INTERRUPTIBLE
7904 * is also passed, then KERN_ABORTED will not lead to a panic.
7905 *
7906 * This routine is called with map locked and leaves map locked.
7907 */
7908 static kmem_return_t
vm_map_delete(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vmr_flags_t flags,kmem_guard_t guard,vm_map_zap_t zap_list)7909 vm_map_delete(
7910 vm_map_t map,
7911 vm_map_offset_t start,
7912 vm_map_offset_t end,
7913 vmr_flags_t flags,
7914 kmem_guard_t guard,
7915 vm_map_zap_t zap_list)
7916 {
7917 vm_map_entry_t entry, next;
7918 int interruptible;
7919 vm_map_offset_t gap_start = 0;
7920 vm_map_offset_t clear_in_transition_end = 0;
7921 __unused vm_map_offset_t save_start = start;
7922 __unused vm_map_offset_t save_end = end;
7923 vm_map_delete_state_t state = VMDS_NONE;
7924 kmem_return_t ret = { };
7925
7926 if (vm_map_pmap(map) == kernel_pmap) {
7927 state |= VMDS_KERNEL_PMAP;
7928 }
7929
7930 if (map->terminated || os_ref_get_count_raw(&map->map_refcnt) == 0) {
7931 state |= VMDS_GAPS_OK;
7932 }
7933
7934 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7935 THREAD_ABORTSAFE : THREAD_UNINT;
7936
7937 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) == 0 &&
7938 (start & VM_MAP_PAGE_MASK(map))) {
7939 __vm_map_delete_misaligned_panic(map, start, end);
7940 }
7941
7942 if ((state & VMDS_GAPS_OK) == 0) {
7943 /*
7944 * If the map isn't terminated then all deletions must have
7945 * no gaps, and be within the [min, max) of the map.
7946 *
7947 * We got here without VM_MAP_RANGE_CHECK() being called,
7948 * and hence must validate bounds manually.
7949 *
7950 * It is worth noting that because vm_deallocate() will
7951 * round_page() the deallocation size, it's possible for "end"
7952 * to be 0 here due to overflow. We hence must treat it as being
7953 * beyond vm_map_max(map).
7954 *
		 * Similarly, end < start means some wrap around happened,
7956 * which should cause an error or panic.
7957 */
7958 if (end == 0 || end > vm_map_max(map)) {
7959 state |= VMDS_FOUND_GAP;
7960 gap_start = vm_map_max(map);
7961 if (state & VMDS_KERNEL_PMAP) {
7962 __vm_map_delete_gap_panic(map,
7963 gap_start, start, end);
7964 }
7965 goto out;
7966 }
7967
7968 if (end < start) {
7969 if (state & VMDS_KERNEL_PMAP) {
7970 __vm_map_delete_gap_panic(map,
7971 vm_map_max(map), start, end);
7972 }
7973 ret.kmr_return = KERN_INVALID_ARGUMENT;
7974 goto out;
7975 }
7976
7977 if (start < vm_map_min(map)) {
7978 state |= VMDS_FOUND_GAP;
7979 gap_start = start;
7980 if (state & VMDS_KERNEL_PMAP) {
7981 __vm_map_delete_gap_panic(map,
7982 gap_start, start, end);
7983 }
7984 goto out;
7985 }
7986 } else {
7987 /*
7988 * If the map is terminated, we must accept start/end
7989 * being beyond the boundaries of the map as this is
7990 * how some of the mappings like commpage mappings
7991 * can be destroyed (they're outside of those bounds).
7992 *
7993 * end < start is still something we can't cope with,
7994 * so just bail.
7995 */
7996 if (end < start) {
7997 goto out;
7998 }
7999 }
8000
8001
8002 /*
8003 * Find the start of the region.
8004 *
8005 * If in a superpage, extend the range
8006 * to include the start of the mapping.
8007 */
8008 while (vm_map_lookup_entry_or_next(map, start, &entry)) {
8009 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
8010 start = SUPERPAGE_ROUND_DOWN(start);
8011 } else {
8012 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8013 break;
8014 }
8015 }
8016
8017 if (entry->superpage_size) {
8018 end = SUPERPAGE_ROUND_UP(end);
8019 }
8020
8021 /*
8022 * Step through all entries in this region
8023 */
8024 for (vm_map_offset_t s = start; s < end;) {
8025 /*
8026 * At this point, we have deleted all the memory entries
8027 * in [start, s) and are proceeding with the [s, end) range.
8028 *
8029 * This loop might drop the map lock, and it is possible that
8030 * some memory was already reallocated within [start, s)
8031 * and we don't want to mess with those entries.
8032 *
8033 * Some of those entries could even have been re-assembled
8034 * with an entry after "s" (in vm_map_simplify_entry()), so
8035 * we may have to vm_map_clip_start() again.
8036 *
8037 * When clear_in_transition_end is set, the we had marked
8038 * [start, clear_in_transition_end) as "in_transition"
8039 * during a previous iteration and we need to clear it.
8040 */
8041
8042 /*
8043 * Step 1: If needed (because we dropped locks),
8044 * lookup the entry again.
8045 *
8046 * If we're coming back from unwiring (Step 5),
8047 * we also need to mark the entries as no longer
8048 * in transition after that.
8049 */
8050
8051 if (state & VMDS_NEEDS_LOOKUP) {
8052 state &= ~VMDS_NEEDS_LOOKUP;
8053
8054 if (vm_map_lookup_entry_or_next(map, s, &entry)) {
8055 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8056 }
8057 }
8058
8059 if (clear_in_transition_end) {
8060 for (vm_map_entry_t it = entry;
8061 it != vm_map_to_entry(map) &&
8062 it->vme_start < clear_in_transition_end;
8063 it = it->vme_next) {
8064 assert(it->in_transition);
8065 it->in_transition = FALSE;
8066 if (it->needs_wakeup) {
8067 it->needs_wakeup = FALSE;
8068 state |= VMDS_NEEDS_WAKEUP;
8069 }
8070 }
8071
8072 clear_in_transition_end = 0;
8073 }
8074
8075
8076 /*
8077 * Step 2: Perform various policy checks
8078 * before we do _anything_ to this entry.
8079 */
8080
8081 if (entry == vm_map_to_entry(map) || s < entry->vme_start) {
8082 if (state & (VMDS_GAPS_OK | VMDS_FOUND_GAP)) {
8083 /*
8084 * Either we found a gap already,
8085 * or we are tearing down a map,
8086 * keep going.
8087 */
8088 } else if (state & VMDS_KERNEL_PMAP) {
8089 __vm_map_delete_gap_panic(map, s, start, end);
8090 } else if (vm_map_round_page(s, VM_MAP_PAGE_MASK(map)) < end) {
8091 /*
8092 * The vm_map_round_page() is needed since an entry
8093 * can be less than VM_MAP_PAGE_MASK() sized.
8094 *
8095 * For example, devices which have h/w 4K pages,
8096 * but entry sizes are all now 16K.
8097 */
8098 state |= VMDS_FOUND_GAP;
8099 gap_start = s;
8100 }
8101
8102 if (entry == vm_map_to_entry(map) ||
8103 end <= entry->vme_start) {
8104 break;
8105 }
8106
8107 s = entry->vme_start;
8108 }
8109
8110 if (state & VMDS_KERNEL_PMAP) {
8111 /*
8112 * In the kernel map and its submaps,
8113 * permanent entries never die, even
8114 * if VM_MAP_REMOVE_IMMUTABLE is passed.
8115 */
8116 if (entry->vme_permanent) {
8117 __vm_map_delete_permanent_panic(map, start, end, entry);
8118 }
8119
8120 if (flags & VM_MAP_REMOVE_GUESS_SIZE) {
8121 end = entry->vme_end;
8122 flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
8123 }
8124
8125 /*
8126 * In the kernel map and its submaps,
8127 * the removal of an atomic/guarded entry is strict.
8128 *
8129 * An atomic entry is processed only if it was
8130 * specifically targeted.
8131 *
8132 * We might have deleted non-atomic entries before
			 * we reach this point however...
8134 */
8135 kmem_entry_validate_guard(map, entry,
8136 start, end - start, guard);
8137 }
8138
8139 /*
8140 * Step 2.1: handle "permanent" and "submap" entries
8141 * *before* clipping to avoid triggering some unnecessary
8142 * un-nesting of the shared region.
8143 */
8144 if (entry->vme_permanent && entry->is_sub_map) {
8145 // printf("FBDP %s:%d permanent submap...\n", __FUNCTION__, __LINE__);
8146 /*
8147 * Un-mapping a "permanent" mapping of a user-space
8148 * submap is not allowed unless...
8149 */
8150 if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8151 /*
8152 * a. explicitly requested by the kernel caller.
8153 */
8154 // printf("FBDP %s:%d flags & REMOVE_IMMUTABLE\n", __FUNCTION__, __LINE__);
8155 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE_CODE) &&
8156 developer_mode_state()) {
8157 /*
8158 * b. we're in "developer" mode (for
8159 * breakpoints, dtrace probes, ...).
8160 */
8161 // printf("FBDP %s:%d flags & REMOVE_IMMUTABLE_CODE\n", __FUNCTION__, __LINE__);
8162 } else if (map->terminated) {
8163 /*
8164 * c. this is the final address space cleanup.
8165 */
8166 // printf("FBDP %s:%d map->terminated\n", __FUNCTION__, __LINE__);
8167 } else {
8168 vm_map_offset_t submap_start, submap_end;
8169 kern_return_t submap_kr;
8170
8171 /*
8172 * Check if there are any "permanent" mappings
8173 * in this range in the submap.
8174 */
8175 if (entry->in_transition) {
8176 /* can that even happen ? */
8177 goto in_transition;
8178 }
8179 /* compute the clipped range in the submap */
8180 submap_start = s - entry->vme_start;
8181 submap_start += VME_OFFSET(entry);
8182 submap_end = end - entry->vme_start;
8183 submap_end += VME_OFFSET(entry);
8184 submap_kr = vm_map_delete_submap_recurse(
8185 VME_SUBMAP(entry),
8186 submap_start,
8187 submap_end);
8188 if (submap_kr != KERN_SUCCESS) {
8189 /*
8190 * There are some "permanent" mappings
8191 * in the submap: we are not allowed
8192 * to remove this range.
8193 */
8194 printf("%d[%s] removing permanent submap entry "
8195 "%p [0x%llx:0x%llx] prot 0x%x/0x%x -> KERN_PROT_FAILURE\n",
8196 proc_selfpid(),
8197 (get_bsdtask_info(current_task())
8198 ? proc_name_address(get_bsdtask_info(current_task()))
8199 : "?"), entry,
8200 (uint64_t)entry->vme_start,
8201 (uint64_t)entry->vme_end,
8202 entry->protection,
8203 entry->max_protection);
8204 DTRACE_VM6(vm_map_delete_permanent_deny_submap,
8205 vm_map_entry_t, entry,
8206 vm_map_offset_t, entry->vme_start,
8207 vm_map_offset_t, entry->vme_end,
8208 vm_prot_t, entry->protection,
8209 vm_prot_t, entry->max_protection,
8210 int, VME_ALIAS(entry));
8211 ret.kmr_return = KERN_PROTECTION_FAILURE;
8212 goto out;
8213 }
8214 /* no permanent mappings: proceed */
8215 }
8216 }
8217
8218 /*
8219 * Step 3: Perform any clipping needed.
8220 *
8221 * After this, "entry" starts at "s", ends before "end"
8222 */
8223
8224 if (entry->vme_start < s) {
8225 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8226 entry->map_aligned &&
8227 !VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map))) {
8228 /*
8229 * The entry will no longer be map-aligned
8230 * after clipping and the caller said it's OK.
8231 */
8232 entry->map_aligned = FALSE;
8233 }
8234 vm_map_clip_start(map, entry, s);
8235 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8236 }
8237
8238 if (end < entry->vme_end) {
8239 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8240 entry->map_aligned &&
8241 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) {
8242 /*
8243 * The entry will no longer be map-aligned
8244 * after clipping and the caller said it's OK.
8245 */
8246 entry->map_aligned = FALSE;
8247 }
8248 vm_map_clip_end(map, entry, end);
8249 }
8250
8251 if (entry->vme_permanent && entry->is_sub_map) {
8252 /*
8253 * We already went through step 2.1 which did not deny
8254 * the removal of this "permanent" and "is_sub_map"
8255 * entry.
8256 * Now that we've clipped what we actually want to
8257 * delete, undo the "permanent" part to allow the
8258 * removal to proceed.
8259 */
8260 DTRACE_VM6(vm_map_delete_permanent_allow_submap,
8261 vm_map_entry_t, entry,
8262 vm_map_offset_t, entry->vme_start,
8263 vm_map_offset_t, entry->vme_end,
8264 vm_prot_t, entry->protection,
8265 vm_prot_t, entry->max_protection,
8266 int, VME_ALIAS(entry));
8267 entry->vme_permanent = false;
8268 }
8269
8270 assert(s == entry->vme_start);
8271 assert(entry->vme_end <= end);
8272
8273
8274 /*
8275 * Step 4: If the entry is in flux, wait for this to resolve.
8276 */
8277
8278 if (entry->in_transition) {
8279 wait_result_t wait_result;
8280
8281 in_transition:
8282 /*
8283 * Another thread is wiring/unwiring this entry.
8284 * Let the other thread know we are waiting.
8285 */
8286
8287 entry->needs_wakeup = TRUE;
8288
8289 /*
8290 * wake up anybody waiting on entries that we have
8291 * already unwired/deleted.
8292 */
8293 if (state & VMDS_NEEDS_WAKEUP) {
8294 vm_map_entry_wakeup(map);
8295 state &= ~VMDS_NEEDS_WAKEUP;
8296 }
8297
8298 wait_result = vm_map_entry_wait(map, interruptible);
8299
8300 if (interruptible &&
8301 wait_result == THREAD_INTERRUPTED) {
8302 /*
8303 * We do not clear the needs_wakeup flag,
8304 * since we cannot tell if we were the only one.
8305 */
8306 ret.kmr_return = KERN_ABORTED;
8307 return ret;
8308 }
8309
8310 /*
8311 * The entry could have been clipped or it
8312 * may not exist anymore. Look it up again.
8313 */
8314 state |= VMDS_NEEDS_LOOKUP;
8315 continue;
8316 }
8317
8318
8319 /*
8320 * Step 5: Handle wiring
8321 */
8322
8323 if (entry->wired_count) {
8324 struct vm_map_entry tmp_entry;
8325 boolean_t user_wire;
8326 unsigned int last_timestamp;
8327
8328 user_wire = entry->user_wired_count > 0;
8329
8330 /*
8331 * Remove a kernel wiring if requested
8332 */
8333 if (flags & VM_MAP_REMOVE_KUNWIRE) {
8334 entry->wired_count--;
8335 }
8336
8337 /*
8338 * Remove all user wirings for proper accounting
8339 */
8340 while (entry->user_wired_count) {
8341 subtract_wire_counts(map, entry, user_wire);
8342 }
8343
8344 /*
8345 * All our DMA I/O operations in IOKit are currently
8346 * done by wiring through the map entries of the task
8347 * requesting the I/O.
8348 *
8349 * Because of this, we must always wait for kernel wirings
8350 * to go away on the entries before deleting them.
8351 *
8352 * Any caller who wants to actually remove a kernel wiring
8353 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
8354 * properly remove one wiring instead of blasting through
8355 * them all.
8356 */
8357 if (entry->wired_count != 0) {
8358 assert(map != kernel_map);
8359 /*
8360 * Cannot continue. Typical case is when
8361 * a user thread has physical io pending on
8362 * on this page. Either wait for the
8363 * kernel wiring to go away or return an
8364 * error.
8365 */
8366 wait_result_t wait_result;
8367
8368 entry->needs_wakeup = TRUE;
8369 wait_result = vm_map_entry_wait(map,
8370 interruptible);
8371
8372 if (interruptible &&
8373 wait_result == THREAD_INTERRUPTED) {
8374 /*
8375 * We do not clear the
8376 * needs_wakeup flag, since we
8377 * cannot tell if we were the
8378 * only one.
8379 */
8380 ret.kmr_return = KERN_ABORTED;
8381 return ret;
8382 }
8383
8384
8385 /*
8386 * The entry could have been clipped or
8387 * it may not exist anymore. Look it
8388 * up again.
8389 */
8390 state |= VMDS_NEEDS_LOOKUP;
8391 continue;
8392 }
8393
8394 /*
8395 * We can unlock the map now.
8396 *
8397 * The entry might be split once we unlock the map,
8398 * but we need the range as defined by this entry
8399 * to be stable. So we must make a local copy.
8400 *
8401 * The underlying objects do not change during clips,
			 * and the in_transition state guarantees existence
8403 * of the entry.
8404 */
8405 last_timestamp = map->timestamp;
8406 entry->in_transition = TRUE;
8407 tmp_entry = *entry;
8408 vm_map_unlock(map);
8409
8410 if (tmp_entry.is_sub_map) {
8411 vm_map_t sub_map;
8412 vm_map_offset_t sub_start, sub_end;
8413 pmap_t pmap;
8414 vm_map_offset_t pmap_addr;
8415
8416
8417 sub_map = VME_SUBMAP(&tmp_entry);
8418 sub_start = VME_OFFSET(&tmp_entry);
8419 sub_end = sub_start + (tmp_entry.vme_end -
8420 tmp_entry.vme_start);
8421 if (tmp_entry.use_pmap) {
8422 pmap = sub_map->pmap;
8423 pmap_addr = tmp_entry.vme_start;
8424 } else {
8425 pmap = map->pmap;
8426 pmap_addr = tmp_entry.vme_start;
8427 }
8428 (void) vm_map_unwire_nested(sub_map,
8429 sub_start, sub_end,
8430 user_wire,
8431 pmap, pmap_addr);
8432 } else {
8433 if (tmp_entry.vme_kernel_object) {
8434 pmap_protect_options(
8435 map->pmap,
8436 tmp_entry.vme_start,
8437 tmp_entry.vme_end,
8438 VM_PROT_NONE,
8439 PMAP_OPTIONS_REMOVE,
8440 NULL);
8441 }
8442 vm_fault_unwire(map, &tmp_entry,
8443 tmp_entry.vme_kernel_object,
8444 map->pmap, tmp_entry.vme_start);
8445 }
8446
8447 vm_map_lock(map);
8448
8449 /*
8450 * Unwiring happened, we can now go back to deleting
8451 * them (after we clear the in_transition bit for the range).
8452 */
8453 if (last_timestamp + 1 != map->timestamp) {
8454 state |= VMDS_NEEDS_LOOKUP;
8455 }
8456 clear_in_transition_end = tmp_entry.vme_end;
8457 continue;
8458 }
8459
8460 assert(entry->wired_count == 0);
8461 assert(entry->user_wired_count == 0);
8462
8463
8464 /*
8465 * Step 6: Entry is unwired and ready for us to delete !
8466 */
8467
8468 if (!entry->vme_permanent) {
8469 /*
8470 * Typical case: the entry really shouldn't be permanent
8471 */
8472 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE_CODE) &&
8473 (entry->protection & VM_PROT_EXECUTE) &&
8474 developer_mode_state()) {
8475 /*
8476 * Allow debuggers to undo executable mappings
8477 * when developer mode is on.
8478 */
8479 #if 0
8480 printf("FBDP %d[%s] removing permanent executable entry "
8481 "%p [0x%llx:0x%llx] prot 0x%x/0x%x\n",
8482 proc_selfpid(),
8483 (current_task()->bsd_info
8484 ? proc_name_address(current_task()->bsd_info)
8485 : "?"), entry,
8486 (uint64_t)entry->vme_start,
8487 (uint64_t)entry->vme_end,
8488 entry->protection,
8489 entry->max_protection);
8490 #endif
8491 entry->vme_permanent = FALSE;
8492 } else if ((flags & VM_MAP_REMOVE_IMMUTABLE) || map->terminated) {
8493 #if 0
8494 printf("FBDP %d[%s] removing permanent entry "
8495 "%p [0x%llx:0x%llx] prot 0x%x/0x%x\n",
8496 proc_selfpid(),
8497 (current_task()->bsd_info
8498 ? proc_name_address(current_task()->bsd_info)
8499 : "?"), entry,
8500 (uint64_t)entry->vme_start,
8501 (uint64_t)entry->vme_end,
8502 entry->protection,
8503 entry->max_protection);
8504 #endif
8505 entry->vme_permanent = FALSE;
8506 } else {
8507 DTRACE_VM6(vm_map_delete_permanent,
8508 vm_map_entry_t, entry,
8509 vm_map_offset_t, entry->vme_start,
8510 vm_map_offset_t, entry->vme_end,
8511 vm_prot_t, entry->protection,
8512 vm_prot_t, entry->max_protection,
8513 int, VME_ALIAS(entry));
8514 }
8515
8516 if (entry->is_sub_map) {
8517 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8518 "map %p (%d) entry %p submap %p (%d)\n",
8519 map, VM_MAP_PAGE_SHIFT(map), entry,
8520 VME_SUBMAP(entry),
8521 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8522 if (entry->use_pmap) {
8523 #ifndef NO_NESTED_PMAP
8524 int pmap_flags;
8525
8526 if (map->terminated) {
8527 /*
8528 * This is the final cleanup of the
8529 * address space being terminated.
8530 * No new mappings are expected and
8531 * we don't really need to unnest the
8532 * shared region (and lose the "global"
8533 * pmap mappings, if applicable).
8534 *
8535 * Tell the pmap layer that we're
8536 * "clean" wrt nesting.
8537 */
8538 pmap_flags = PMAP_UNNEST_CLEAN;
8539 } else {
8540 /*
8541 * We're unmapping part of the nested
8542 * shared region, so we can't keep the
8543 * nested pmap.
8544 */
8545 pmap_flags = 0;
8546 }
8547 pmap_unnest_options(
8548 map->pmap,
8549 (addr64_t)entry->vme_start,
8550 entry->vme_end - entry->vme_start,
8551 pmap_flags);
8552 #endif /* NO_NESTED_PMAP */
8553 if (map->mapped_in_other_pmaps &&
8554 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8555 /* clean up parent map/maps */
8556 vm_map_submap_pmap_clean(
8557 map, entry->vme_start,
8558 entry->vme_end,
8559 VME_SUBMAP(entry),
8560 VME_OFFSET(entry));
8561 }
8562 } else {
8563 vm_map_submap_pmap_clean(
8564 map, entry->vme_start, entry->vme_end,
8565 VME_SUBMAP(entry),
8566 VME_OFFSET(entry));
8567 }
8568 } else if (entry->vme_kernel_object ||
8569 VME_OBJECT(entry) == compressor_object) {
8570 /*
8571 * nothing to do
8572 */
8573 } else if (map->mapped_in_other_pmaps &&
8574 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8575 vm_object_pmap_protect_options(
8576 VME_OBJECT(entry), VME_OFFSET(entry),
8577 entry->vme_end - entry->vme_start,
8578 PMAP_NULL,
8579 PAGE_SIZE,
8580 entry->vme_start,
8581 VM_PROT_NONE,
8582 PMAP_OPTIONS_REMOVE);
8583 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8584 (state & VMDS_KERNEL_PMAP)) {
8585 /* Remove translations associated
8586 * with this range unless the entry
8587 * does not have an object, or
8588 * it's the kernel map or a descendant
8589 * since the platform could potentially
8590 * create "backdoor" mappings invisible
8591 * to the VM. It is expected that
8592 * objectless, non-kernel ranges
8593 * do not have such VM invisible
8594 * translations.
8595 */
8596 pmap_remove_options(map->pmap,
8597 (addr64_t)entry->vme_start,
8598 (addr64_t)entry->vme_end,
8599 PMAP_OPTIONS_REMOVE);
8600 }
8601
8602 #if DEBUG
8603 /*
8604 * All pmap mappings for this map entry must have been
8605 * cleared by now.
8606 */
8607 assert(pmap_is_empty(map->pmap,
8608 entry->vme_start,
8609 entry->vme_end));
8610 #endif /* DEBUG */
8611
8612 if (entry->iokit_acct) {
8613 /* alternate accounting */
8614 DTRACE_VM4(vm_map_iokit_unmapped_region,
8615 vm_map_t, map,
8616 vm_map_offset_t, entry->vme_start,
8617 vm_map_offset_t, entry->vme_end,
8618 int, VME_ALIAS(entry));
8619 vm_map_iokit_unmapped_region(map,
8620 (entry->vme_end -
8621 entry->vme_start));
8622 entry->iokit_acct = FALSE;
8623 entry->use_pmap = FALSE;
8624 }
8625
8626 s = entry->vme_end;
8627 next = entry->vme_next;
8628 ret.kmr_size += entry->vme_end - entry->vme_start;
8629
8630 if (entry->vme_permanent) {
8631 /*
8632 * A permanent entry can not be removed, so leave it
8633 * in place but remove all access permissions.
8634 */
8635 if (!entry->pmap_cs_associated) {
8636 printf("%s:%d %d[%s] map %p entry %p [ 0x%llx - 0x%llx ] submap %d prot 0x%x/0x%x -> 0/0\n",
8637 __FUNCTION__, __LINE__,
8638 proc_selfpid(),
8639 (get_bsdtask_info(current_task())
8640 ? proc_name_address(get_bsdtask_info(current_task()))
8641 : "?"),
8642 map,
8643 entry,
8644 (uint64_t)entry->vme_start,
8645 (uint64_t)entry->vme_end,
8646 entry->is_sub_map,
8647 entry->protection,
8648 entry->max_protection);
8649 }
8650 DTRACE_VM6(vm_map_delete_permanent_prot_none,
8651 vm_map_entry_t, entry,
8652 vm_map_offset_t, entry->vme_start,
8653 vm_map_offset_t, entry->vme_end,
8654 vm_prot_t, entry->protection,
8655 vm_prot_t, entry->max_protection,
8656 int, VME_ALIAS(entry));
8657 entry->protection = VM_PROT_NONE;
8658 entry->max_protection = VM_PROT_NONE;
8659 } else {
8660 vm_map_entry_zap(map, entry, zap_list);
8661 }
8662
8663 entry = next;
8664
8665 if ((flags & VM_MAP_REMOVE_NO_YIELD) == 0 && s < end) {
8666 unsigned int last_timestamp = map->timestamp++;
8667
8668 if (lck_rw_lock_yield_exclusive(&map->lock,
8669 LCK_RW_YIELD_ANY_WAITER)) {
8670 if (last_timestamp != map->timestamp + 1) {
8671 state |= VMDS_NEEDS_LOOKUP;
8672 }
8673 } else {
8674 /* we didn't yield, undo our change */
8675 map->timestamp--;
8676 }
8677 }
8678 }
8679
8680 if (map->wait_for_space) {
8681 thread_wakeup((event_t) map);
8682 }
8683
8684 if (state & VMDS_NEEDS_WAKEUP) {
8685 vm_map_entry_wakeup(map);
8686 }
8687
8688 out:
8689 if ((state & VMDS_KERNEL_PMAP) && ret.kmr_return) {
8690 __vm_map_delete_failed_panic(map, start, end, ret.kmr_return);
8691 }
8692
8693 if (state & VMDS_FOUND_GAP) {
8694 DTRACE_VM3(kern_vm_deallocate_gap,
8695 vm_map_offset_t, gap_start,
8696 vm_map_offset_t, save_start,
8697 vm_map_offset_t, save_end);
8698 if (flags & VM_MAP_REMOVE_GAPS_FAIL) {
8699 ret.kmr_return = KERN_INVALID_VALUE;
8700 } else {
8701 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8702 }
8703 }
8704
8705 return ret;
8706 }
8707
8708 kmem_return_t
vm_map_remove_and_unlock(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vmr_flags_t flags,kmem_guard_t guard)8709 vm_map_remove_and_unlock(
8710 vm_map_t map,
8711 vm_map_offset_t start,
8712 vm_map_offset_t end,
8713 vmr_flags_t flags,
8714 kmem_guard_t guard)
8715 {
8716 kmem_return_t ret;
8717 VM_MAP_ZAP_DECLARE(zap);
8718
8719 ret = vm_map_delete(map, start, end, flags, guard, &zap);
8720 vm_map_unlock(map);
8721
8722 vm_map_zap_dispose(&zap);
8723
8724 return ret;
8725 }
8726
8727 /*
8728 * vm_map_remove_guard:
8729 *
8730 * Remove the given address range from the target map.
8731 * This is the exported form of vm_map_delete.
8732 */
8733 kmem_return_t
vm_map_remove_guard(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vmr_flags_t flags,kmem_guard_t guard)8734 vm_map_remove_guard(
8735 vm_map_t map,
8736 vm_map_offset_t start,
8737 vm_map_offset_t end,
8738 vmr_flags_t flags,
8739 kmem_guard_t guard)
8740 {
8741 vm_map_lock(map);
8742 return vm_map_remove_and_unlock(map, start, end, flags, guard);
8743 }
8744
8745 /*
8746 * vm_map_terminate:
8747 *
8748 * Clean out a task's map.
8749 */
8750 kern_return_t
vm_map_terminate(vm_map_t map)8751 vm_map_terminate(
8752 vm_map_t map)
8753 {
8754 vm_map_lock(map);
8755 map->terminated = TRUE;
8756 vm_map_disable_hole_optimization(map);
8757 (void)vm_map_remove_and_unlock(map, map->min_offset, map->max_offset,
8758 VM_MAP_REMOVE_NO_FLAGS, KMEM_GUARD_NONE);
8759 return KERN_SUCCESS;
8760 }
8761
8762 /*
8763 * Routine: vm_map_copy_allocate
8764 *
8765 * Description:
8766 * Allocates and initializes a map copy object.
8767 */
8768 static vm_map_copy_t
vm_map_copy_allocate(void)8769 vm_map_copy_allocate(void)
8770 {
8771 vm_map_copy_t new_copy;
8772
8773 new_copy = zalloc_id(ZONE_ID_VM_MAP_COPY, Z_WAITOK | Z_ZERO);
8774 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8775 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8776 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8777 return new_copy;
8778 }
8779
8780 /*
8781 * Routine: vm_map_copy_discard
8782 *
8783 * Description:
8784 * Dispose of a map copy object (returned by
8785 * vm_map_copyin).
8786 */
8787 void
vm_map_copy_discard(vm_map_copy_t copy)8788 vm_map_copy_discard(
8789 vm_map_copy_t copy)
8790 {
8791 if (copy == VM_MAP_COPY_NULL) {
8792 return;
8793 }
8794
8795 /*
8796 * Assert that the vm_map_copy is coming from the right
8797 * zone and hasn't been forged
8798 */
8799 vm_map_copy_require(copy);
8800
8801 switch (copy->type) {
8802 case VM_MAP_COPY_ENTRY_LIST:
8803 while (vm_map_copy_first_entry(copy) !=
8804 vm_map_copy_to_entry(copy)) {
8805 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8806
8807 vm_map_copy_entry_unlink(copy, entry);
8808 if (entry->is_sub_map) {
8809 vm_map_deallocate(VME_SUBMAP(entry));
8810 } else {
8811 vm_object_deallocate(VME_OBJECT(entry));
8812 }
8813 vm_map_copy_entry_dispose(entry);
8814 }
8815 break;
8816 case VM_MAP_COPY_OBJECT:
8817 vm_object_deallocate(copy->cpy_object);
8818 break;
8819 case VM_MAP_COPY_KERNEL_BUFFER:
8820
8821 /*
8822 * The vm_map_copy_t and possibly the data buffer were
8823 * allocated by a single call to kalloc_data(), i.e. the
8824 * vm_map_copy_t was not allocated out of the zone.
8825 */
8826 if (copy->size > msg_ool_size_small || copy->offset) {
8827 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8828 (long long)copy->size, (long long)copy->offset);
8829 }
8830 kfree_data(copy->cpy_kdata, copy->size);
8831 }
8832 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
8833 }
8834
8835 /*
8836 * Routine: vm_map_copy_copy
8837 *
8838 * Description:
8839 * Move the information in a map copy object to
8840 * a new map copy object, leaving the old one
8841 * empty.
8842 *
8843 * This is used by kernel routines that need
8844 * to look at out-of-line data (in copyin form)
8845 * before deciding whether to return SUCCESS.
8846 * If the routine returns FAILURE, the original
8847 * copy object will be deallocated; therefore,
8848 * these routines must make a copy of the copy
8849 * object and leave the original empty so that
8850 * deallocation will not fail.
8851 */
8852 vm_map_copy_t
vm_map_copy_copy(vm_map_copy_t copy)8853 vm_map_copy_copy(
8854 vm_map_copy_t copy)
8855 {
8856 vm_map_copy_t new_copy;
8857
8858 if (copy == VM_MAP_COPY_NULL) {
8859 return VM_MAP_COPY_NULL;
8860 }
8861
8862 /*
8863 * Assert that the vm_map_copy is coming from the right
8864 * zone and hasn't been forged
8865 */
8866 vm_map_copy_require(copy);
8867
8868 /*
8869 * Allocate a new copy object, and copy the information
8870 * from the old one into it.
8871 */
8872
8873 new_copy = zalloc_id(ZONE_ID_VM_MAP_COPY, Z_WAITOK | Z_ZERO | Z_NOFAIL);
8874 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8875 #if __has_feature(ptrauth_calls)
8876 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8877 new_copy->cpy_kdata = copy->cpy_kdata;
8878 }
8879 #endif
8880
8881 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8882 /*
8883 * The links in the entry chain must be
8884 * changed to point to the new copy object.
8885 */
8886 vm_map_copy_first_entry(copy)->vme_prev
8887 = vm_map_copy_to_entry(new_copy);
8888 vm_map_copy_last_entry(copy)->vme_next
8889 = vm_map_copy_to_entry(new_copy);
8890 }
8891
8892 /*
8893 * Change the old copy object into one that contains
8894 * nothing to be deallocated.
8895 */
8896 copy->type = VM_MAP_COPY_OBJECT;
8897 copy->cpy_object = VM_OBJECT_NULL;
8898
8899 /*
8900 * Return the new object.
8901 */
8902 return new_copy;
8903 }
8904
/*
 *	vm_map_entry_is_overwritable:
 *
 *	Returns TRUE if "entry" may be overwritten in place by a
 *	copy-overwrite operation targeting "dst_map".
 *	Checks, in order: writability; JIT regions under code-signing
 *	enforcement; (arm64e) hardware TPRO entries; and "permanent"
 *	entries.  For a permanent submap entry the answer is TRUE here,
 *	because permanence of the entries *inside* the submap cannot be
 *	determined at this level (see comment below).
 */
static boolean_t
vm_map_entry_is_overwritable(
	vm_map_t        dst_map __unused,
	vm_map_entry_t  entry)
{
	if (!(entry->protection & VM_PROT_WRITE)) {
		/* can't overwrite if not writable */
		return FALSE;
	}
#if !__x86_64__
	if (entry->used_for_jit &&
	    vm_map_cs_enforcement(dst_map) &&
	    !dst_map->cs_debugged) {
		/*
		 * Can't overwrite a JIT region while cs_enforced
		 * and not cs_debugged.
		 */
		return FALSE;
	}

#if __arm64e__
	/* Do not allow overwrite HW assisted TPRO entries */
	if (entry->used_for_tpro) {
		return FALSE;
	}
#endif /* __arm64e__ */

	if (entry->vme_permanent) {
		if (entry->is_sub_map) {
			/*
			 * We can't tell if the submap contains "permanent"
			 * entries within the range targeted by the caller.
			 * The caller will have to check for that with
			 * vm_map_overwrite_submap_recurse() for example.
			 */
		} else {
			/*
			 * Do not allow overwriting of a "permanent"
			 * entry.  Fire a DTrace probe so the denial
			 * can be observed.
			 */
			DTRACE_VM6(vm_map_delete_permanent_deny_overwrite,
			    vm_map_entry_t, entry,
			    vm_map_offset_t, entry->vme_start,
			    vm_map_offset_t, entry->vme_end,
			    vm_prot_t, entry->protection,
			    vm_prot_t, entry->max_protection,
			    int, VME_ALIAS(entry));
			return FALSE;
		}
	}
#endif /* !__x86_64__ */
	return TRUE;
}
8958
/*
 *	vm_map_overwrite_submap_recurse:
 *
 *	Verify that the range [dst_addr, dst_addr + dst_size) of
 *	"dst_map" can be overwritten: every entry covering the range
 *	must be writable and overwritable, must not be in transition,
 *	and the range must be contiguous (no gaps).  Submap entries
 *	are checked by recursing into the submap.
 *
 *	Locking: takes and releases the dst_map lock internally; the
 *	map is unlocked on every return path.  If an entry is found
 *	in transition, the routine waits and restarts the scan from
 *	the top (start_pass_1).
 *
 *	Returns KERN_SUCCESS if the whole range may be overwritten,
 *	KERN_INVALID_ADDRESS on a lookup failure or gap,
 *	KERN_PROTECTION_FAILURE for non-writable/non-overwritable
 *	entries, or KERN_FAILURE if a permanent-object destination
 *	follows an encountered submap.
 */
static kern_return_t
vm_map_overwrite_submap_recurse(
	vm_map_t        dst_map,
	vm_map_offset_t dst_addr,
	vm_map_size_t   dst_size)
{
	vm_map_offset_t dst_end;
	vm_map_entry_t  tmp_entry;
	vm_map_entry_t  entry;
	kern_return_t   result;
	boolean_t       encountered_sub_map = FALSE;



	/*
	 * Verify that the destination is all writeable
	 * initially. We have to trunc the destination
	 * address and round the copy size or we'll end up
	 * splitting entries in strange ways.
	 */

	dst_end = vm_map_round_page(dst_addr + dst_size,
	    VM_MAP_PAGE_MASK(dst_map));
	vm_map_lock(dst_map);

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}

	vm_map_clip_start(dst_map,
	    tmp_entry,
	    vm_map_trunc_page(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)));
	if (tmp_entry->is_sub_map) {
		/* clipping did unnest if needed */
		assert(!tmp_entry->use_pmap);
	}

	for (entry = tmp_entry;;) {
		vm_map_entry_t  next;

		next = entry->vme_next;
		while (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			encountered_sub_map = TRUE;
			/* translate the overlap into submap offsets */
			sub_start = VME_OFFSET(entry);

			if (entry->vme_end < dst_end) {
				sub_end = entry->vme_end;
			} else {
				sub_end = dst_end;
			}
			sub_end -= entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			/* drop the lock across the recursive check */
			vm_map_unlock(dst_map);

			result = vm_map_overwrite_submap_recurse(
				VME_SUBMAP(entry),
				sub_start,
				sub_end - sub_start);

			if (result != KERN_SUCCESS) {
				return result;
			}
			/*
			 * NOTE(review): "entry" is dereferenced here after
			 * the map lock was dropped above -- presumably the
			 * comparison against the saved bounds is benign, but
			 * confirm the entry cannot be freed concurrently.
			 */
			if (dst_end <= entry->vme_end) {
				return KERN_SUCCESS;
			}
			/* re-lock and resume the scan past the submap */
			vm_map_lock(dst_map);
			if (!vm_map_lookup_entry(dst_map, local_end,
			    &tmp_entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			entry = tmp_entry;
			next = entry->vme_next;
		}

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * If the entry is in transition, we must wait
		 * for it to exit that state. Anything could happen
		 * when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 * our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			vm_map_unlock(dst_map);
			return KERN_SUCCESS;
		}
		/*
		 * check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * Check for permanent objects in the destination.
		 * (Only fails if a submap was also encountered.)
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		    (VME_OBJECT(entry)->true_share))) {
			if (encountered_sub_map) {
				vm_map_unlock(dst_map);
				return KERN_FAILURE;
			}
		}


		entry = next;
	}/* for */
	vm_map_unlock(dst_map);
	return KERN_SUCCESS;
}
9111
9112 /*
9113 * Routine: vm_map_copy_overwrite
9114 *
9115 * Description:
9116 * Copy the memory described by the map copy
9117 * object (copy; returned by vm_map_copyin) onto
9118 * the specified destination region (dst_map, dst_addr).
9119 * The destination must be writeable.
9120 *
9121 * Unlike vm_map_copyout, this routine actually
9122 * writes over previously-mapped memory. If the
9123 * previous mapping was to a permanent (user-supplied)
9124 * memory object, it is preserved.
9125 *
9126 * The attributes (protection and inheritance) of the
9127 * destination region are preserved.
9128 *
9129 * If successful, consumes the copy object.
9130 * Otherwise, the caller is responsible for it.
9131 *
9132 * Implementation notes:
9133 * To overwrite aligned temporary virtual memory, it is
9134 * sufficient to remove the previous mapping and insert
9135 * the new copy. This replacement is done either on
9136 * the whole region (if no permanent virtual memory
9137 * objects are embedded in the destination region) or
9138 * in individual map entries.
9139 *
 * To overwrite permanent virtual memory, it is necessary
9141 * to copy each page, as the external memory management
9142 * interface currently does not provide any optimizations.
9143 *
9144 * Unaligned memory also has to be copied. It is possible
9145 * to use 'vm_trickery' to copy the aligned data. This is
9146 * not done but not hard to implement.
9147 *
9148 * Once a page of permanent memory has been overwritten,
9149 * it is impossible to interrupt this function; otherwise,
9150 * the call would be neither atomic nor location-independent.
9151 * The kernel-state portion of a user thread must be
9152 * interruptible.
9153 *
9154 * It may be expensive to forward all requests that might
9155 * overwrite permanent memory (vm_write, vm_copy) to
9156 * uninterruptible kernel threads. This routine may be
9157 * called by interruptible threads; however, success is
9158 * not guaranteed -- if the request cannot be performed
9159 * atomically and interruptibly, an error indication is
9160 * returned.
9161 *
9162 * Callers of this function must call vm_map_copy_require on
9163 * previously created vm_map_copy_t or pass a newly created
9164 * one to ensure that it hasn't been forged.
9165 */
9166
9167 static kern_return_t
vm_map_copy_overwrite_nested(vm_map_t dst_map,vm_map_address_t dst_addr,vm_map_copy_t copy,boolean_t interruptible,pmap_t pmap,boolean_t discard_on_success)9168 vm_map_copy_overwrite_nested(
9169 vm_map_t dst_map,
9170 vm_map_address_t dst_addr,
9171 vm_map_copy_t copy,
9172 boolean_t interruptible,
9173 pmap_t pmap,
9174 boolean_t discard_on_success)
9175 {
9176 vm_map_offset_t dst_end;
9177 vm_map_entry_t tmp_entry;
9178 vm_map_entry_t entry;
9179 kern_return_t kr;
9180 boolean_t aligned = TRUE;
9181 boolean_t contains_permanent_objects = FALSE;
9182 boolean_t encountered_sub_map = FALSE;
9183 vm_map_offset_t base_addr;
9184 vm_map_size_t copy_size;
9185 vm_map_size_t total_size;
9186 uint16_t copy_page_shift;
9187
9188 /*
9189 * Check for special kernel buffer allocated
9190 * by new_ipc_kmsg_copyin.
9191 */
9192
9193 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9194 return vm_map_copyout_kernel_buffer(
9195 dst_map, &dst_addr,
9196 copy, copy->size, TRUE, discard_on_success);
9197 }
9198
9199 /*
9200 * Only works for entry lists at the moment. Will
9201 * support page lists later.
9202 */
9203
9204 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9205
9206 if (copy->size == 0) {
9207 if (discard_on_success) {
9208 vm_map_copy_discard(copy);
9209 }
9210 return KERN_SUCCESS;
9211 }
9212
9213 copy_page_shift = copy->cpy_hdr.page_shift;
9214
9215 /*
9216 * Verify that the destination is all writeable
9217 * initially. We have to trunc the destination
9218 * address and round the copy size or we'll end up
9219 * splitting entries in strange ways.
9220 */
9221
9222 if (!VM_MAP_PAGE_ALIGNED(copy->size,
9223 VM_MAP_PAGE_MASK(dst_map)) ||
9224 !VM_MAP_PAGE_ALIGNED(copy->offset,
9225 VM_MAP_PAGE_MASK(dst_map)) ||
9226 !VM_MAP_PAGE_ALIGNED(dst_addr,
9227 VM_MAP_PAGE_MASK(dst_map)) ||
9228 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9229 aligned = FALSE;
9230 dst_end = vm_map_round_page(dst_addr + copy->size,
9231 VM_MAP_PAGE_MASK(dst_map));
9232 } else {
9233 dst_end = dst_addr + copy->size;
9234 }
9235
9236 vm_map_lock(dst_map);
9237
9238 /* LP64todo - remove this check when vm_map_commpage64()
9239 * no longer has to stuff in a map_entry for the commpage
9240 * above the map's max_offset.
9241 */
9242 if (dst_addr >= dst_map->max_offset) {
9243 vm_map_unlock(dst_map);
9244 return KERN_INVALID_ADDRESS;
9245 }
9246
9247 start_pass_1:
9248 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9249 vm_map_unlock(dst_map);
9250 return KERN_INVALID_ADDRESS;
9251 }
9252 vm_map_clip_start(dst_map,
9253 tmp_entry,
9254 vm_map_trunc_page(dst_addr,
9255 VM_MAP_PAGE_MASK(dst_map)));
9256 for (entry = tmp_entry;;) {
9257 vm_map_entry_t next = entry->vme_next;
9258
9259 while (entry->is_sub_map) {
9260 vm_map_offset_t sub_start;
9261 vm_map_offset_t sub_end;
9262 vm_map_offset_t local_end;
9263
9264 if (entry->in_transition) {
9265 /*
9266 * Say that we are waiting, and wait for entry.
9267 */
9268 entry->needs_wakeup = TRUE;
9269 vm_map_entry_wait(dst_map, THREAD_UNINT);
9270
9271 goto start_pass_1;
9272 }
9273
9274 local_end = entry->vme_end;
9275 if (!(entry->needs_copy)) {
9276 /* if needs_copy we are a COW submap */
9277 /* in such a case we just replace so */
9278 /* there is no need for the follow- */
9279 /* ing check. */
9280 encountered_sub_map = TRUE;
9281 sub_start = VME_OFFSET(entry);
9282
9283 if (entry->vme_end < dst_end) {
9284 sub_end = entry->vme_end;
9285 } else {
9286 sub_end = dst_end;
9287 }
9288 sub_end -= entry->vme_start;
9289 sub_end += VME_OFFSET(entry);
9290 vm_map_unlock(dst_map);
9291
9292 kr = vm_map_overwrite_submap_recurse(
9293 VME_SUBMAP(entry),
9294 sub_start,
9295 sub_end - sub_start);
9296 if (kr != KERN_SUCCESS) {
9297 return kr;
9298 }
9299 vm_map_lock(dst_map);
9300 }
9301
9302 if (dst_end <= entry->vme_end) {
9303 goto start_overwrite;
9304 }
9305 if (!vm_map_lookup_entry(dst_map, local_end,
9306 &entry)) {
9307 vm_map_unlock(dst_map);
9308 return KERN_INVALID_ADDRESS;
9309 }
9310 next = entry->vme_next;
9311 }
9312
9313 if (!(entry->protection & VM_PROT_WRITE)) {
9314 vm_map_unlock(dst_map);
9315 return KERN_PROTECTION_FAILURE;
9316 }
9317
9318 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
9319 vm_map_unlock(dst_map);
9320 return KERN_PROTECTION_FAILURE;
9321 }
9322
9323 /*
9324 * If the entry is in transition, we must wait
9325 * for it to exit that state. Anything could happen
9326 * when we unlock the map, so start over.
9327 */
9328 if (entry->in_transition) {
9329 /*
9330 * Say that we are waiting, and wait for entry.
9331 */
9332 entry->needs_wakeup = TRUE;
9333 vm_map_entry_wait(dst_map, THREAD_UNINT);
9334
9335 goto start_pass_1;
9336 }
9337
9338 /*
9339 * our range is contained completely within this map entry
9340 */
9341 if (dst_end <= entry->vme_end) {
9342 break;
9343 }
9344 /*
9345 * check that range specified is contiguous region
9346 */
9347 if ((next == vm_map_to_entry(dst_map)) ||
9348 (next->vme_start != entry->vme_end)) {
9349 vm_map_unlock(dst_map);
9350 return KERN_INVALID_ADDRESS;
9351 }
9352
9353
9354 /*
9355 * Check for permanent objects in the destination.
9356 */
9357 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9358 ((!VME_OBJECT(entry)->internal) ||
9359 (VME_OBJECT(entry)->true_share))) {
9360 contains_permanent_objects = TRUE;
9361 }
9362
9363 entry = next;
9364 }/* for */
9365
9366 start_overwrite:
9367 /*
9368 * If there are permanent objects in the destination, then
9369 * the copy cannot be interrupted.
9370 */
9371
9372 if (interruptible && contains_permanent_objects) {
9373 vm_map_unlock(dst_map);
9374 return KERN_FAILURE; /* XXX */
9375 }
9376
9377 /*
9378 *
9379 * Make a second pass, overwriting the data
9380 * At the beginning of each loop iteration,
9381 * the next entry to be overwritten is "tmp_entry"
9382 * (initially, the value returned from the lookup above),
9383 * and the starting address expected in that entry
9384 * is "start".
9385 */
9386
9387 total_size = copy->size;
9388 if (encountered_sub_map) {
9389 copy_size = 0;
9390 /* re-calculate tmp_entry since we've had the map */
9391 /* unlocked */
9392 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9393 vm_map_unlock(dst_map);
9394 return KERN_INVALID_ADDRESS;
9395 }
9396 } else {
9397 copy_size = copy->size;
9398 }
9399
9400 base_addr = dst_addr;
9401 while (TRUE) {
9402 /* deconstruct the copy object and do in parts */
		/* only in sub_map, interruptible case */
9404 vm_map_entry_t copy_entry;
9405 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9406 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9407 int nentries;
9408 int remaining_entries = 0;
9409 vm_map_offset_t new_offset = 0;
9410
9411 for (entry = tmp_entry; copy_size == 0;) {
9412 vm_map_entry_t next;
9413
9414 next = entry->vme_next;
9415
9416 /* tmp_entry and base address are moved along */
9417 /* each time we encounter a sub-map. Otherwise */
			/* entry can outpace tmp_entry, and the copy_size */
9419 /* may reflect the distance between them */
9420 /* if the current entry is found to be in transition */
9421 /* we will start over at the beginning or the last */
9422 /* encounter of a submap as dictated by base_addr */
9423 /* we will zero copy_size accordingly. */
9424 if (entry->in_transition) {
9425 /*
9426 * Say that we are waiting, and wait for entry.
9427 */
9428 entry->needs_wakeup = TRUE;
9429 vm_map_entry_wait(dst_map, THREAD_UNINT);
9430
9431 if (!vm_map_lookup_entry(dst_map, base_addr,
9432 &tmp_entry)) {
9433 vm_map_unlock(dst_map);
9434 return KERN_INVALID_ADDRESS;
9435 }
9436 copy_size = 0;
9437 entry = tmp_entry;
9438 continue;
9439 }
9440 if (entry->is_sub_map) {
9441 vm_map_offset_t sub_start;
9442 vm_map_offset_t sub_end;
9443 vm_map_offset_t local_end;
9444
9445 if (entry->needs_copy) {
9446 /* if this is a COW submap */
9447 /* just back the range with a */
9448 /* anonymous entry */
9449 assert(!entry->vme_permanent);
9450 if (entry->vme_end < dst_end) {
9451 sub_end = entry->vme_end;
9452 } else {
9453 sub_end = dst_end;
9454 }
9455 if (entry->vme_start < base_addr) {
9456 sub_start = base_addr;
9457 } else {
9458 sub_start = entry->vme_start;
9459 }
9460 vm_map_clip_end(
9461 dst_map, entry, sub_end);
9462 vm_map_clip_start(
9463 dst_map, entry, sub_start);
9464 assert(!entry->use_pmap);
9465 assert(!entry->iokit_acct);
9466 entry->use_pmap = TRUE;
9467 vm_map_deallocate(VME_SUBMAP(entry));
9468 assert(!entry->vme_permanent);
9469 VME_OBJECT_SET(entry, VM_OBJECT_NULL, false, 0);
9470 VME_OFFSET_SET(entry, 0);
9471 entry->is_shared = FALSE;
9472 entry->needs_copy = FALSE;
9473 entry->protection = VM_PROT_DEFAULT;
9474 entry->max_protection = VM_PROT_ALL;
9475 entry->wired_count = 0;
9476 entry->user_wired_count = 0;
9477 if (entry->inheritance
9478 == VM_INHERIT_SHARE) {
9479 entry->inheritance = VM_INHERIT_COPY;
9480 }
9481 continue;
9482 }
9483 /* first take care of any non-sub_map */
9484 /* entries to send */
9485 if (base_addr < entry->vme_start) {
9486 /* stuff to send */
9487 copy_size =
9488 entry->vme_start - base_addr;
9489 break;
9490 }
9491 sub_start = VME_OFFSET(entry);
9492
9493 if (entry->vme_end < dst_end) {
9494 sub_end = entry->vme_end;
9495 } else {
9496 sub_end = dst_end;
9497 }
9498 sub_end -= entry->vme_start;
9499 sub_end += VME_OFFSET(entry);
9500 local_end = entry->vme_end;
9501 vm_map_unlock(dst_map);
9502 copy_size = sub_end - sub_start;
9503
9504 /* adjust the copy object */
9505 if (total_size > copy_size) {
9506 vm_map_size_t local_size = 0;
9507 vm_map_size_t entry_size;
9508
9509 nentries = 1;
9510 new_offset = copy->offset;
9511 copy_entry = vm_map_copy_first_entry(copy);
9512 while (copy_entry !=
9513 vm_map_copy_to_entry(copy)) {
9514 entry_size = copy_entry->vme_end -
9515 copy_entry->vme_start;
9516 if ((local_size < copy_size) &&
9517 ((local_size + entry_size)
9518 >= copy_size)) {
9519 vm_map_copy_clip_end(copy,
9520 copy_entry,
9521 copy_entry->vme_start +
9522 (copy_size - local_size));
9523 entry_size = copy_entry->vme_end -
9524 copy_entry->vme_start;
9525 local_size += entry_size;
9526 new_offset += entry_size;
9527 }
9528 if (local_size >= copy_size) {
9529 next_copy = copy_entry->vme_next;
9530 copy_entry->vme_next =
9531 vm_map_copy_to_entry(copy);
9532 previous_prev =
9533 copy->cpy_hdr.links.prev;
9534 copy->cpy_hdr.links.prev = copy_entry;
9535 copy->size = copy_size;
9536 remaining_entries =
9537 copy->cpy_hdr.nentries;
9538 remaining_entries -= nentries;
9539 copy->cpy_hdr.nentries = nentries;
9540 break;
9541 } else {
9542 local_size += entry_size;
9543 new_offset += entry_size;
9544 nentries++;
9545 }
9546 copy_entry = copy_entry->vme_next;
9547 }
9548 }
9549
9550 if ((entry->use_pmap) && (pmap == NULL)) {
9551 kr = vm_map_copy_overwrite_nested(
9552 VME_SUBMAP(entry),
9553 sub_start,
9554 copy,
9555 interruptible,
9556 VME_SUBMAP(entry)->pmap,
9557 TRUE);
9558 } else if (pmap != NULL) {
9559 kr = vm_map_copy_overwrite_nested(
9560 VME_SUBMAP(entry),
9561 sub_start,
9562 copy,
9563 interruptible, pmap,
9564 TRUE);
9565 } else {
9566 kr = vm_map_copy_overwrite_nested(
9567 VME_SUBMAP(entry),
9568 sub_start,
9569 copy,
9570 interruptible,
9571 dst_map->pmap,
9572 TRUE);
9573 }
9574 if (kr != KERN_SUCCESS) {
9575 if (next_copy != NULL) {
9576 copy->cpy_hdr.nentries +=
9577 remaining_entries;
9578 copy->cpy_hdr.links.prev->vme_next =
9579 next_copy;
9580 copy->cpy_hdr.links.prev
9581 = previous_prev;
9582 copy->size = total_size;
9583 }
9584 return kr;
9585 }
9586 if (dst_end <= local_end) {
9587 return KERN_SUCCESS;
9588 }
9589 /* otherwise copy no longer exists, it was */
9590 /* destroyed after successful copy_overwrite */
9591 copy = vm_map_copy_allocate();
9592 copy->type = VM_MAP_COPY_ENTRY_LIST;
9593 copy->offset = new_offset;
9594 copy->cpy_hdr.page_shift = copy_page_shift;
9595
9596 /*
9597 * XXX FBDP
9598 * this does not seem to deal with
9599 * the VM map store (R&B tree)
9600 */
9601
9602 total_size -= copy_size;
9603 copy_size = 0;
9604 /* put back remainder of copy in container */
9605 if (next_copy != NULL) {
9606 copy->cpy_hdr.nentries = remaining_entries;
9607 copy->cpy_hdr.links.next = next_copy;
9608 copy->cpy_hdr.links.prev = previous_prev;
9609 copy->size = total_size;
9610 next_copy->vme_prev =
9611 vm_map_copy_to_entry(copy);
9612 next_copy = NULL;
9613 }
9614 base_addr = local_end;
9615 vm_map_lock(dst_map);
9616 if (!vm_map_lookup_entry(dst_map,
9617 local_end, &tmp_entry)) {
9618 vm_map_unlock(dst_map);
9619 return KERN_INVALID_ADDRESS;
9620 }
9621 entry = tmp_entry;
9622 continue;
9623 }
9624 if (dst_end <= entry->vme_end) {
9625 copy_size = dst_end - base_addr;
9626 break;
9627 }
9628
9629 if ((next == vm_map_to_entry(dst_map)) ||
9630 (next->vme_start != entry->vme_end)) {
9631 vm_map_unlock(dst_map);
9632 return KERN_INVALID_ADDRESS;
9633 }
9634
9635 entry = next;
9636 }/* for */
9637
9638 next_copy = NULL;
9639 nentries = 1;
9640
9641 /* adjust the copy object */
9642 if (total_size > copy_size) {
9643 vm_map_size_t local_size = 0;
9644 vm_map_size_t entry_size;
9645
9646 new_offset = copy->offset;
9647 copy_entry = vm_map_copy_first_entry(copy);
9648 while (copy_entry != vm_map_copy_to_entry(copy)) {
9649 entry_size = copy_entry->vme_end -
9650 copy_entry->vme_start;
9651 if ((local_size < copy_size) &&
9652 ((local_size + entry_size)
9653 >= copy_size)) {
9654 vm_map_copy_clip_end(copy, copy_entry,
9655 copy_entry->vme_start +
9656 (copy_size - local_size));
9657 entry_size = copy_entry->vme_end -
9658 copy_entry->vme_start;
9659 local_size += entry_size;
9660 new_offset += entry_size;
9661 }
9662 if (local_size >= copy_size) {
9663 next_copy = copy_entry->vme_next;
9664 copy_entry->vme_next =
9665 vm_map_copy_to_entry(copy);
9666 previous_prev =
9667 copy->cpy_hdr.links.prev;
9668 copy->cpy_hdr.links.prev = copy_entry;
9669 copy->size = copy_size;
9670 remaining_entries =
9671 copy->cpy_hdr.nentries;
9672 remaining_entries -= nentries;
9673 copy->cpy_hdr.nentries = nentries;
9674 break;
9675 } else {
9676 local_size += entry_size;
9677 new_offset += entry_size;
9678 nentries++;
9679 }
9680 copy_entry = copy_entry->vme_next;
9681 }
9682 }
9683
9684 if (aligned) {
9685 pmap_t local_pmap;
9686
9687 if (pmap) {
9688 local_pmap = pmap;
9689 } else {
9690 local_pmap = dst_map->pmap;
9691 }
9692
9693 if ((kr = vm_map_copy_overwrite_aligned(
9694 dst_map, tmp_entry, copy,
9695 base_addr, local_pmap)) != KERN_SUCCESS) {
9696 if (next_copy != NULL) {
9697 copy->cpy_hdr.nentries +=
9698 remaining_entries;
9699 copy->cpy_hdr.links.prev->vme_next =
9700 next_copy;
9701 copy->cpy_hdr.links.prev =
9702 previous_prev;
9703 copy->size += copy_size;
9704 }
9705 return kr;
9706 }
9707 vm_map_unlock(dst_map);
9708 } else {
9709 /*
9710 * Performance gain:
9711 *
9712 * if the copy and dst address are misaligned but the same
9713 * offset within the page we can copy_not_aligned the
9714 * misaligned parts and copy aligned the rest. If they are
9715 * aligned but len is unaligned we simply need to copy
9716 * the end bit unaligned. We'll need to split the misaligned
9717 * bits of the region in this case !
9718 */
9719 /* ALWAYS UNLOCKS THE dst_map MAP */
9720 kr = vm_map_copy_overwrite_unaligned(
9721 dst_map,
9722 tmp_entry,
9723 copy,
9724 base_addr,
9725 discard_on_success);
9726 if (kr != KERN_SUCCESS) {
9727 if (next_copy != NULL) {
9728 copy->cpy_hdr.nentries +=
9729 remaining_entries;
9730 copy->cpy_hdr.links.prev->vme_next =
9731 next_copy;
9732 copy->cpy_hdr.links.prev =
9733 previous_prev;
9734 copy->size += copy_size;
9735 }
9736 return kr;
9737 }
9738 }
9739 total_size -= copy_size;
9740 if (total_size == 0) {
9741 break;
9742 }
9743 base_addr += copy_size;
9744 copy_size = 0;
9745 copy->offset = new_offset;
9746 if (next_copy != NULL) {
9747 copy->cpy_hdr.nentries = remaining_entries;
9748 copy->cpy_hdr.links.next = next_copy;
9749 copy->cpy_hdr.links.prev = previous_prev;
9750 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9751 copy->size = total_size;
9752 }
9753 vm_map_lock(dst_map);
9754 while (TRUE) {
9755 if (!vm_map_lookup_entry(dst_map,
9756 base_addr, &tmp_entry)) {
9757 vm_map_unlock(dst_map);
9758 return KERN_INVALID_ADDRESS;
9759 }
9760 if (tmp_entry->in_transition) {
9761 entry->needs_wakeup = TRUE;
9762 vm_map_entry_wait(dst_map, THREAD_UNINT);
9763 } else {
9764 break;
9765 }
9766 }
9767 vm_map_clip_start(dst_map,
9768 tmp_entry,
9769 vm_map_trunc_page(base_addr,
9770 VM_MAP_PAGE_MASK(dst_map)));
9771
9772 entry = tmp_entry;
9773 } /* while */
9774
9775 /*
9776 * Throw away the vm_map_copy object
9777 */
9778 if (discard_on_success) {
9779 vm_map_copy_discard(copy);
9780 }
9781
9782 return KERN_SUCCESS;
9783 }/* vm_map_copy_overwrite */
9784
9785 kern_return_t
vm_map_copy_overwrite(vm_map_t dst_map,vm_map_offset_t dst_addr,vm_map_copy_t copy,vm_map_size_t copy_size,boolean_t interruptible)9786 vm_map_copy_overwrite(
9787 vm_map_t dst_map,
9788 vm_map_offset_t dst_addr,
9789 vm_map_copy_t copy,
9790 vm_map_size_t copy_size,
9791 boolean_t interruptible)
9792 {
9793 vm_map_size_t head_size, tail_size;
9794 vm_map_copy_t head_copy, tail_copy;
9795 vm_map_offset_t head_addr, tail_addr;
9796 vm_map_entry_t entry;
9797 kern_return_t kr;
9798 vm_map_offset_t effective_page_mask, effective_page_size;
9799 uint16_t copy_page_shift;
9800
9801 head_size = 0;
9802 tail_size = 0;
9803 head_copy = NULL;
9804 tail_copy = NULL;
9805 head_addr = 0;
9806 tail_addr = 0;
9807
9808 /*
9809 * Check for null copy object.
9810 */
9811 if (copy == VM_MAP_COPY_NULL) {
9812 return KERN_SUCCESS;
9813 }
9814
9815 /*
9816 * Assert that the vm_map_copy is coming from the right
9817 * zone and hasn't been forged
9818 */
9819 vm_map_copy_require(copy);
9820
9821 if (interruptible ||
9822 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9823 /*
9824 * We can't split the "copy" map if we're interruptible
9825 * or if we don't have a "copy" map...
9826 */
9827 blunt_copy:
9828 return vm_map_copy_overwrite_nested(dst_map,
9829 dst_addr,
9830 copy,
9831 interruptible,
9832 (pmap_t) NULL,
9833 TRUE);
9834 }
9835
9836 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9837 if (copy_page_shift < PAGE_SHIFT ||
9838 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9839 goto blunt_copy;
9840 }
9841
9842 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9843 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9844 } else {
9845 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9846 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9847 effective_page_mask);
9848 }
9849 effective_page_size = effective_page_mask + 1;
9850
9851 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9852 /*
9853 * Too small to bother with optimizing...
9854 */
9855 goto blunt_copy;
9856 }
9857
9858 if ((dst_addr & effective_page_mask) !=
9859 (copy->offset & effective_page_mask)) {
9860 /*
9861 * Incompatible mis-alignment of source and destination...
9862 */
9863 goto blunt_copy;
9864 }
9865
9866 /*
9867 * Proper alignment or identical mis-alignment at the beginning.
9868 * Let's try and do a small unaligned copy first (if needed)
9869 * and then an aligned copy for the rest.
9870 */
9871 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9872 head_addr = dst_addr;
9873 head_size = (effective_page_size -
9874 (copy->offset & effective_page_mask));
9875 head_size = MIN(head_size, copy_size);
9876 }
9877 if (!vm_map_page_aligned(copy->offset + copy_size,
9878 effective_page_mask)) {
9879 /*
9880 * Mis-alignment at the end.
9881 * Do an aligned copy up to the last page and
9882 * then an unaligned copy for the remaining bytes.
9883 */
9884 tail_size = ((copy->offset + copy_size) &
9885 effective_page_mask);
9886 tail_size = MIN(tail_size, copy_size);
9887 tail_addr = dst_addr + copy_size - tail_size;
9888 assert(tail_addr >= head_addr + head_size);
9889 }
9890 assert(head_size + tail_size <= copy_size);
9891
9892 if (head_size + tail_size == copy_size) {
9893 /*
9894 * It's all unaligned, no optimization possible...
9895 */
9896 goto blunt_copy;
9897 }
9898
9899 /*
9900 * Can't optimize if there are any submaps in the
9901 * destination due to the way we free the "copy" map
9902 * progressively in vm_map_copy_overwrite_nested()
9903 * in that case.
9904 */
9905 vm_map_lock_read(dst_map);
9906 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9907 vm_map_unlock_read(dst_map);
9908 goto blunt_copy;
9909 }
9910 for (;
9911 (entry != vm_map_to_entry(dst_map) &&
9912 entry->vme_start < dst_addr + copy_size);
9913 entry = entry->vme_next) {
9914 if (entry->is_sub_map) {
9915 vm_map_unlock_read(dst_map);
9916 goto blunt_copy;
9917 }
9918 }
9919 vm_map_unlock_read(dst_map);
9920
9921 if (head_size) {
9922 /*
9923 * Unaligned copy of the first "head_size" bytes, to reach
9924 * a page boundary.
9925 */
9926
9927 /*
9928 * Extract "head_copy" out of "copy".
9929 */
9930 head_copy = vm_map_copy_allocate();
9931 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9932 head_copy->cpy_hdr.entries_pageable =
9933 copy->cpy_hdr.entries_pageable;
9934 vm_map_store_init(&head_copy->cpy_hdr);
9935 head_copy->cpy_hdr.page_shift = copy_page_shift;
9936
9937 entry = vm_map_copy_first_entry(copy);
9938 if (entry->vme_end < copy->offset + head_size) {
9939 head_size = entry->vme_end - copy->offset;
9940 }
9941
9942 head_copy->offset = copy->offset;
9943 head_copy->size = head_size;
9944 copy->offset += head_size;
9945 copy->size -= head_size;
9946 copy_size -= head_size;
9947 assert(copy_size > 0);
9948
9949 vm_map_copy_clip_end(copy, entry, copy->offset);
9950 vm_map_copy_entry_unlink(copy, entry);
9951 vm_map_copy_entry_link(head_copy,
9952 vm_map_copy_to_entry(head_copy),
9953 entry);
9954
9955 /*
9956 * Do the unaligned copy.
9957 */
9958 kr = vm_map_copy_overwrite_nested(dst_map,
9959 head_addr,
9960 head_copy,
9961 interruptible,
9962 (pmap_t) NULL,
9963 FALSE);
9964 if (kr != KERN_SUCCESS) {
9965 goto done;
9966 }
9967 }
9968
9969 if (tail_size) {
9970 /*
9971 * Extract "tail_copy" out of "copy".
9972 */
9973 tail_copy = vm_map_copy_allocate();
9974 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9975 tail_copy->cpy_hdr.entries_pageable =
9976 copy->cpy_hdr.entries_pageable;
9977 vm_map_store_init(&tail_copy->cpy_hdr);
9978 tail_copy->cpy_hdr.page_shift = copy_page_shift;
9979
9980 tail_copy->offset = copy->offset + copy_size - tail_size;
9981 tail_copy->size = tail_size;
9982
9983 copy->size -= tail_size;
9984 copy_size -= tail_size;
9985 assert(copy_size > 0);
9986
9987 entry = vm_map_copy_last_entry(copy);
9988 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9989 entry = vm_map_copy_last_entry(copy);
9990 vm_map_copy_entry_unlink(copy, entry);
9991 vm_map_copy_entry_link(tail_copy,
9992 vm_map_copy_last_entry(tail_copy),
9993 entry);
9994 }
9995
9996 /*
9997 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9998 * we want to avoid TOCTOU issues w.r.t copy->size but
9999 * we don't need to change vm_map_copy_overwrite_nested()
10000 * and all other vm_map_copy_overwrite variants.
10001 *
10002 * So we assign the original copy_size that was passed into
10003 * this routine back to copy.
10004 *
10005 * This use of local 'copy_size' passed into this routine is
10006 * to try and protect against TOCTOU attacks where the kernel
10007 * has been exploited. We don't expect this to be an issue
10008 * during normal system operation.
10009 */
10010 assertf(copy->size == copy_size,
10011 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
10012 copy->size = copy_size;
10013
10014 /*
10015 * Copy most (or possibly all) of the data.
10016 */
10017 kr = vm_map_copy_overwrite_nested(dst_map,
10018 dst_addr + head_size,
10019 copy,
10020 interruptible,
10021 (pmap_t) NULL,
10022 FALSE);
10023 if (kr != KERN_SUCCESS) {
10024 goto done;
10025 }
10026
10027 if (tail_size) {
10028 kr = vm_map_copy_overwrite_nested(dst_map,
10029 tail_addr,
10030 tail_copy,
10031 interruptible,
10032 (pmap_t) NULL,
10033 FALSE);
10034 }
10035
10036 done:
10037 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
10038 if (kr == KERN_SUCCESS) {
10039 /*
10040 * Discard all the copy maps.
10041 */
10042 if (head_copy) {
10043 vm_map_copy_discard(head_copy);
10044 head_copy = NULL;
10045 }
10046 vm_map_copy_discard(copy);
10047 if (tail_copy) {
10048 vm_map_copy_discard(tail_copy);
10049 tail_copy = NULL;
10050 }
10051 } else {
10052 /*
10053 * Re-assemble the original copy map.
10054 */
10055 if (head_copy) {
10056 entry = vm_map_copy_first_entry(head_copy);
10057 vm_map_copy_entry_unlink(head_copy, entry);
10058 vm_map_copy_entry_link(copy,
10059 vm_map_copy_to_entry(copy),
10060 entry);
10061 copy->offset -= head_size;
10062 copy->size += head_size;
10063 vm_map_copy_discard(head_copy);
10064 head_copy = NULL;
10065 }
10066 if (tail_copy) {
10067 entry = vm_map_copy_last_entry(tail_copy);
10068 vm_map_copy_entry_unlink(tail_copy, entry);
10069 vm_map_copy_entry_link(copy,
10070 vm_map_copy_last_entry(copy),
10071 entry);
10072 copy->size += tail_size;
10073 vm_map_copy_discard(tail_copy);
10074 tail_copy = NULL;
10075 }
10076 }
10077 return kr;
10078 }
10079
10080
10081 /*
10082 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
10083 *
 * Description:
10085 * Physically copy unaligned data
10086 *
10087 * Implementation:
10088 * Unaligned parts of pages have to be physically copied. We use
 * a modified form of vm_fault_copy (which understands non-aligned
 * page offsets and sizes) to do the copy. We attempt to copy as
 * much memory in one go as possible, however vm_fault_copy copies
10092 * within 1 memory object so we have to find the smaller of "amount left"
10093 * "source object data size" and "target object data size". With
10094 * unaligned data we don't need to split regions, therefore the source
10095 * (copy) object should be one map entry, the target range may be split
10096 * over multiple map entries however. In any event we are pessimistic
10097 * about these assumptions.
10098 *
10099 * Callers of this function must call vm_map_copy_require on
10100 * previously created vm_map_copy_t or pass a newly created
10101 * one to ensure that it hasn't been forged.
10102 *
10103 * Assumptions:
10104 * dst_map is locked on entry and is return locked on success,
10105 * unlocked on error.
10106 */
10107
10108 static kern_return_t
vm_map_copy_overwrite_unaligned(vm_map_t dst_map,vm_map_entry_t entry,vm_map_copy_t copy,vm_map_offset_t start,boolean_t discard_on_success)10109 vm_map_copy_overwrite_unaligned(
10110 vm_map_t dst_map,
10111 vm_map_entry_t entry,
10112 vm_map_copy_t copy,
10113 vm_map_offset_t start,
10114 boolean_t discard_on_success)
10115 {
10116 vm_map_entry_t copy_entry;
10117 vm_map_entry_t copy_entry_next;
10118 vm_map_version_t version;
10119 vm_object_t dst_object;
10120 vm_object_offset_t dst_offset;
10121 vm_object_offset_t src_offset;
10122 vm_object_offset_t entry_offset;
10123 vm_map_offset_t entry_end;
10124 vm_map_size_t src_size,
10125 dst_size,
10126 copy_size,
10127 amount_left;
10128 kern_return_t kr = KERN_SUCCESS;
10129
10130
10131 copy_entry = vm_map_copy_first_entry(copy);
10132
10133 vm_map_lock_write_to_read(dst_map);
10134
10135 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
10136 amount_left = copy->size;
10137 /*
10138 * unaligned so we never clipped this entry, we need the offset into
10139 * the vm_object not just the data.
10140 */
10141 while (amount_left > 0) {
10142 if (entry == vm_map_to_entry(dst_map)) {
10143 vm_map_unlock_read(dst_map);
10144 return KERN_INVALID_ADDRESS;
10145 }
10146
10147 /* "start" must be within the current map entry */
10148 assert((start >= entry->vme_start) && (start < entry->vme_end));
10149
10150 /*
10151 * Check protection again
10152 */
10153 if (!(entry->protection & VM_PROT_WRITE)) {
10154 vm_map_unlock_read(dst_map);
10155 return KERN_PROTECTION_FAILURE;
10156 }
10157 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
10158 vm_map_unlock_read(dst_map);
10159 return KERN_PROTECTION_FAILURE;
10160 }
10161
10162 dst_offset = start - entry->vme_start;
10163
10164 dst_size = entry->vme_end - start;
10165
10166 src_size = copy_entry->vme_end -
10167 (copy_entry->vme_start + src_offset);
10168
10169 if (dst_size < src_size) {
10170 /*
10171 * we can only copy dst_size bytes before
10172 * we have to get the next destination entry
10173 */
10174 copy_size = dst_size;
10175 } else {
10176 /*
10177 * we can only copy src_size bytes before
10178 * we have to get the next source copy entry
10179 */
10180 copy_size = src_size;
10181 }
10182
10183 if (copy_size > amount_left) {
10184 copy_size = amount_left;
10185 }
10186 /*
10187 * Entry needs copy, create a shadow shadow object for
10188 * Copy on write region.
10189 */
10190 if (entry->needs_copy) {
10191 if (vm_map_lock_read_to_write(dst_map)) {
10192 vm_map_lock_read(dst_map);
10193 goto RetryLookup;
10194 }
10195 VME_OBJECT_SHADOW(entry,
10196 (vm_map_size_t)(entry->vme_end
10197 - entry->vme_start),
10198 vm_map_always_shadow(dst_map));
10199 entry->needs_copy = FALSE;
10200 vm_map_lock_write_to_read(dst_map);
10201 }
10202 dst_object = VME_OBJECT(entry);
10203 /*
10204 * unlike with the virtual (aligned) copy we're going
10205 * to fault on it therefore we need a target object.
10206 */
10207 if (dst_object == VM_OBJECT_NULL) {
10208 if (vm_map_lock_read_to_write(dst_map)) {
10209 vm_map_lock_read(dst_map);
10210 goto RetryLookup;
10211 }
10212 dst_object = vm_object_allocate((vm_map_size_t)
10213 entry->vme_end - entry->vme_start);
10214 VME_OBJECT_SET(entry, dst_object, false, 0);
10215 VME_OFFSET_SET(entry, 0);
10216 assert(entry->use_pmap);
10217 vm_map_lock_write_to_read(dst_map);
10218 }
10219 /*
10220 * Take an object reference and unlock map. The "entry" may
10221 * disappear or change when the map is unlocked.
10222 */
10223 vm_object_reference(dst_object);
10224 version.main_timestamp = dst_map->timestamp;
10225 entry_offset = VME_OFFSET(entry);
10226 entry_end = entry->vme_end;
10227 vm_map_unlock_read(dst_map);
10228 /*
10229 * Copy as much as possible in one pass
10230 */
10231 kr = vm_fault_copy(
10232 VME_OBJECT(copy_entry),
10233 VME_OFFSET(copy_entry) + src_offset,
10234 ©_size,
10235 dst_object,
10236 entry_offset + dst_offset,
10237 dst_map,
10238 &version,
10239 THREAD_UNINT );
10240
10241 start += copy_size;
10242 src_offset += copy_size;
10243 amount_left -= copy_size;
10244 /*
10245 * Release the object reference
10246 */
10247 vm_object_deallocate(dst_object);
10248 /*
10249 * If a hard error occurred, return it now
10250 */
10251 if (kr != KERN_SUCCESS) {
10252 return kr;
10253 }
10254
10255 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10256 || amount_left == 0) {
10257 /*
10258 * all done with this copy entry, dispose.
10259 */
10260 copy_entry_next = copy_entry->vme_next;
10261
10262 if (discard_on_success) {
10263 vm_map_copy_entry_unlink(copy, copy_entry);
10264 assert(!copy_entry->is_sub_map);
10265 vm_object_deallocate(VME_OBJECT(copy_entry));
10266 vm_map_copy_entry_dispose(copy_entry);
10267 }
10268
10269 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10270 amount_left) {
10271 /*
10272 * not finished copying but run out of source
10273 */
10274 return KERN_INVALID_ADDRESS;
10275 }
10276
10277 copy_entry = copy_entry_next;
10278
10279 src_offset = 0;
10280 }
10281
10282 if (amount_left == 0) {
10283 return KERN_SUCCESS;
10284 }
10285
10286 vm_map_lock_read(dst_map);
10287 if (version.main_timestamp == dst_map->timestamp) {
10288 if (start == entry_end) {
10289 /*
10290 * destination region is split. Use the version
10291 * information to avoid a lookup in the normal
10292 * case.
10293 */
10294 entry = entry->vme_next;
10295 /*
10296 * should be contiguous. Fail if we encounter
10297 * a hole in the destination.
10298 */
10299 if (start != entry->vme_start) {
10300 vm_map_unlock_read(dst_map);
10301 return KERN_INVALID_ADDRESS;
10302 }
10303 }
10304 } else {
10305 /*
10306 * Map version check failed.
10307 * we must lookup the entry because somebody
10308 * might have changed the map behind our backs.
10309 */
10310 RetryLookup:
10311 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10312 vm_map_unlock_read(dst_map);
10313 return KERN_INVALID_ADDRESS;
10314 }
10315 }
10316 }/* while */
10317
10318 return KERN_SUCCESS;
10319 }/* vm_map_copy_overwrite_unaligned */
10320
10321 /*
10322 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10323 *
10324 * Description:
10325 * Does all the vm_trickery possible for whole pages.
10326 *
10327 * Implementation:
10328 *
10329 * If there are no permanent objects in the destination,
10330 * and the source and destination map entry zones match,
10331 * and the destination map entry is not shared,
10332 * then the map entries can be deleted and replaced
10333 * with those from the copy. The following code is the
10334 * basic idea of what to do, but there are lots of annoying
10335 * little details about getting protection and inheritance
10336 * right. Should add protection, inheritance, and sharing checks
10337 * to the above pass and make sure that no wiring is involved.
10338 *
10339 * Callers of this function must call vm_map_copy_require on
10340 * previously created vm_map_copy_t or pass a newly created
10341 * one to ensure that it hasn't been forged.
10342 */
10343
/*
 * Counters tracking why vm_map_copy_overwrite_aligned() fell back to the
 * slow (physical) copy path instead of substituting map entries:
 * source object backed by non-anonymous memory, source object with a
 * "true_share"/non-symmetric copy strategy, or a small copy out of a
 * large source object (virtual-vs-physical tradeoff #1).
 */
int vm_map_copy_overwrite_aligned_src_not_internal = 0;
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
int vm_map_copy_overwrite_aligned_src_large = 0;
10347
10348 static kern_return_t
vm_map_copy_overwrite_aligned(vm_map_t dst_map,vm_map_entry_t tmp_entry,vm_map_copy_t copy,vm_map_offset_t start,__unused pmap_t pmap)10349 vm_map_copy_overwrite_aligned(
10350 vm_map_t dst_map,
10351 vm_map_entry_t tmp_entry,
10352 vm_map_copy_t copy,
10353 vm_map_offset_t start,
10354 __unused pmap_t pmap)
10355 {
10356 vm_object_t object;
10357 vm_map_entry_t copy_entry;
10358 vm_map_size_t copy_size;
10359 vm_map_size_t size;
10360 vm_map_entry_t entry;
10361
10362 while ((copy_entry = vm_map_copy_first_entry(copy))
10363 != vm_map_copy_to_entry(copy)) {
10364 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10365
10366 entry = tmp_entry;
10367 if (entry->is_sub_map) {
10368 /* unnested when clipped earlier */
10369 assert(!entry->use_pmap);
10370 }
10371 if (entry == vm_map_to_entry(dst_map)) {
10372 vm_map_unlock(dst_map);
10373 return KERN_INVALID_ADDRESS;
10374 }
10375 size = (entry->vme_end - entry->vme_start);
10376 /*
10377 * Make sure that no holes popped up in the
10378 * address map, and that the protection is
10379 * still valid, in case the map was unlocked
10380 * earlier.
10381 */
10382
10383 if ((entry->vme_start != start) || ((entry->is_sub_map)
10384 && !entry->needs_copy)) {
10385 vm_map_unlock(dst_map);
10386 return KERN_INVALID_ADDRESS;
10387 }
10388 assert(entry != vm_map_to_entry(dst_map));
10389
10390 /*
10391 * Check protection again
10392 */
10393
10394 if (!(entry->protection & VM_PROT_WRITE)) {
10395 vm_map_unlock(dst_map);
10396 return KERN_PROTECTION_FAILURE;
10397 }
10398
10399 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
10400 vm_map_unlock(dst_map);
10401 return KERN_PROTECTION_FAILURE;
10402 }
10403
10404 /*
10405 * Adjust to source size first
10406 */
10407
10408 if (copy_size < size) {
10409 if (entry->map_aligned &&
10410 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10411 VM_MAP_PAGE_MASK(dst_map))) {
10412 /* no longer map-aligned */
10413 entry->map_aligned = FALSE;
10414 }
10415 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10416 size = copy_size;
10417 }
10418
10419 /*
10420 * Adjust to destination size
10421 */
10422
10423 if (size < copy_size) {
10424 vm_map_copy_clip_end(copy, copy_entry,
10425 copy_entry->vme_start + size);
10426 copy_size = size;
10427 }
10428
10429 assert((entry->vme_end - entry->vme_start) == size);
10430 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10431 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10432
10433 /*
10434 * If the destination contains temporary unshared memory,
10435 * we can perform the copy by throwing it away and
10436 * installing the source data.
10437 */
10438
10439 object = VME_OBJECT(entry);
10440 if ((!entry->is_shared &&
10441 ((object == VM_OBJECT_NULL) ||
10442 (object->internal && !object->true_share))) ||
10443 entry->needs_copy) {
10444 vm_object_t old_object = VME_OBJECT(entry);
10445 vm_object_offset_t old_offset = VME_OFFSET(entry);
10446 vm_object_offset_t offset;
10447
10448 /*
10449 * Ensure that the source and destination aren't
10450 * identical
10451 */
10452 if (old_object == VME_OBJECT(copy_entry) &&
10453 old_offset == VME_OFFSET(copy_entry)) {
10454 vm_map_copy_entry_unlink(copy, copy_entry);
10455 vm_map_copy_entry_dispose(copy_entry);
10456
10457 if (old_object != VM_OBJECT_NULL) {
10458 vm_object_deallocate(old_object);
10459 }
10460
10461 start = tmp_entry->vme_end;
10462 tmp_entry = tmp_entry->vme_next;
10463 continue;
10464 }
10465
10466 #if XNU_TARGET_OS_OSX
10467 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10468 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10469 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10470 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10471 copy_size <= __TRADEOFF1_COPY_SIZE) {
10472 /*
10473 * Virtual vs. Physical copy tradeoff #1.
10474 *
10475 * Copying only a few pages out of a large
10476 * object: do a physical copy instead of
10477 * a virtual copy, to avoid possibly keeping
10478 * the entire large object alive because of
10479 * those few copy-on-write pages.
10480 */
10481 vm_map_copy_overwrite_aligned_src_large++;
10482 goto slow_copy;
10483 }
10484 #endif /* XNU_TARGET_OS_OSX */
10485
10486 if ((dst_map->pmap != kernel_pmap) &&
10487 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10488 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10489 vm_object_t new_object, new_shadow;
10490
10491 /*
10492 * We're about to map something over a mapping
10493 * established by malloc()...
10494 */
10495 new_object = VME_OBJECT(copy_entry);
10496 if (new_object != VM_OBJECT_NULL) {
10497 vm_object_lock_shared(new_object);
10498 }
10499 while (new_object != VM_OBJECT_NULL &&
10500 #if XNU_TARGET_OS_OSX
10501 !new_object->true_share &&
10502 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10503 #endif /* XNU_TARGET_OS_OSX */
10504 new_object->internal) {
10505 new_shadow = new_object->shadow;
10506 if (new_shadow == VM_OBJECT_NULL) {
10507 break;
10508 }
10509 vm_object_lock_shared(new_shadow);
10510 vm_object_unlock(new_object);
10511 new_object = new_shadow;
10512 }
10513 if (new_object != VM_OBJECT_NULL) {
10514 if (!new_object->internal) {
10515 /*
10516 * The new mapping is backed
10517 * by an external object. We
10518 * don't want malloc'ed memory
10519 * to be replaced with such a
10520 * non-anonymous mapping, so
10521 * let's go off the optimized
10522 * path...
10523 */
10524 vm_map_copy_overwrite_aligned_src_not_internal++;
10525 vm_object_unlock(new_object);
10526 goto slow_copy;
10527 }
10528 #if XNU_TARGET_OS_OSX
10529 if (new_object->true_share ||
10530 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10531 /*
10532 * Same if there's a "true_share"
10533 * object in the shadow chain, or
10534 * an object with a non-default
10535 * (SYMMETRIC) copy strategy.
10536 */
10537 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10538 vm_object_unlock(new_object);
10539 goto slow_copy;
10540 }
10541 #endif /* XNU_TARGET_OS_OSX */
10542 vm_object_unlock(new_object);
10543 }
10544 /*
10545 * The new mapping is still backed by
10546 * anonymous (internal) memory, so it's
10547 * OK to substitute it for the original
10548 * malloc() mapping.
10549 */
10550 }
10551
10552 if (old_object != VM_OBJECT_NULL) {
10553 assert(!entry->vme_permanent);
10554 if (entry->is_sub_map) {
10555 if (entry->use_pmap) {
10556 #ifndef NO_NESTED_PMAP
10557 pmap_unnest(dst_map->pmap,
10558 (addr64_t)entry->vme_start,
10559 entry->vme_end - entry->vme_start);
10560 #endif /* NO_NESTED_PMAP */
10561 if (dst_map->mapped_in_other_pmaps) {
10562 /* clean up parent */
10563 /* map/maps */
10564 vm_map_submap_pmap_clean(
10565 dst_map, entry->vme_start,
10566 entry->vme_end,
10567 VME_SUBMAP(entry),
10568 VME_OFFSET(entry));
10569 }
10570 } else {
10571 vm_map_submap_pmap_clean(
10572 dst_map, entry->vme_start,
10573 entry->vme_end,
10574 VME_SUBMAP(entry),
10575 VME_OFFSET(entry));
10576 }
10577 vm_map_deallocate(VME_SUBMAP(entry));
10578 } else {
10579 if (dst_map->mapped_in_other_pmaps) {
10580 vm_object_pmap_protect_options(
10581 VME_OBJECT(entry),
10582 VME_OFFSET(entry),
10583 entry->vme_end
10584 - entry->vme_start,
10585 PMAP_NULL,
10586 PAGE_SIZE,
10587 entry->vme_start,
10588 VM_PROT_NONE,
10589 PMAP_OPTIONS_REMOVE);
10590 } else {
10591 pmap_remove_options(
10592 dst_map->pmap,
10593 (addr64_t)(entry->vme_start),
10594 (addr64_t)(entry->vme_end),
10595 PMAP_OPTIONS_REMOVE);
10596 }
10597 vm_object_deallocate(old_object);
10598 }
10599 }
10600
10601 if (entry->iokit_acct) {
10602 /* keep using iokit accounting */
10603 entry->use_pmap = FALSE;
10604 } else {
10605 /* use pmap accounting */
10606 entry->use_pmap = TRUE;
10607 }
10608 assert(!entry->vme_permanent);
10609 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry), false, 0);
10610 object = VME_OBJECT(entry);
10611 entry->needs_copy = copy_entry->needs_copy;
10612 entry->wired_count = 0;
10613 entry->user_wired_count = 0;
10614 offset = VME_OFFSET(copy_entry);
10615 VME_OFFSET_SET(entry, offset);
10616
10617 vm_map_copy_entry_unlink(copy, copy_entry);
10618 vm_map_copy_entry_dispose(copy_entry);
10619
10620 /*
10621 * we could try to push pages into the pmap at this point, BUT
10622 * this optimization only saved on average 2 us per page if ALL
10623 * the pages in the source were currently mapped
10624 * and ALL the pages in the dest were touched, if there were fewer
10625 * than 2/3 of the pages touched, this optimization actually cost more cycles
10626 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
10627 */
10628
10629 /*
10630 * Set up for the next iteration. The map
10631 * has not been unlocked, so the next
10632 * address should be at the end of this
10633 * entry, and the next map entry should be
10634 * the one following it.
10635 */
10636
10637 start = tmp_entry->vme_end;
10638 tmp_entry = tmp_entry->vme_next;
10639 } else {
10640 vm_map_version_t version;
10641 vm_object_t dst_object;
10642 vm_object_offset_t dst_offset;
10643 kern_return_t r;
10644
10645 slow_copy:
10646 if (entry->needs_copy) {
10647 VME_OBJECT_SHADOW(entry,
10648 (entry->vme_end -
10649 entry->vme_start),
10650 vm_map_always_shadow(dst_map));
10651 entry->needs_copy = FALSE;
10652 }
10653
10654 dst_object = VME_OBJECT(entry);
10655 dst_offset = VME_OFFSET(entry);
10656
10657 /*
10658 * Take an object reference, and record
10659 * the map version information so that the
10660 * map can be safely unlocked.
10661 */
10662
10663 if (dst_object == VM_OBJECT_NULL) {
10664 /*
10665 * We would usually have just taken the
10666 * optimized path above if the destination
10667 * object has not been allocated yet. But we
10668 * now disable that optimization if the copy
10669 * entry's object is not backed by anonymous
10670 * memory to avoid replacing malloc'ed
10671 * (i.e. re-usable) anonymous memory with a
10672 * not-so-anonymous mapping.
10673 * So we have to handle this case here and
10674 * allocate a new VM object for this map entry.
10675 */
10676 dst_object = vm_object_allocate(
10677 entry->vme_end - entry->vme_start);
10678 dst_offset = 0;
10679 VME_OBJECT_SET(entry, dst_object, false, 0);
10680 VME_OFFSET_SET(entry, dst_offset);
10681 assert(entry->use_pmap);
10682 }
10683
10684 vm_object_reference(dst_object);
10685
10686 /* account for unlock bumping up timestamp */
10687 version.main_timestamp = dst_map->timestamp + 1;
10688
10689 vm_map_unlock(dst_map);
10690
10691 /*
10692 * Copy as much as possible in one pass
10693 */
10694
10695 copy_size = size;
10696 r = vm_fault_copy(
10697 VME_OBJECT(copy_entry),
10698 VME_OFFSET(copy_entry),
10699 ©_size,
10700 dst_object,
10701 dst_offset,
10702 dst_map,
10703 &version,
10704 THREAD_UNINT );
10705
10706 /*
10707 * Release the object reference
10708 */
10709
10710 vm_object_deallocate(dst_object);
10711
10712 /*
10713 * If a hard error occurred, return it now
10714 */
10715
10716 if (r != KERN_SUCCESS) {
10717 return r;
10718 }
10719
10720 if (copy_size != 0) {
10721 /*
10722 * Dispose of the copied region
10723 */
10724
10725 vm_map_copy_clip_end(copy, copy_entry,
10726 copy_entry->vme_start + copy_size);
10727 vm_map_copy_entry_unlink(copy, copy_entry);
10728 vm_object_deallocate(VME_OBJECT(copy_entry));
10729 vm_map_copy_entry_dispose(copy_entry);
10730 }
10731
10732 /*
10733 * Pick up in the destination map where we left off.
10734 *
10735 * Use the version information to avoid a lookup
10736 * in the normal case.
10737 */
10738
10739 start += copy_size;
10740 vm_map_lock(dst_map);
10741 if (version.main_timestamp == dst_map->timestamp &&
10742 copy_size != 0) {
10743 /* We can safely use saved tmp_entry value */
10744
10745 if (tmp_entry->map_aligned &&
10746 !VM_MAP_PAGE_ALIGNED(
10747 start,
10748 VM_MAP_PAGE_MASK(dst_map))) {
10749 /* no longer map-aligned */
10750 tmp_entry->map_aligned = FALSE;
10751 }
10752 vm_map_clip_end(dst_map, tmp_entry, start);
10753 tmp_entry = tmp_entry->vme_next;
10754 } else {
10755 /* Must do lookup of tmp_entry */
10756
10757 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10758 vm_map_unlock(dst_map);
10759 return KERN_INVALID_ADDRESS;
10760 }
10761 if (tmp_entry->map_aligned &&
10762 !VM_MAP_PAGE_ALIGNED(
10763 start,
10764 VM_MAP_PAGE_MASK(dst_map))) {
10765 /* no longer map-aligned */
10766 tmp_entry->map_aligned = FALSE;
10767 }
10768 vm_map_clip_start(dst_map, tmp_entry, start);
10769 }
10770 }
10771 }/* while */
10772
10773 return KERN_SUCCESS;
10774 }/* vm_map_copy_overwrite_aligned */
10775
10776 /*
10777 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10778 *
10779 * Description:
10780 * Copy in data to a kernel buffer from space in the
10781 * source map. The original space may be optionally
10782 * deallocated.
10783 *
10784 * If successful, returns a new copy object.
10785 */
10786 static kern_return_t
vm_map_copyin_kernel_buffer(vm_map_t src_map,vm_map_offset_t src_addr,vm_map_size_t len,boolean_t src_destroy,vm_map_copy_t * copy_result)10787 vm_map_copyin_kernel_buffer(
10788 vm_map_t src_map,
10789 vm_map_offset_t src_addr,
10790 vm_map_size_t len,
10791 boolean_t src_destroy,
10792 vm_map_copy_t *copy_result)
10793 {
10794 kern_return_t kr;
10795 vm_map_copy_t copy;
10796
10797 if (len > msg_ool_size_small) {
10798 return KERN_INVALID_ARGUMENT;
10799 }
10800
10801 copy = zalloc_id(ZONE_ID_VM_MAP_COPY, Z_WAITOK | Z_ZERO | Z_NOFAIL);
10802 copy->cpy_kdata = kalloc_data(len, Z_WAITOK);
10803 if (copy->cpy_kdata == NULL) {
10804 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
10805 return KERN_RESOURCE_SHORTAGE;
10806 }
10807
10808 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10809 copy->size = len;
10810 copy->offset = 0;
10811
10812 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10813 if (kr != KERN_SUCCESS) {
10814 kfree_data(copy->cpy_kdata, len);
10815 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
10816 return kr;
10817 }
10818
10819 if (src_destroy) {
10820 vmr_flags_t flags = VM_MAP_REMOVE_INTERRUPTIBLE;
10821
10822 if (src_map == kernel_map) {
10823 flags |= VM_MAP_REMOVE_KUNWIRE;
10824 }
10825
10826 (void)vm_map_remove_guard(src_map,
10827 vm_map_trunc_page(src_addr, VM_MAP_PAGE_MASK(src_map)),
10828 vm_map_round_page(src_addr + len, VM_MAP_PAGE_MASK(src_map)),
10829 flags, KMEM_GUARD_NONE);
10830 }
10831
10832 *copy_result = copy;
10833 return KERN_SUCCESS;
10834 }
10835
10836 /*
10837 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10838 *
10839 * Description:
10840 * Copy out data from a kernel buffer into space in the
10841 * destination map. The space may be otpionally dynamically
10842 * allocated.
10843 *
10844 * If successful, consumes the copy object.
10845 * Otherwise, the caller is responsible for it.
10846 *
10847 * Callers of this function must call vm_map_copy_require on
10848 * previously created vm_map_copy_t or pass a newly created
10849 * one to ensure that it hasn't been forged.
10850 */
/* diagnostic counter: number of failed cross-map copyouts (debug aid) */
static int vm_map_copyout_kernel_buffer_failures = 0;
/*
 * Copy the kernel-buffer-backed "copy" object out into "map".
 *
 * If "overwrite" is FALSE, space is first allocated anywhere in "map";
 * otherwise the data is written to the address already in *addr.
 * On success, the copy object is freed only when "consume_on_success"
 * is set; on failure the caller retains ownership of "copy" and any
 * space allocated here is torn down before returning.
 */
static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t                map,
	vm_map_address_t        *addr,  /* IN/OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size,
	boolean_t               overwrite,
	boolean_t               consume_on_success)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	assert(copy->size == copy_size);

	/*
	 * check for corrupted vm_map_copy structure
	 */
	if (copy_size > msg_ool_size_small || copy->offset) {
		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
		    (long long)copy->size, (long long)copy->offset);
	}

	if (!overwrite) {
		/*
		 * Allocate space in the target map for the data
		 */
		vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

		if (map == kernel_map) {
			/* keep kernel allocations in the data range */
			vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
		}
		*addr = 0;
		kr = vm_map_enter(map,
		    addr,
		    vm_map_round_page(copy_size,
		    VM_MAP_PAGE_MASK(map)),
		    (vm_map_offset_t) 0,
		    VM_FLAGS_ANYWHERE,
		    vmk_flags,
		    VM_KERN_MEMORY_NONE,
		    VM_OBJECT_NULL,
		    (vm_object_offset_t) 0,
		    FALSE,
		    VM_PROT_DEFAULT,
		    VM_PROT_ALL,
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
#if KASAN
		if (map->pmap == kernel_pmap) {
			kasan_notify_address(*addr, copy->size);
		}
#endif
	}

	/*
	 * Copyout the data from the kernel buffer to the target map.
	 */
	if (thread->map == map) {
		/*
		 * If the target map is the current map, just do
		 * the copy.
		 */
		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t oldmap;

		/*
		 * If the target map is another map, assume the
		 * target's address space identity for the duration
		 * of the copy.
		 */
		/* hold a ref so "map" can't go away while we're switched to it */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);

		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			vm_map_copyout_kernel_buffer_failures++;
			kr = KERN_INVALID_ADDRESS;
		}

		/* restore our original address space identity */
		(void) vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}

	if (kr != KERN_SUCCESS) {
		/* the copy failed, clean up */
		if (!overwrite) {
			/*
			 * Deallocate the space we allocated in the target map.
			 */
			(void) vm_map_remove(map,
			    vm_map_trunc_page(*addr,
			    VM_MAP_PAGE_MASK(map)),
			    vm_map_round_page((*addr +
			    vm_map_round_page(copy_size,
			    VM_MAP_PAGE_MASK(map))),
			    VM_MAP_PAGE_MASK(map)));
			*addr = 0;
		}
	} else {
		/* copy was successful, discard the copy structure */
		if (consume_on_success) {
			kfree_data(copy->cpy_kdata, copy_size);
			zfree_id(ZONE_ID_VM_MAP_COPY, copy);
		}
	}

	return kr;
}
10966
10967 /*
10968 * Routine: vm_map_copy_insert [internal use only]
10969 *
10970 * Description:
10971 * Link a copy chain ("copy") into a map at the
10972 * specified location (after "where").
10973 *
10974 * Callers of this function must call vm_map_copy_require on
10975 * previously created vm_map_copy_t or pass a newly created
10976 * one to ensure that it hasn't been forged.
10977 * Side effects:
10978 * The copy chain is destroyed.
10979 */
10980 static void
vm_map_copy_insert(vm_map_t map,vm_map_entry_t after_where,vm_map_copy_t copy)10981 vm_map_copy_insert(
10982 vm_map_t map,
10983 vm_map_entry_t after_where,
10984 vm_map_copy_t copy)
10985 {
10986 vm_map_entry_t entry;
10987
10988 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10989 entry = vm_map_copy_first_entry(copy);
10990 vm_map_copy_entry_unlink(copy, entry);
10991 vm_map_store_entry_link(map, after_where, entry,
10992 VM_MAP_KERNEL_FLAGS_NONE);
10993 after_where = entry;
10994 }
10995 zfree_id(ZONE_ID_VM_MAP_COPY, copy);
10996 }
10997
10998 /*
10999 * Callers of this function must call vm_map_copy_require on
11000 * previously created vm_map_copy_t or pass a newly created
11001 * one to ensure that it hasn't been forged.
11002 */
11003 void
vm_map_copy_remap(vm_map_t map,vm_map_entry_t where,vm_map_copy_t copy,vm_map_offset_t adjustment,vm_prot_t cur_prot,vm_prot_t max_prot,vm_inherit_t inheritance)11004 vm_map_copy_remap(
11005 vm_map_t map,
11006 vm_map_entry_t where,
11007 vm_map_copy_t copy,
11008 vm_map_offset_t adjustment,
11009 vm_prot_t cur_prot,
11010 vm_prot_t max_prot,
11011 vm_inherit_t inheritance)
11012 {
11013 vm_map_entry_t copy_entry, new_entry;
11014
11015 for (copy_entry = vm_map_copy_first_entry(copy);
11016 copy_entry != vm_map_copy_to_entry(copy);
11017 copy_entry = copy_entry->vme_next) {
11018 /* get a new VM map entry for the map */
11019 new_entry = vm_map_entry_create(map);
11020 /* copy the "copy entry" to the new entry */
11021 vm_map_entry_copy(map, new_entry, copy_entry);
11022 /* adjust "start" and "end" */
11023 new_entry->vme_start += adjustment;
11024 new_entry->vme_end += adjustment;
11025 /* clear some attributes */
11026 new_entry->inheritance = inheritance;
11027 new_entry->protection = cur_prot;
11028 new_entry->max_protection = max_prot;
11029 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
11030 /* take an extra reference on the entry's "object" */
11031 if (new_entry->is_sub_map) {
11032 assert(!new_entry->use_pmap); /* not nested */
11033 vm_map_reference(VME_SUBMAP(new_entry));
11034 } else {
11035 vm_object_reference(VME_OBJECT(new_entry));
11036 }
11037 /* insert the new entry in the map */
11038 vm_map_store_entry_link(map, where, new_entry,
11039 VM_MAP_KERNEL_FLAGS_NONE);
11040 /* continue inserting the "copy entries" after the new entry */
11041 where = new_entry;
11042 }
11043 }
11044
11045
11046 /*
11047 * Returns true if *size matches (or is in the range of) copy->size.
11048 * Upon returning true, the *size field is updated with the actual size of the
11049 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
11050 */
11051 boolean_t
vm_map_copy_validate_size(vm_map_t dst_map,vm_map_copy_t copy,vm_map_size_t * size)11052 vm_map_copy_validate_size(
11053 vm_map_t dst_map,
11054 vm_map_copy_t copy,
11055 vm_map_size_t *size)
11056 {
11057 if (copy == VM_MAP_COPY_NULL) {
11058 return FALSE;
11059 }
11060
11061 /*
11062 * Assert that the vm_map_copy is coming from the right
11063 * zone and hasn't been forged
11064 */
11065 vm_map_copy_require(copy);
11066
11067 vm_map_size_t copy_sz = copy->size;
11068 vm_map_size_t sz = *size;
11069 switch (copy->type) {
11070 case VM_MAP_COPY_OBJECT:
11071 case VM_MAP_COPY_KERNEL_BUFFER:
11072 if (sz == copy_sz) {
11073 return TRUE;
11074 }
11075 break;
11076 case VM_MAP_COPY_ENTRY_LIST:
11077 /*
11078 * potential page-size rounding prevents us from exactly
11079 * validating this flavor of vm_map_copy, but we can at least
11080 * assert that it's within a range.
11081 */
11082 if (copy_sz >= sz &&
11083 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
11084 *size = copy_sz;
11085 return TRUE;
11086 }
11087 break;
11088 default:
11089 break;
11090 }
11091 return FALSE;
11092 }
11093
11094 /*
11095 * Routine: vm_map_copyout_size
11096 *
11097 * Description:
11098 * Copy out a copy chain ("copy") into newly-allocated
11099 * space in the destination map. Uses a prevalidated
11100 * size for the copy object (vm_map_copy_validate_size).
11101 *
11102 * If successful, consumes the copy object.
11103 * Otherwise, the caller is responsible for it.
11104 */
11105 kern_return_t
vm_map_copyout_size(vm_map_t dst_map,vm_map_address_t * dst_addr,vm_map_copy_t copy,vm_map_size_t copy_size)11106 vm_map_copyout_size(
11107 vm_map_t dst_map,
11108 vm_map_address_t *dst_addr, /* OUT */
11109 vm_map_copy_t copy,
11110 vm_map_size_t copy_size)
11111 {
11112 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
11113 TRUE, /* consume_on_success */
11114 VM_PROT_DEFAULT,
11115 VM_PROT_ALL,
11116 VM_INHERIT_DEFAULT);
11117 }
11118
11119 /*
11120 * Routine: vm_map_copyout
11121 *
11122 * Description:
11123 * Copy out a copy chain ("copy") into newly-allocated
11124 * space in the destination map.
11125 *
11126 * If successful, consumes the copy object.
11127 * Otherwise, the caller is responsible for it.
11128 */
11129 kern_return_t
vm_map_copyout(vm_map_t dst_map,vm_map_address_t * dst_addr,vm_map_copy_t copy)11130 vm_map_copyout(
11131 vm_map_t dst_map,
11132 vm_map_address_t *dst_addr, /* OUT */
11133 vm_map_copy_t copy)
11134 {
11135 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
11136 TRUE, /* consume_on_success */
11137 VM_PROT_DEFAULT,
11138 VM_PROT_ALL,
11139 VM_INHERIT_DEFAULT);
11140 }
11141
/*
 * Routine: vm_map_copyout_internal
 *
 * Description:
 *	Copy out a copy chain ("copy") into newly-allocated space in
 *	"dst_map", returning the chosen address in *dst_addr.  Handles
 *	all three copy flavors: OBJECT (map the object directly),
 *	KERNEL_BUFFER (delegate to vm_map_copyout_kernel_buffer), and
 *	ENTRY_LIST (splice or clone the entries into the map).
 *
 *	"copy_size" must equal copy->size (pre-validated by callers).
 *	When "consume_on_success" is set, a successful return consumes
 *	the copy object; otherwise the caller keeps ownership and the
 *	entries are cloned via vm_map_copy_remap() instead of spliced.
 */
kern_return_t
vm_map_copyout_internal(
	vm_map_t                dst_map,
	vm_map_address_t        *dst_addr,      /* OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size,
	boolean_t               consume_on_success,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_size_t           size;
	vm_map_size_t           adjustment;
	vm_map_offset_t         start;
	vm_object_offset_t      vm_copy_start;
	vm_map_entry_t          last;
	vm_map_entry_t          entry;
	vm_map_copy_t           original_copy;
	kern_return_t           kr;
	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

	/*
	 * Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		*dst_addr = 0;
		return KERN_SUCCESS;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	if (copy->size != copy_size) {
		*dst_addr = 0;
		return KERN_FAILURE;
	}

	/*
	 * Check for special copy object, created
	 * by vm_map_copyin_object.
	 */

	if (copy->type == VM_MAP_COPY_OBJECT) {
		vm_object_t             object = copy->cpy_object;
		vm_object_offset_t      offset;

		/* map the whole pages covering [offset, offset + copy_size) */
		offset = vm_object_trunc_page(copy->offset);
		size = vm_map_round_page((copy_size +
		    (vm_map_size_t)(copy->offset -
		    offset)),
		    VM_MAP_PAGE_MASK(dst_map));
		*dst_addr = 0;
		kr = vm_map_enter(dst_map, dst_addr, size,
		    (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
		    VM_MAP_KERNEL_FLAGS_NONE,
		    VM_KERN_MEMORY_NONE,
		    object, offset, FALSE,
		    VM_PROT_DEFAULT, VM_PROT_ALL,
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		/* Account for non-pagealigned copy object */
		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
		if (consume_on_success) {
			zfree_id(ZONE_ID_VM_MAP_COPY, copy);
		}
		return KERN_SUCCESS;
	}

	/*
	 * Check for special kernel buffer allocated
	 * by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
		           copy, copy_size, FALSE,
		           consume_on_success);
	}

	/*
	 * Entry-list copy: remember the caller's copy so we can tell
	 * below whether the page-shift adjustment substituted a new one.
	 */
	original_copy = copy;
	if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
		/*
		 * The copy was built with a different page size than the
		 * destination map uses; rebuild it to match.
		 */
		vm_map_copy_t target_copy;
		vm_map_offset_t overmap_start, overmap_end, trimmed_start;

		target_copy = VM_MAP_COPY_NULL;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy,
			0, /* offset */
			copy->size, /* size */
			dst_map,
			TRUE, /* copy */
			&target_copy,
			&overmap_start,
			&overmap_end,
			&trimmed_start);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("adjust failed 0x%x\n", kr);
			return kr;
		}
		DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
		if (target_copy != copy) {
			copy = target_copy;
		}
		copy_size = copy->size;
	}

	/*
	 * Find space for the data
	 */

	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
	    VM_MAP_COPY_PAGE_MASK(copy));
	size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
	    VM_MAP_COPY_PAGE_MASK(copy))
	    - vm_copy_start;


	if (dst_map == kernel_map) {
		/* keep kernel allocations in the data range */
		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
	}

	vm_map_lock(dst_map);
	kr = vm_map_locate_space(dst_map, size, 0, vmk_flags,
	    &start, &last);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock(dst_map);
		return kr;
	}

	/* offset from the copy's own addresses to the chosen destination */
	adjustment = start - vm_copy_start;
	if (!consume_on_success) {
		/*
		 * We're not allowed to consume "copy", so we'll have to
		 * copy its map entries into the destination map below.
		 * No need to re-allocate map entries from the correct
		 * (pageable or not) zone, since we'll get new map entries
		 * during the transfer.
		 * We'll also adjust the map entries's "start" and "end"
		 * during the transfer, to keep "copy"'s entries consistent
		 * with its "offset".
		 */
		goto after_adjustments;
	}

	/*
	 * Since we're going to just drop the map
	 * entries from the copy into the destination
	 * map, they must come from the same pool.
	 */

	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
		/*
		 * Mismatches occur when dealing with the default
		 * pager.
		 */
		vm_map_entry_t  next, new;

		/*
		 * Find the zone that the copies were allocated from
		 */

		entry = vm_map_copy_first_entry(copy);

		/*
		 * Reinitialize the copy so that vm_map_copy_entry_link
		 * will work.
		 */
		vm_map_store_copy_reset(copy, entry);
		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;

		/*
		 * Copy each entry.
		 */
		while (entry != vm_map_copy_to_entry(copy)) {
			new = vm_map_copy_entry_create(copy);
			vm_map_entry_copy_full(new, entry);
			new->vme_no_copy_on_read = FALSE;
			assert(!new->iokit_acct);
			if (new->is_sub_map) {
				/* clr address space specifics */
				new->use_pmap = FALSE;
			}
			vm_map_copy_entry_link(copy,
			    vm_map_copy_last_entry(copy),
			    new);
			next = entry->vme_next;
			/* the old entry came from the wrong zone; drop it */
			vm_map_entry_dispose(entry);
			entry = next;
		}
	}

	/*
	 * Adjust the addresses in the copy chain, and
	 * reset the region attributes.
	 */

	for (entry = vm_map_copy_first_entry(copy);
	    entry != vm_map_copy_to_entry(copy);
	    entry = entry->vme_next) {
		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
			/*
			 * We're injecting this copy entry into a map that
			 * has the standard page alignment, so clear
			 * "map_aligned" (which might have been inherited
			 * from the original map entry).
			 */
			entry->map_aligned = FALSE;
		}

		entry->vme_start += adjustment;
		entry->vme_end += adjustment;

		if (entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
			    VM_MAP_PAGE_MASK(dst_map)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
			    VM_MAP_PAGE_MASK(dst_map)));
		}

		entry->inheritance = VM_INHERIT_DEFAULT;
		entry->protection = VM_PROT_DEFAULT;
		entry->max_protection = VM_PROT_ALL;
		entry->behavior = VM_BEHAVIOR_DEFAULT;

		/*
		 * If the entry is now wired,
		 * map the pages into the destination map.
		 */
		if (entry->wired_count != 0) {
			vm_map_offset_t va;
			vm_object_offset_t       offset;
			vm_object_t object;
			vm_prot_t prot;
			int     type_of_fault;

			/* TODO4K would need to use actual page size */
			assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);

			object = VME_OBJECT(entry);
			offset = VME_OFFSET(entry);
			va = entry->vme_start;

			pmap_pageable(dst_map->pmap,
			    entry->vme_start,
			    entry->vme_end,
			    TRUE);

			/* pre-fault every page of the wired range */
			while (va < entry->vme_end) {
				vm_page_t       m;
				struct vm_object_fault_info fault_info = {};

				/*
				 * Look up the page in the object.
				 * Assert that the page will be found in the
				 * top object:
				 * either
				 *	the object was newly created by
				 *	vm_object_copy_slowly, and has
				 *	copies of all of the pages from
				 *	the source object
				 * or
				 *	the object was moved from the old
				 *	map entry; because the old map
				 *	entry was wired, all of the pages
				 *	were in the top-level object.
				 *	(XXX not true if we wire pages for
				 *	 reading)
				 */
				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
				    m->vmp_absent) {
					panic("vm_map_copyout: wiring %p", m);
				}

				prot = entry->protection;

				if (override_nx(dst_map, VME_ALIAS(entry)) &&
				    prot) {
					prot |= VM_PROT_EXECUTE;
				}

				type_of_fault = DBG_CACHE_HIT_FAULT;

				fault_info.user_tag = VME_ALIAS(entry);
				fault_info.pmap_options = 0;
				if (entry->iokit_acct ||
				    (!entry->is_sub_map && !entry->use_pmap)) {
					fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
				}

				vm_fault_enter(m,
				    dst_map->pmap,
				    va,
				    PAGE_SIZE, 0,
				    prot,
				    prot,
				    VM_PAGE_WIRED(m),
				    FALSE,            /* change_wiring */
				    VM_KERN_MEMORY_NONE,            /* tag - not wiring */
				    &fault_info,
				    NULL,             /* need_retry */
				    &type_of_fault);

				vm_object_unlock(object);

				offset += PAGE_SIZE_64;
				va += PAGE_SIZE;
			}
		}
	}

after_adjustments:

	/*
	 * Correct the page alignment for the result
	 */

	*dst_addr = start + (copy->offset - vm_copy_start);

#if KASAN
	kasan_notify_address(*dst_addr, size);
#endif

	/*
	 * Update the hints and the map size
	 */

	if (consume_on_success) {
		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
	} else {
		SAVE_HINT_MAP_WRITE(dst_map, last);
	}

	dst_map->size += size;

	/*
	 * Link in the copy
	 */

	if (consume_on_success) {
		vm_map_copy_insert(dst_map, last, copy);
		if (copy != original_copy) {
			/* the adjusted copy was spliced in; drop the original */
			vm_map_copy_discard(original_copy);
			original_copy = VM_MAP_COPY_NULL;
		}
	} else {
		vm_map_copy_remap(dst_map, last, copy, adjustment,
		    cur_protection, max_protection,
		    inheritance);
		if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
			/* drop the temporary adjusted copy; caller keeps the original */
			vm_map_copy_discard(copy);
			copy = original_copy;
		}
	}


	vm_map_unlock(dst_map);

	/*
	 * XXX	If wiring_required, call vm_map_pageable
	 */

	return KERN_SUCCESS;
}
11515
11516 /*
11517 * Routine: vm_map_copyin
11518 *
11519 * Description:
11520 * see vm_map_copyin_common. Exported via Unsupported.exports.
11521 *
11522 */
11523
11524 #undef vm_map_copyin
11525
11526 kern_return_t
vm_map_copyin(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t src_destroy,vm_map_copy_t * copy_result)11527 vm_map_copyin(
11528 vm_map_t src_map,
11529 vm_map_address_t src_addr,
11530 vm_map_size_t len,
11531 boolean_t src_destroy,
11532 vm_map_copy_t *copy_result) /* OUT */
11533 {
11534 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11535 FALSE, copy_result, FALSE);
11536 }
11537
11538 /*
11539 * Routine: vm_map_copyin_common
11540 *
11541 * Description:
11542 * Copy the specified region (src_addr, len) from the
11543 * source address space (src_map), possibly removing
11544 * the region from the source address space (src_destroy).
11545 *
11546 * Returns:
11547 * A vm_map_copy_t object (copy_result), suitable for
11548 * insertion into another address space (using vm_map_copyout),
11549 * copying over another address space region (using
11550 * vm_map_copy_overwrite). If the copy is unused, it
11551 * should be destroyed (using vm_map_copy_discard).
11552 *
11553 * In/out conditions:
11554 * The source map should not be locked on entry.
11555 */
11556
/* one frame of the submap traversal stack used by vm_map_copyin_internal */
typedef struct submap_map {
	vm_map_t        parent_map;     /* map we descended from */
	vm_map_offset_t base_start;     /* copy range start in parent_map */
	vm_map_offset_t base_end;       /* copy range end in parent_map */
	vm_map_size_t   base_len;       /* length covered by the submap entry */
	struct submap_map *next;        /* next (outer) frame, NULL at base map */
} submap_map_t;
11564
11565 kern_return_t
vm_map_copyin_common(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t src_destroy,__unused boolean_t src_volatile,vm_map_copy_t * copy_result,boolean_t use_maxprot)11566 vm_map_copyin_common(
11567 vm_map_t src_map,
11568 vm_map_address_t src_addr,
11569 vm_map_size_t len,
11570 boolean_t src_destroy,
11571 __unused boolean_t src_volatile,
11572 vm_map_copy_t *copy_result, /* OUT */
11573 boolean_t use_maxprot)
11574 {
11575 int flags;
11576
11577 flags = 0;
11578 if (src_destroy) {
11579 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11580 }
11581 if (use_maxprot) {
11582 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11583 }
11584 return vm_map_copyin_internal(src_map,
11585 src_addr,
11586 len,
11587 flags,
11588 copy_result);
11589 }
11590 kern_return_t
vm_map_copyin_internal(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,int flags,vm_map_copy_t * copy_result)11591 vm_map_copyin_internal(
11592 vm_map_t src_map,
11593 vm_map_address_t src_addr,
11594 vm_map_size_t len,
11595 int flags,
11596 vm_map_copy_t *copy_result) /* OUT */
11597 {
11598 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11599 * in multi-level lookup, this
11600 * entry contains the actual
11601 * vm_object/offset.
11602 */
11603 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11604
11605 vm_map_offset_t src_start; /* Start of current entry --
11606 * where copy is taking place now
11607 */
11608 vm_map_offset_t src_end; /* End of entire region to be
11609 * copied */
11610 vm_map_offset_t src_base;
11611 vm_map_t base_map = src_map;
11612 boolean_t map_share = FALSE;
11613 submap_map_t *parent_maps = NULL;
11614
11615 vm_map_copy_t copy; /* Resulting copy */
11616 vm_map_address_t copy_addr;
11617 vm_map_size_t copy_size;
11618 boolean_t src_destroy;
11619 boolean_t use_maxprot;
11620 boolean_t preserve_purgeable;
11621 boolean_t entry_was_shared;
11622 vm_map_entry_t saved_src_entry;
11623
11624 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11625 return KERN_INVALID_ARGUMENT;
11626 }
11627
11628 #if CONFIG_KERNEL_TBI
11629 if (src_map->pmap == kernel_pmap) {
11630 src_addr = VM_KERNEL_TBI_FILL(src_addr);
11631 }
11632 #endif /* CONFIG_KERNEL_TBI && KASAN_TBI */
11633
11634 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11635 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11636 preserve_purgeable =
11637 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11638
11639 /*
11640 * Check for copies of zero bytes.
11641 */
11642
11643 if (len == 0) {
11644 *copy_result = VM_MAP_COPY_NULL;
11645 return KERN_SUCCESS;
11646 }
11647
11648 /*
11649 * Check that the end address doesn't overflow
11650 */
11651 src_end = src_addr + len;
11652 if (src_end < src_addr) {
11653 return KERN_INVALID_ADDRESS;
11654 }
11655
11656 /*
11657 * Compute (page aligned) start and end of region
11658 */
11659 src_start = vm_map_trunc_page(src_addr,
11660 VM_MAP_PAGE_MASK(src_map));
11661 src_end = vm_map_round_page(src_end,
11662 VM_MAP_PAGE_MASK(src_map));
11663
11664 /*
11665 * If the copy is sufficiently small, use a kernel buffer instead
11666 * of making a virtual copy. The theory being that the cost of
11667 * setting up VM (and taking C-O-W faults) dominates the copy costs
11668 * for small regions.
11669 */
11670 if ((len <= msg_ool_size_small) &&
11671 !use_maxprot &&
11672 !preserve_purgeable &&
11673 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11674 /*
11675 * Since the "msg_ool_size_small" threshold was increased and
11676 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11677 * address space limits, we revert to doing a virtual copy if the
11678 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11679 * of the commpage would now fail when it used to work.
11680 */
11681 (src_start >= vm_map_min(src_map) &&
11682 src_start < vm_map_max(src_map) &&
11683 src_end >= vm_map_min(src_map) &&
11684 src_end < vm_map_max(src_map))) {
11685 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11686 src_destroy, copy_result);
11687 }
11688
11689 /*
11690 * Allocate a header element for the list.
11691 *
11692 * Use the start and end in the header to
11693 * remember the endpoints prior to rounding.
11694 */
11695
11696 copy = vm_map_copy_allocate();
11697 copy->type = VM_MAP_COPY_ENTRY_LIST;
11698 copy->cpy_hdr.entries_pageable = TRUE;
11699 copy->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(src_map);
11700
11701 vm_map_store_init( &(copy->cpy_hdr));
11702
11703 copy->offset = src_addr;
11704 copy->size = len;
11705
11706 new_entry = vm_map_copy_entry_create(copy);
11707
11708 #define RETURN(x) \
11709 MACRO_BEGIN \
11710 vm_map_unlock(src_map); \
11711 if(src_map != base_map) \
11712 vm_map_deallocate(src_map); \
11713 if (new_entry != VM_MAP_ENTRY_NULL) \
11714 vm_map_copy_entry_dispose(new_entry); \
11715 vm_map_copy_discard(copy); \
11716 { \
11717 submap_map_t *_ptr; \
11718 \
11719 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11720 parent_maps=parent_maps->next; \
11721 if (_ptr->parent_map != base_map) \
11722 vm_map_deallocate(_ptr->parent_map); \
11723 kfree_type(submap_map_t, _ptr); \
11724 } \
11725 } \
11726 MACRO_RETURN(x); \
11727 MACRO_END
11728
11729 /*
11730 * Find the beginning of the region.
11731 */
11732
11733 vm_map_lock(src_map);
11734
11735 /*
11736 * Lookup the original "src_addr" rather than the truncated
11737 * "src_start", in case "src_start" falls in a non-map-aligned
11738 * map entry *before* the map entry that contains "src_addr"...
11739 */
11740 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11741 RETURN(KERN_INVALID_ADDRESS);
11742 }
11743 if (!tmp_entry->is_sub_map) {
11744 /*
11745 * ... but clip to the map-rounded "src_start" rather than
11746 * "src_addr" to preserve map-alignment. We'll adjust the
11747 * first copy entry at the end, if needed.
11748 */
11749 vm_map_clip_start(src_map, tmp_entry, src_start);
11750 }
11751 if (src_start < tmp_entry->vme_start) {
11752 /*
11753 * Move "src_start" up to the start of the
11754 * first map entry to copy.
11755 */
11756 src_start = tmp_entry->vme_start;
11757 }
11758 /* set for later submap fix-up */
11759 copy_addr = src_start;
11760
11761 /*
11762 * Go through entries until we get to the end.
11763 */
11764
11765 while (TRUE) {
11766 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11767 vm_map_size_t src_size; /* Size of source
11768 * map entry (in both
11769 * maps)
11770 */
11771
11772 vm_object_t src_object; /* Object to copy */
11773 vm_object_offset_t src_offset;
11774
11775 vm_object_t new_copy_object;/* vm_object_copy_* result */
11776
11777 boolean_t src_needs_copy; /* Should source map
11778 * be made read-only
11779 * for copy-on-write?
11780 */
11781
11782 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11783
11784 boolean_t was_wired; /* Was source wired? */
11785 boolean_t saved_used_for_jit; /* Saved used_for_jit. */
11786 vm_map_version_t version; /* Version before locks
11787 * dropped to make copy
11788 */
11789 kern_return_t result; /* Return value from
11790 * copy_strategically.
11791 */
11792 while (tmp_entry->is_sub_map) {
11793 vm_map_size_t submap_len;
11794 submap_map_t *ptr;
11795
11796 ptr = kalloc_type(submap_map_t, Z_WAITOK);
11797 ptr->next = parent_maps;
11798 parent_maps = ptr;
11799 ptr->parent_map = src_map;
11800 ptr->base_start = src_start;
11801 ptr->base_end = src_end;
11802 submap_len = tmp_entry->vme_end - src_start;
11803 if (submap_len > (src_end - src_start)) {
11804 submap_len = src_end - src_start;
11805 }
11806 ptr->base_len = submap_len;
11807
11808 src_start -= tmp_entry->vme_start;
11809 src_start += VME_OFFSET(tmp_entry);
11810 src_end = src_start + submap_len;
11811 src_map = VME_SUBMAP(tmp_entry);
11812 vm_map_lock(src_map);
11813 /* keep an outstanding reference for all maps in */
11814 /* the parents tree except the base map */
11815 vm_map_reference(src_map);
11816 vm_map_unlock(ptr->parent_map);
11817 if (!vm_map_lookup_entry(
11818 src_map, src_start, &tmp_entry)) {
11819 RETURN(KERN_INVALID_ADDRESS);
11820 }
11821 map_share = TRUE;
11822 if (!tmp_entry->is_sub_map) {
11823 vm_map_clip_start(src_map, tmp_entry, src_start);
11824 }
11825 src_entry = tmp_entry;
11826 }
11827 /* we are now in the lowest level submap... */
11828
11829 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11830 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11831 /* This is not supported for now. In future */
11832 /* we will need to detect the phys_contig */
11833 /* condition and then upgrade copy_slowly */
11834 /* to do physical copy from the device mem */
11835 /* based object. We can piggy-back off of */
11836 /* the was wired boolean to set-up the */
11837 /* proper handling */
11838 RETURN(KERN_PROTECTION_FAILURE);
11839 }
11840 /*
11841 * Create a new address map entry to hold the result.
11842 * Fill in the fields from the appropriate source entries.
11843 * We must unlock the source map to do this if we need
11844 * to allocate a map entry.
11845 */
11846 if (new_entry == VM_MAP_ENTRY_NULL) {
11847 version.main_timestamp = src_map->timestamp;
11848 vm_map_unlock(src_map);
11849
11850 new_entry = vm_map_copy_entry_create(copy);
11851
11852 vm_map_lock(src_map);
11853 if ((version.main_timestamp + 1) != src_map->timestamp) {
11854 if (!vm_map_lookup_entry(src_map, src_start,
11855 &tmp_entry)) {
11856 RETURN(KERN_INVALID_ADDRESS);
11857 }
11858 if (!tmp_entry->is_sub_map) {
11859 vm_map_clip_start(src_map, tmp_entry, src_start);
11860 }
11861 continue; /* restart w/ new tmp_entry */
11862 }
11863 }
11864
11865 /*
11866 * Verify that the region can be read.
11867 */
11868 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11869 !use_maxprot) ||
11870 (src_entry->max_protection & VM_PROT_READ) == 0) {
11871 RETURN(KERN_PROTECTION_FAILURE);
11872 }
11873
11874 /*
11875 * Clip against the endpoints of the entire region.
11876 */
11877
11878 vm_map_clip_end(src_map, src_entry, src_end);
11879
11880 src_size = src_entry->vme_end - src_start;
11881 src_object = VME_OBJECT(src_entry);
11882 src_offset = VME_OFFSET(src_entry);
11883 was_wired = (src_entry->wired_count != 0);
11884
11885 vm_map_entry_copy(src_map, new_entry, src_entry);
11886 if (new_entry->is_sub_map) {
11887 /* clr address space specifics */
11888 new_entry->use_pmap = FALSE;
11889 } else {
11890 /*
11891 * We're dealing with a copy-on-write operation,
11892 * so the resulting mapping should not inherit the
11893 * original mapping's accounting settings.
11894 * "iokit_acct" should have been cleared in
11895 * vm_map_entry_copy().
11896 * "use_pmap" should be reset to its default (TRUE)
11897 * so that the new mapping gets accounted for in
11898 * the task's memory footprint.
11899 */
11900 assert(!new_entry->iokit_acct);
11901 new_entry->use_pmap = TRUE;
11902 }
11903
11904 /*
11905 * Attempt non-blocking copy-on-write optimizations.
11906 */
11907
11908 /*
11909 * If we are destroying the source, and the object
11910 * is internal, we could move the object reference
11911 * from the source to the copy. The copy is
11912 * copy-on-write only if the source is.
11913 * We make another reference to the object, because
11914 * destroying the source entry will deallocate it.
11915 *
11916 * This memory transfer has to be atomic, (to prevent
11917 * the VM object from being shared or copied while
11918 * it's being moved here), so we could only do this
11919 * if we won't have to unlock the VM map until the
11920 * original mapping has been fully removed.
11921 */
11922
11923 RestartCopy:
11924 if ((src_object == VM_OBJECT_NULL ||
11925 (!was_wired && !map_share && !tmp_entry->is_shared
11926 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11927 vm_object_copy_quickly(
11928 VME_OBJECT(new_entry),
11929 src_offset,
11930 src_size,
11931 &src_needs_copy,
11932 &new_entry_needs_copy)) {
11933 new_entry->needs_copy = new_entry_needs_copy;
11934
11935 /*
11936 * Handle copy-on-write obligations
11937 */
11938
11939 if (src_needs_copy && !tmp_entry->needs_copy) {
11940 vm_prot_t prot;
11941
11942 prot = src_entry->protection & ~VM_PROT_WRITE;
11943
11944 if (override_nx(src_map, VME_ALIAS(src_entry))
11945 && prot) {
11946 prot |= VM_PROT_EXECUTE;
11947 }
11948
11949 vm_object_pmap_protect(
11950 src_object,
11951 src_offset,
11952 src_size,
11953 (src_entry->is_shared ?
11954 PMAP_NULL
11955 : src_map->pmap),
11956 VM_MAP_PAGE_SIZE(src_map),
11957 src_entry->vme_start,
11958 prot);
11959
11960 assert(tmp_entry->wired_count == 0);
11961 tmp_entry->needs_copy = TRUE;
11962 }
11963
11964 /*
11965 * The map has never been unlocked, so it's safe
11966 * to move to the next entry rather than doing
11967 * another lookup.
11968 */
11969
11970 goto CopySuccessful;
11971 }
11972
11973 entry_was_shared = tmp_entry->is_shared;
11974
11975 /*
11976 * Take an object reference, so that we may
11977 * release the map lock(s).
11978 */
11979
11980 assert(src_object != VM_OBJECT_NULL);
11981 vm_object_reference(src_object);
11982
11983 /*
11984 * Record the timestamp for later verification.
11985 * Unlock the map.
11986 */
11987
11988 version.main_timestamp = src_map->timestamp;
11989 vm_map_unlock(src_map); /* Increments timestamp once! */
11990 saved_src_entry = src_entry;
11991 tmp_entry = VM_MAP_ENTRY_NULL;
11992 src_entry = VM_MAP_ENTRY_NULL;
11993
11994 /*
11995 * Perform the copy
11996 */
11997
11998 if (was_wired ||
11999 (debug4k_no_cow_copyin &&
12000 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
12001 CopySlowly:
12002 vm_object_lock(src_object);
12003 result = vm_object_copy_slowly(
12004 src_object,
12005 src_offset,
12006 src_size,
12007 THREAD_UNINT,
12008 &new_copy_object);
12009 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
12010 saved_used_for_jit = new_entry->used_for_jit;
12011 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
12012 new_entry->used_for_jit = saved_used_for_jit;
12013 VME_OFFSET_SET(new_entry,
12014 src_offset - vm_object_trunc_page(src_offset));
12015 new_entry->needs_copy = FALSE;
12016 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12017 (entry_was_shared || map_share)) {
12018 vm_object_t new_object;
12019
12020 vm_object_lock_shared(src_object);
12021 new_object = vm_object_copy_delayed(
12022 src_object,
12023 src_offset,
12024 src_size,
12025 TRUE);
12026 if (new_object == VM_OBJECT_NULL) {
12027 goto CopySlowly;
12028 }
12029
12030 VME_OBJECT_SET(new_entry, new_object, false, 0);
12031 assert(new_entry->wired_count == 0);
12032 new_entry->needs_copy = TRUE;
12033 assert(!new_entry->iokit_acct);
12034 assert(new_object->purgable == VM_PURGABLE_DENY);
12035 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
12036 result = KERN_SUCCESS;
12037 } else {
12038 vm_object_offset_t new_offset;
12039 new_offset = VME_OFFSET(new_entry);
12040 result = vm_object_copy_strategically(src_object,
12041 src_offset,
12042 src_size,
12043 &new_copy_object,
12044 &new_offset,
12045 &new_entry_needs_copy);
12046 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
12047 saved_used_for_jit = new_entry->used_for_jit;
12048 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
12049 new_entry->used_for_jit = saved_used_for_jit;
12050 if (new_offset != VME_OFFSET(new_entry)) {
12051 VME_OFFSET_SET(new_entry, new_offset);
12052 }
12053
12054 new_entry->needs_copy = new_entry_needs_copy;
12055 }
12056
12057 if (result == KERN_SUCCESS &&
12058 ((preserve_purgeable &&
12059 src_object->purgable != VM_PURGABLE_DENY) ||
12060 new_entry->used_for_jit)) {
12061 /*
12062 * Purgeable objects should be COPY_NONE, true share;
12063 * this should be propagated to the copy.
12064 *
12065 * Also force mappings the pmap specially protects to
12066 * be COPY_NONE; trying to COW these mappings would
12067 * change the effective protections, which could have
12068 * side effects if the pmap layer relies on the
12069 * specified protections.
12070 */
12071
12072 vm_object_t new_object;
12073
12074 new_object = VME_OBJECT(new_entry);
12075 assert(new_object != src_object);
12076 vm_object_lock(new_object);
12077 assert(new_object->ref_count == 1);
12078 assert(new_object->shadow == VM_OBJECT_NULL);
12079 assert(new_object->copy == VM_OBJECT_NULL);
12080 assert(new_object->vo_owner == NULL);
12081
12082 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
12083
12084 if (preserve_purgeable &&
12085 src_object->purgable != VM_PURGABLE_DENY) {
12086 new_object->true_share = TRUE;
12087
12088 /* start as non-volatile with no owner... */
12089 new_object->purgable = VM_PURGABLE_NONVOLATILE;
12090 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
12091 /* ... and move to src_object's purgeable state */
12092 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
12093 int state;
12094 state = src_object->purgable;
12095 vm_object_purgable_control(
12096 new_object,
12097 VM_PURGABLE_SET_STATE_FROM_KERNEL,
12098 &state);
12099 }
12100 /* no pmap accounting for purgeable objects */
12101 new_entry->use_pmap = FALSE;
12102 }
12103
12104 vm_object_unlock(new_object);
12105 new_object = VM_OBJECT_NULL;
12106 }
12107
12108 if (result != KERN_SUCCESS &&
12109 result != KERN_MEMORY_RESTART_COPY) {
12110 vm_map_lock(src_map);
12111 RETURN(result);
12112 }
12113
12114 /*
12115 * Throw away the extra reference
12116 */
12117
12118 vm_object_deallocate(src_object);
12119
12120 /*
12121 * Verify that the map has not substantially
12122 * changed while the copy was being made.
12123 */
12124
12125 vm_map_lock(src_map);
12126
12127 if ((version.main_timestamp + 1) == src_map->timestamp) {
12128 /* src_map hasn't changed: src_entry is still valid */
12129 src_entry = saved_src_entry;
12130 goto VerificationSuccessful;
12131 }
12132
12133 /*
12134 * Simple version comparison failed.
12135 *
12136 * Retry the lookup and verify that the
12137 * same object/offset are still present.
12138 *
12139 * [Note: a memory manager that colludes with
12140 * the calling task can detect that we have
12141 * cheated. While the map was unlocked, the
12142 * mapping could have been changed and restored.]
12143 */
12144
12145 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12146 if (result != KERN_MEMORY_RESTART_COPY) {
12147 vm_object_deallocate(VME_OBJECT(new_entry));
12148 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL, false, 0);
12149 /* reset accounting state */
12150 new_entry->iokit_acct = FALSE;
12151 new_entry->use_pmap = TRUE;
12152 }
12153 RETURN(KERN_INVALID_ADDRESS);
12154 }
12155
12156 src_entry = tmp_entry;
12157 vm_map_clip_start(src_map, src_entry, src_start);
12158
12159 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12160 !use_maxprot) ||
12161 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12162 goto VerificationFailed;
12163 }
12164
12165 if (src_entry->vme_end < new_entry->vme_end) {
12166 /*
12167 * This entry might have been shortened
12168 * (vm_map_clip_end) or been replaced with
12169 * an entry that ends closer to "src_start"
12170 * than before.
12171 * Adjust "new_entry" accordingly; copying
12172 * less memory would be correct but we also
12173 * redo the copy (see below) if the new entry
12174 * no longer points at the same object/offset.
12175 */
12176 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12177 VM_MAP_COPY_PAGE_MASK(copy)));
12178 new_entry->vme_end = src_entry->vme_end;
12179 src_size = new_entry->vme_end - src_start;
12180 } else if (src_entry->vme_end > new_entry->vme_end) {
12181 /*
12182 * This entry might have been extended
12183 * (vm_map_entry_simplify() or coalesce)
12184 * or been replaced with an entry that ends farther
12185 * from "src_start" than before.
12186 *
12187 * We've called vm_object_copy_*() only on
12188 * the previous <start:end> range, so we can't
12189 * just extend new_entry. We have to re-do
12190 * the copy based on the new entry as if it was
12191 * pointing at a different object/offset (see
12192 * "Verification failed" below).
12193 */
12194 }
12195
12196 if ((VME_OBJECT(src_entry) != src_object) ||
12197 (VME_OFFSET(src_entry) != src_offset) ||
12198 (src_entry->vme_end > new_entry->vme_end)) {
12199 /*
12200 * Verification failed.
12201 *
12202 * Start over with this top-level entry.
12203 */
12204
12205 VerificationFailed: ;
12206
12207 vm_object_deallocate(VME_OBJECT(new_entry));
12208 tmp_entry = src_entry;
12209 continue;
12210 }
12211
12212 /*
12213 * Verification succeeded.
12214 */
12215
12216 VerificationSuccessful:;
12217
12218 if (result == KERN_MEMORY_RESTART_COPY) {
12219 goto RestartCopy;
12220 }
12221
12222 /*
12223 * Copy succeeded.
12224 */
12225
12226 CopySuccessful: ;
12227
12228 /*
12229 * Link in the new copy entry.
12230 */
12231
12232 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12233 new_entry);
12234
12235 /*
12236 * Determine whether the entire region
12237 * has been copied.
12238 */
12239 src_base = src_start;
12240 src_start = new_entry->vme_end;
12241 new_entry = VM_MAP_ENTRY_NULL;
12242 while ((src_start >= src_end) && (src_end != 0)) {
12243 submap_map_t *ptr;
12244
12245 if (src_map == base_map) {
12246 /* back to the top */
12247 break;
12248 }
12249
12250 ptr = parent_maps;
12251 assert(ptr != NULL);
12252 parent_maps = parent_maps->next;
12253
12254 /* fix up the damage we did in that submap */
12255 vm_map_simplify_range(src_map,
12256 src_base,
12257 src_end);
12258
12259 vm_map_unlock(src_map);
12260 vm_map_deallocate(src_map);
12261 vm_map_lock(ptr->parent_map);
12262 src_map = ptr->parent_map;
12263 src_base = ptr->base_start;
12264 src_start = ptr->base_start + ptr->base_len;
12265 src_end = ptr->base_end;
12266 if (!vm_map_lookup_entry(src_map,
12267 src_start,
12268 &tmp_entry) &&
12269 (src_end > src_start)) {
12270 RETURN(KERN_INVALID_ADDRESS);
12271 }
12272 kfree_type(submap_map_t, ptr);
12273 if (parent_maps == NULL) {
12274 map_share = FALSE;
12275 }
12276 src_entry = tmp_entry->vme_prev;
12277 }
12278
12279 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12280 (src_start >= src_addr + len) &&
12281 (src_addr + len != 0)) {
12282 /*
12283 * Stop copying now, even though we haven't reached
12284 * "src_end". We'll adjust the end of the last copy
12285 * entry at the end, if needed.
12286 *
12287 * If src_map's alignment is different from the
12288 * system's page-alignment, there could be
12289 * extra non-map-aligned map entries between
12290 * the original (non-rounded) "src_addr + len"
12291 * and the rounded "src_end".
12292 * We do not want to copy those map entries since
12293 * they're not part of the copied range.
12294 */
12295 break;
12296 }
12297
12298 if ((src_start >= src_end) && (src_end != 0)) {
12299 break;
12300 }
12301
12302 /*
12303 * Verify that there are no gaps in the region
12304 */
12305
12306 tmp_entry = src_entry->vme_next;
12307 if ((tmp_entry->vme_start != src_start) ||
12308 (tmp_entry == vm_map_to_entry(src_map))) {
12309 RETURN(KERN_INVALID_ADDRESS);
12310 }
12311 }
12312
12313 /*
12314 * If the source should be destroyed, do it now, since the
12315 * copy was successful.
12316 */
12317 if (src_destroy) {
12318 vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_FLAGS;
12319
12320 if (src_map == kernel_map) {
12321 remove_flags |= VM_MAP_REMOVE_KUNWIRE;
12322 }
12323 (void)vm_map_remove_and_unlock(src_map,
12324 vm_map_trunc_page(src_addr, VM_MAP_PAGE_MASK(src_map)),
12325 src_end,
12326 remove_flags,
12327 KMEM_GUARD_NONE);
12328 } else {
12329 /* fix up the damage we did in the base map */
12330 vm_map_simplify_range(
12331 src_map,
12332 vm_map_trunc_page(src_addr,
12333 VM_MAP_PAGE_MASK(src_map)),
12334 vm_map_round_page(src_end,
12335 VM_MAP_PAGE_MASK(src_map)));
12336 vm_map_unlock(src_map);
12337 }
12338
12339 tmp_entry = VM_MAP_ENTRY_NULL;
12340
12341 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12342 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12343 vm_map_offset_t original_start, original_offset, original_end;
12344
12345 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12346
12347 /* adjust alignment of first copy_entry's "vme_start" */
12348 tmp_entry = vm_map_copy_first_entry(copy);
12349 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12350 vm_map_offset_t adjustment;
12351
12352 original_start = tmp_entry->vme_start;
12353 original_offset = VME_OFFSET(tmp_entry);
12354
12355 /* map-align the start of the first copy entry... */
12356 adjustment = (tmp_entry->vme_start -
12357 vm_map_trunc_page(
12358 tmp_entry->vme_start,
12359 VM_MAP_PAGE_MASK(src_map)));
12360 tmp_entry->vme_start -= adjustment;
12361 VME_OFFSET_SET(tmp_entry,
12362 VME_OFFSET(tmp_entry) - adjustment);
12363 copy_addr -= adjustment;
12364 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12365 /* ... adjust for mis-aligned start of copy range */
12366 adjustment =
12367 (vm_map_trunc_page(copy->offset,
12368 PAGE_MASK) -
12369 vm_map_trunc_page(copy->offset,
12370 VM_MAP_PAGE_MASK(src_map)));
12371 if (adjustment) {
12372 assert(page_aligned(adjustment));
12373 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12374 tmp_entry->vme_start += adjustment;
12375 VME_OFFSET_SET(tmp_entry,
12376 (VME_OFFSET(tmp_entry) +
12377 adjustment));
12378 copy_addr += adjustment;
12379 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12380 }
12381
12382 /*
12383 * Assert that the adjustments haven't exposed
12384 * more than was originally copied...
12385 */
12386 assert(tmp_entry->vme_start >= original_start);
12387 assert(VME_OFFSET(tmp_entry) >= original_offset);
12388 /*
12389 * ... and that it did not adjust outside of a
12390 * single 16K page.
12391 */
12392 assert(vm_map_trunc_page(tmp_entry->vme_start,
12393 VM_MAP_PAGE_MASK(src_map)) ==
12394 vm_map_trunc_page(original_start,
12395 VM_MAP_PAGE_MASK(src_map)));
12396 }
12397
12398 /* adjust alignment of last copy_entry's "vme_end" */
12399 tmp_entry = vm_map_copy_last_entry(copy);
12400 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12401 vm_map_offset_t adjustment;
12402
12403 original_end = tmp_entry->vme_end;
12404
12405 /* map-align the end of the last copy entry... */
12406 tmp_entry->vme_end =
12407 vm_map_round_page(tmp_entry->vme_end,
12408 VM_MAP_PAGE_MASK(src_map));
12409 /* ... adjust for mis-aligned end of copy range */
12410 adjustment =
12411 (vm_map_round_page((copy->offset +
12412 copy->size),
12413 VM_MAP_PAGE_MASK(src_map)) -
12414 vm_map_round_page((copy->offset +
12415 copy->size),
12416 PAGE_MASK));
12417 if (adjustment) {
12418 assert(page_aligned(adjustment));
12419 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12420 tmp_entry->vme_end -= adjustment;
12421 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12422 }
12423
12424 /*
12425 * Assert that the adjustments haven't exposed
12426 * more than was originally copied...
12427 */
12428 assert(tmp_entry->vme_end <= original_end);
12429 /*
12430 * ... and that it did not adjust outside of a
12431 * single 16K page.
12432 */
12433 assert(vm_map_round_page(tmp_entry->vme_end,
12434 VM_MAP_PAGE_MASK(src_map)) ==
12435 vm_map_round_page(original_end,
12436 VM_MAP_PAGE_MASK(src_map)));
12437 }
12438 }
12439
12440 /* Fix-up start and end points in copy. This is necessary */
12441 /* when the various entries in the copy object were picked */
12442 /* up from different sub-maps */
12443
12444 tmp_entry = vm_map_copy_first_entry(copy);
12445 copy_size = 0; /* compute actual size */
12446 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12447 assert(VM_MAP_PAGE_ALIGNED(
12448 copy_addr + (tmp_entry->vme_end -
12449 tmp_entry->vme_start),
12450 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12451 assert(VM_MAP_PAGE_ALIGNED(
12452 copy_addr,
12453 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12454
12455 /*
12456 * The copy_entries will be injected directly into the
12457 * destination map and might not be "map aligned" there...
12458 */
12459 tmp_entry->map_aligned = FALSE;
12460
12461 tmp_entry->vme_end = copy_addr +
12462 (tmp_entry->vme_end - tmp_entry->vme_start);
12463 tmp_entry->vme_start = copy_addr;
12464 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12465 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12466 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12467 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12468 }
12469
12470 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12471 copy_size < copy->size) {
12472 /*
12473 * The actual size of the VM map copy is smaller than what
12474 * was requested by the caller. This must be because some
12475 * PAGE_SIZE-sized pages are missing at the end of the last
12476 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12477 * The caller might not have been aware of those missing
12478 * pages and might not want to be aware of it, which is
12479 * fine as long as they don't try to access (and crash on)
12480 * those missing pages.
12481 * Let's adjust the size of the "copy", to avoid failing
12482 * in vm_map_copyout() or vm_map_copy_overwrite().
12483 */
12484 assert(vm_map_round_page(copy_size,
12485 VM_MAP_PAGE_MASK(src_map)) ==
12486 vm_map_round_page(copy->size,
12487 VM_MAP_PAGE_MASK(src_map)));
12488 copy->size = copy_size;
12489 }
12490
12491 *copy_result = copy;
12492 return KERN_SUCCESS;
12493
12494 #undef RETURN
12495 }
12496
12497 kern_return_t
vm_map_copy_extract(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t do_copy,vm_map_copy_t * copy_result,vm_prot_t * cur_prot,vm_prot_t * max_prot,vm_inherit_t inheritance,vm_map_kernel_flags_t vmk_flags)12498 vm_map_copy_extract(
12499 vm_map_t src_map,
12500 vm_map_address_t src_addr,
12501 vm_map_size_t len,
12502 boolean_t do_copy,
12503 vm_map_copy_t *copy_result, /* OUT */
12504 vm_prot_t *cur_prot, /* IN/OUT */
12505 vm_prot_t *max_prot, /* IN/OUT */
12506 vm_inherit_t inheritance,
12507 vm_map_kernel_flags_t vmk_flags)
12508 {
12509 vm_map_copy_t copy;
12510 kern_return_t kr;
12511 vm_prot_t required_cur_prot, required_max_prot;
12512
12513 /*
12514 * Check for copies of zero bytes.
12515 */
12516
12517 if (len == 0) {
12518 *copy_result = VM_MAP_COPY_NULL;
12519 return KERN_SUCCESS;
12520 }
12521
12522 /*
12523 * Check that the end address doesn't overflow
12524 */
12525 if (src_addr + len < src_addr) {
12526 return KERN_INVALID_ADDRESS;
12527 }
12528
12529 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12530 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12531 }
12532
12533 required_cur_prot = *cur_prot;
12534 required_max_prot = *max_prot;
12535
12536 /*
12537 * Allocate a header element for the list.
12538 *
12539 * Use the start and end in the header to
12540 * remember the endpoints prior to rounding.
12541 */
12542
12543 copy = vm_map_copy_allocate();
12544 copy->type = VM_MAP_COPY_ENTRY_LIST;
12545 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12546
12547 vm_map_store_init(©->cpy_hdr);
12548
12549 copy->offset = 0;
12550 copy->size = len;
12551
12552 kr = vm_map_remap_extract(src_map,
12553 src_addr,
12554 len,
12555 do_copy, /* copy */
12556 ©->cpy_hdr,
12557 cur_prot, /* IN/OUT */
12558 max_prot, /* IN/OUT */
12559 inheritance,
12560 vmk_flags);
12561 if (kr != KERN_SUCCESS) {
12562 vm_map_copy_discard(copy);
12563 return kr;
12564 }
12565 if (required_cur_prot != VM_PROT_NONE) {
12566 assert((*cur_prot & required_cur_prot) == required_cur_prot);
12567 assert((*max_prot & required_max_prot) == required_max_prot);
12568 }
12569
12570 *copy_result = copy;
12571 return KERN_SUCCESS;
12572 }
12573
12574 /*
12575 * vm_map_copyin_object:
12576 *
12577 * Create a copy object from an object.
12578 * Our caller donates an object reference.
12579 */
12580
12581 kern_return_t
vm_map_copyin_object(vm_object_t object,vm_object_offset_t offset,vm_object_size_t size,vm_map_copy_t * copy_result)12582 vm_map_copyin_object(
12583 vm_object_t object,
12584 vm_object_offset_t offset, /* offset of region in object */
12585 vm_object_size_t size, /* size of region in object */
12586 vm_map_copy_t *copy_result) /* OUT */
12587 {
12588 vm_map_copy_t copy; /* Resulting copy */
12589
12590 /*
12591 * We drop the object into a special copy object
12592 * that contains the object directly.
12593 */
12594
12595 copy = vm_map_copy_allocate();
12596 copy->type = VM_MAP_COPY_OBJECT;
12597 copy->cpy_object = object;
12598 copy->offset = offset;
12599 copy->size = size;
12600
12601 *copy_result = copy;
12602 return KERN_SUCCESS;
12603 }
12604
/*
 *	vm_map_fork_share:
 *
 *	Called during map fork to share "old_entry" of "old_map" with the
 *	child "new_map".  Ensures the entry has a suitable VM object (or
 *	nested submap pmap), creating a shadow object when a symmetric
 *	copy is pending or possible, then clones the entry, marks both
 *	the parent's and the child's entry as shared, links the clone at
 *	the end of "new_map", and copies the pmap mappings over.
 *
 *	Caller holds the locks needed to mutate both maps' entry lists
 *	(this is an internal fork helper) -- NOTE(review): locking
 *	discipline is enforced by the caller, not visible here.
 */
static void
vm_map_fork_share(
	vm_map_t        old_map,
	vm_map_entry_t  old_entry,
	vm_map_t        new_map)
{
	vm_object_t     object;
	vm_map_entry_t  new_entry;

	/*
	 *	New sharing code.  New map entry
	 *	references original object.  Internal
	 *	objects use asynchronous copy algorithm for
	 *	future copies.  First make sure we have
	 *	the right object.  If we need a shadow,
	 *	or someone else already has one, then
	 *	make a new shadow and share it.
	 */

	/* "object" is only used below when this is not a submap entry */
	if (!old_entry->is_sub_map) {
		object = VME_OBJECT(old_entry);
	}

	if (old_entry->is_sub_map) {
		assert(old_entry->wired_count == 0);
#ifndef NO_NESTED_PMAP
#if !PMAP_FORK_NEST
		if (old_entry->use_pmap) {
			kern_return_t   result;

			/* nest the submap's pmap into the child's pmap */
			result = pmap_nest(new_map->pmap,
			    (VME_SUBMAP(old_entry))->pmap,
			    (addr64_t)old_entry->vme_start,
			    (uint64_t)(old_entry->vme_end - old_entry->vme_start));
			if (result) {
				panic("vm_map_fork_share: pmap_nest failed!");
			}
		}
#endif /* !PMAP_FORK_NEST */
#endif /* NO_NESTED_PMAP */
	} else if (object == VM_OBJECT_NULL) {
		/* no backing object yet: create one covering the entry */
		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
		    old_entry->vme_start));
		VME_OFFSET_SET(old_entry, 0);
		VME_OBJECT_SET(old_entry, object, false, 0);
		old_entry->use_pmap = TRUE;
//		assert(!old_entry->needs_copy);
	} else if (object->copy_strategy !=
	    MEMORY_OBJECT_COPY_SYMMETRIC) {
		/*
		 *	We are already using an asymmetric
		 *	copy, and therefore we already have
		 *	the right object.
		 */

		assert(!old_entry->needs_copy);
	} else if (old_entry->needs_copy ||     /* case 1 */
	    object->shadowed ||                 /* case 2 */
	    (!object->true_share &&             /* case 3 */
	    !old_entry->is_shared &&
	    (object->vo_size >
	    (vm_map_size_t)(old_entry->vme_end -
	    old_entry->vme_start)))) {
		/*
		 *	We need to create a shadow.
		 *	There are three cases here.
		 *	In the first case, we need to
		 *	complete a deferred symmetrical
		 *	copy that we participated in.
		 *	In the second and third cases,
		 *	we need to create the shadow so
		 *	that changes that we make to the
		 *	object do not interfere with
		 *	any symmetrical copies which
		 *	have occurred (case 2) or which
		 *	might occur (case 3).
		 *
		 *	The first case is when we had
		 *	deferred shadow object creation
		 *	via the entry->needs_copy mechanism.
		 *	This mechanism only works when
		 *	only one entry points to the source
		 *	object, and we are about to create
		 *	a second entry pointing to the
		 *	same object. The problem is that
		 *	there is no way of mapping from
		 *	an object to the entries pointing
		 *	to it. (Deferred shadow creation
		 *	works with one entry because occurs
		 *	at fault time, and we walk from the
		 *	entry to the object when handling
		 *	the fault.)
		 *
		 *	The second case is when the object
		 *	to be shared has already been copied
		 *	with a symmetric copy, but we point
		 *	directly to the object without
		 *	needs_copy set in our entry. (This
		 *	can happen because different ranges
		 *	of an object can be pointed to by
		 *	different entries. In particular,
		 *	a single entry pointing to an object
		 *	can be split by a call to vm_inherit,
		 *	which, combined with task_create, can
		 *	result in the different entries
		 *	having different needs_copy values.)
		 *	The shadowed flag in the object allows
		 *	us to detect this case. The problem
		 *	with this case is that if this object
		 *	has or will have shadows, then we
		 *	must not perform an asymmetric copy
		 *	of this object, since such a copy
		 *	allows the object to be changed, which
		 *	will break the previous symmetrical
		 *	copies (which rely upon the object
		 *	not changing). In a sense, the shadowed
		 *	flag says "don't change this object".
		 *	We fix this by creating a shadow
		 *	object for this object, and sharing
		 *	that. This works because we are free
		 *	to change the shadow object (and thus
		 *	to use an asymmetric copy strategy);
		 *	this is also semantically correct,
		 *	since this object is temporary, and
		 *	therefore a copy of the object is
		 *	as good as the object itself. (This
		 *	is not true for permanent objects,
		 *	since the pager needs to see changes,
		 *	which won't happen if the changes
		 *	are made to a copy.)
		 *
		 *	The third case is when the object
		 *	to be shared has parts sticking
		 *	outside of the entry we're working
		 *	with, and thus may in the future
		 *	be subject to a symmetrical copy.
		 *	(This is a preemptive version of
		 *	case 2.)
		 */
		VME_OBJECT_SHADOW(old_entry,
		    (vm_map_size_t) (old_entry->vme_end -
		    old_entry->vme_start),
		    vm_map_always_shadow(old_map));

		/*
		 *	If we're making a shadow for other than
		 *	copy on write reasons, then we have
		 *	to remove write permission.
		 */

		if (!old_entry->needs_copy &&
		    (old_entry->protection & VM_PROT_WRITE)) {
			vm_prot_t prot;

			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));

			prot = old_entry->protection & ~VM_PROT_WRITE;

			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));

			if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
				prot |= VM_PROT_EXECUTE;
			}


			if (old_map->mapped_in_other_pmaps) {
				/* map may be visible via other pmaps:
				 * protect through the object instead */
				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					old_entry->vme_start),
					PMAP_NULL,
					PAGE_SIZE,
					old_entry->vme_start,
					prot);
			} else {
				pmap_protect(old_map->pmap,
				    old_entry->vme_start,
				    old_entry->vme_end,
				    prot);
			}
		}

		old_entry->needs_copy = FALSE;
		object = VME_OBJECT(old_entry);
	}


	/*
	 *	If object was using a symmetric copy strategy,
	 *	change its copy strategy to the default
	 *	asymmetric copy strategy, which is copy_delay
	 *	in the non-norma case and copy_call in the
	 *	norma case. Bump the reference count for the
	 *	new entry.
	 */

	if (old_entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(old_entry));
	} else {
		vm_object_lock(object);
		vm_object_reference_locked(object);
		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		}
		vm_object_unlock(object);
	}

	/*
	 *	Clone the entry, using object ref from above.
	 *	Mark both entries as shared.
	 */

	new_entry = vm_map_entry_create(new_map); /* Never the kernel map or descendants */
	vm_map_entry_copy(old_map, new_entry, old_entry);
	old_entry->is_shared = TRUE;
	new_entry->is_shared = TRUE;

	/*
	 * We're dealing with a shared mapping, so the resulting mapping
	 * should inherit some of the original mapping's accounting settings.
	 * "iokit_acct" should have been cleared in vm_map_entry_copy().
	 * "use_pmap" should stay the same as before (if it hasn't been reset
	 * to TRUE when we cleared "iokit_acct").
	 */
	assert(!new_entry->iokit_acct);

	/*
	 *	If old entry's inheritance is VM_INHERIT_NONE,
	 *	the new entry is for corpse fork, remove the
	 *	write permission from the new entry.
	 */
	if (old_entry->inheritance == VM_INHERIT_NONE) {
		new_entry->protection &= ~VM_PROT_WRITE;
		new_entry->max_protection &= ~VM_PROT_WRITE;
	}

	/*
	 *	Insert the entry into the new map -- we
	 *	know we're inserting at the end of the new
	 *	map.
	 */

	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
	    VM_MAP_KERNEL_FLAGS_NONE);

	/*
	 *	Update the physical map
	 */

	if (old_entry->is_sub_map) {
		/* Bill Angell pmap support goes here */
	} else {
		/* replicate existing translations into the child's pmap */
		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
		    old_entry->vme_end - old_entry->vme_start,
		    old_entry->vme_start);
	}
}
12863
12864 static boolean_t
vm_map_fork_copy(vm_map_t old_map,vm_map_entry_t * old_entry_p,vm_map_t new_map,int vm_map_copyin_flags)12865 vm_map_fork_copy(
12866 vm_map_t old_map,
12867 vm_map_entry_t *old_entry_p,
12868 vm_map_t new_map,
12869 int vm_map_copyin_flags)
12870 {
12871 vm_map_entry_t old_entry = *old_entry_p;
12872 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12873 vm_map_offset_t start = old_entry->vme_start;
12874 vm_map_copy_t copy;
12875 vm_map_entry_t last = vm_map_last_entry(new_map);
12876
12877 vm_map_unlock(old_map);
12878 /*
12879 * Use maxprot version of copyin because we
12880 * care about whether this memory can ever
12881 * be accessed, not just whether it's accessible
12882 * right now.
12883 */
12884 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12885 if (vm_map_copyin_internal(old_map, start, entry_size,
12886 vm_map_copyin_flags, ©)
12887 != KERN_SUCCESS) {
12888 /*
12889 * The map might have changed while it
12890 * was unlocked, check it again. Skip
12891 * any blank space or permanently
12892 * unreadable region.
12893 */
12894 vm_map_lock(old_map);
12895 if (!vm_map_lookup_entry(old_map, start, &last) ||
12896 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12897 last = last->vme_next;
12898 }
12899 *old_entry_p = last;
12900
12901 /*
12902 * XXX For some error returns, want to
12903 * XXX skip to the next element. Note
12904 * that INVALID_ADDRESS and
12905 * PROTECTION_FAILURE are handled above.
12906 */
12907
12908 return FALSE;
12909 }
12910
12911 /*
12912 * Assert that the vm_map_copy is coming from the right
12913 * zone and hasn't been forged
12914 */
12915 vm_map_copy_require(copy);
12916
12917 /*
12918 * Insert the copy into the new map
12919 */
12920 vm_map_copy_insert(new_map, last, copy);
12921
12922 /*
12923 * Pick up the traversal at the end of
12924 * the copied region.
12925 */
12926
12927 vm_map_lock(old_map);
12928 start += entry_size;
12929 if (!vm_map_lookup_entry(old_map, start, &last)) {
12930 last = last->vme_next;
12931 } else {
12932 if (last->vme_start == start) {
12933 /*
12934 * No need to clip here and we don't
12935 * want to cause any unnecessary
12936 * unnesting...
12937 */
12938 } else {
12939 vm_map_clip_start(old_map, last, start);
12940 }
12941 }
12942 *old_entry_p = last;
12943
12944 return TRUE;
12945 }
12946
12947 #if PMAP_FORK_NEST
12948 #define PMAP_FORK_NEST_DEBUG 0
12949 static inline void
vm_map_fork_unnest(pmap_t new_pmap,vm_map_offset_t pre_nested_start,vm_map_offset_t pre_nested_end,vm_map_offset_t start,vm_map_offset_t end)12950 vm_map_fork_unnest(
12951 pmap_t new_pmap,
12952 vm_map_offset_t pre_nested_start,
12953 vm_map_offset_t pre_nested_end,
12954 vm_map_offset_t start,
12955 vm_map_offset_t end)
12956 {
12957 kern_return_t kr;
12958 vm_map_offset_t nesting_mask, start_unnest, end_unnest;
12959
12960 assertf(pre_nested_start <= pre_nested_end,
12961 "pre_nested start 0x%llx end 0x%llx",
12962 (uint64_t)pre_nested_start, (uint64_t)pre_nested_end);
12963 assertf(start <= end,
12964 "start 0x%llx end 0x%llx",
12965 (uint64_t) start, (uint64_t)end);
12966
12967 if (pre_nested_start == pre_nested_end) {
12968 /* nothing was pre-nested: done */
12969 return;
12970 }
12971 if (end <= pre_nested_start) {
12972 /* fully before pre-nested range: done */
12973 return;
12974 }
12975 if (start >= pre_nested_end) {
12976 /* fully after pre-nested range: done */
12977 return;
12978 }
12979 /* ignore parts of range outside of pre_nested range */
12980 if (start < pre_nested_start) {
12981 start = pre_nested_start;
12982 }
12983 if (end > pre_nested_end) {
12984 end = pre_nested_end;
12985 }
12986 nesting_mask = pmap_shared_region_size_min(new_pmap) - 1;
12987 start_unnest = start & ~nesting_mask;
12988 end_unnest = (end + nesting_mask) & ~nesting_mask;
12989 kr = pmap_unnest(new_pmap,
12990 (addr64_t)start_unnest,
12991 (uint64_t)(end_unnest - start_unnest));
12992 #if PMAP_FORK_NEST_DEBUG
12993 printf("PMAP_FORK_NEST %s:%d new_pmap %p 0x%llx:0x%llx -> pmap_unnest 0x%llx:0x%llx kr 0x%x\n", __FUNCTION__, __LINE__, new_pmap, (uint64_t)start, (uint64_t)end, (uint64_t)start_unnest, (uint64_t)end_unnest, kr);
12994 #endif /* PMAP_FORK_NEST_DEBUG */
12995 assertf(kr == KERN_SUCCESS,
12996 "0x%llx 0x%llx pmap_unnest(%p, 0x%llx, 0x%llx) -> 0x%x",
12997 (uint64_t)start, (uint64_t)end, new_pmap,
12998 (uint64_t)start_unnest, (uint64_t)(end_unnest - start_unnest),
12999 kr);
13000 }
13001 #endif /* PMAP_FORK_NEST */
13002
13003 /*
13004 * vm_map_fork:
13005 *
13006 * Create and return a new map based on the old
13007 * map, according to the inheritance values on the
13008 * regions in that map and the options.
13009 *
13010 * The source map must not be locked.
13011 */
vm_map_t
vm_map_fork(
	ledger_t        ledger,
	vm_map_t        old_map,
	int             options)
{
	pmap_t          new_pmap;
	vm_map_t        new_map;
	vm_map_entry_t  old_entry;
	vm_map_size_t   new_size = 0, entry_size;
	vm_map_entry_t  new_entry;
	boolean_t       src_needs_copy;
	boolean_t       new_entry_needs_copy;
	boolean_t       pmap_is64bit;
	int             vm_map_copyin_flags;
	vm_inherit_t    old_entry_inheritance;
	int             map_create_options;
	kern_return_t   footprint_collect_kr;

	/* reject any option bits we don't understand */
	if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_PRESERVE_PURGEABLE |
	    VM_MAP_FORK_CORPSE_FOOTPRINT)) {
		/* unsupported option */
		return VM_MAP_NULL;
	}

	/* the child's address-space width follows the parent's pmap */
	pmap_is64bit =
#if defined(__i386__) || defined(__x86_64__)
	    old_map->pmap->pm_task_map != TASK_MAP_32BIT;
#elif defined(__arm64__)
	    old_map->pmap->is_64bit;
#else
#error Unknown architecture.
#endif

	/* build the child pmap's creation flags from the parent's traits */
	unsigned int pmap_flags = 0;
	pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
#if defined(HAS_APPLE_PAC)
	/* inherit pointer-authentication (JOP) disablement */
	pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
#endif
#if CONFIG_ROSETTA
	/* inherit Rosetta translation status */
	pmap_flags |= old_map->pmap->is_rosetta ? PMAP_CREATE_ROSETTA : 0;
#endif
#if PMAP_CREATE_FORCE_4K_PAGES
	if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
	    PAGE_SIZE != FOURK_PAGE_SIZE) {
		pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
	}
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
	new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
	if (new_pmap == NULL) {
		return VM_MAP_NULL;
	}

	/* hold a ref on the parent map across the (long) copy below */
	vm_map_reference(old_map);
	vm_map_lock(old_map);

	map_create_options = 0;
	if (old_map->hdr.entries_pageable) {
		map_create_options |= VM_MAP_CREATE_PAGEABLE;
	}
	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
		/* only meaningful (and only read) when this option is set */
		footprint_collect_kr = KERN_SUCCESS;
	}
	new_map = vm_map_create_options(new_pmap,
	    old_map->min_offset,
	    old_map->max_offset,
	    map_create_options);
	/* inherit cs_enforcement */
	vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
	vm_map_lock(new_map);
	vm_commit_pagezero_status(new_map);
	/* inherit the parent map's page size */
	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));

#if CONFIG_MAP_RANGES
	/* inherit the parent map's VM ranges */
	vm_map_range_fork(new_map, old_map);
#endif
	/* ensure PMAP_CS structures are prepared for the fork */
	pmap_cs_fork_prepare(old_map->pmap, new_pmap);

#if PMAP_FORK_NEST
	/*
	 * Pre-nest the shared region's pmap.
	 * Any portion that turns out not to be shared with the child is
	 * unnested again (via vm_map_fork_unnest()) as we walk the entries.
	 */
	vm_map_offset_t pre_nested_start = 0, pre_nested_end = 0;
	pmap_fork_nest(old_map->pmap, new_pmap,
	    &pre_nested_start, &pre_nested_end);
#if PMAP_FORK_NEST_DEBUG
	printf("PMAP_FORK_NEST %s:%d old %p new %p pre_nested start 0x%llx end 0x%llx\n", __FUNCTION__, __LINE__, old_map->pmap, new_pmap, (uint64_t)pre_nested_start, (uint64_t)pre_nested_end);
#endif /* PMAP_FORK_NEST_DEBUG */
#endif /* PMAP_FORK_NEST */

	/* walk every entry of the parent map, handling it per its inheritance */
	for (old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map);) {
		/*
		 * Abort any corpse collection if the system is shutting down.
		 */
		if ((options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
		    get_system_inshutdown()) {
#if PMAP_FORK_NEST
			/* clean up whatever pre-nesting is left before bailing */
			new_entry = vm_map_last_entry(new_map);
			if (new_entry == vm_map_to_entry(new_map)) {
				/* unnest all that was pre-nested */
				vm_map_fork_unnest(new_pmap,
				    pre_nested_start, pre_nested_end,
				    vm_map_min(new_map), vm_map_max(new_map));
			} else if (new_entry->vme_end < vm_map_max(new_map)) {
				/* unnest hole at the end, if pre-nested */
				vm_map_fork_unnest(new_pmap,
				    pre_nested_start, pre_nested_end,
				    new_entry->vme_end, vm_map_max(new_map));
			}
#endif /* PMAP_FORK_NEST */
			vm_map_corpse_footprint_collect_done(new_map);
			vm_map_unlock(new_map);
			vm_map_unlock(old_map);
			vm_map_deallocate(new_map);
			vm_map_deallocate(old_map);
			printf("Aborting corpse map due to system shutdown\n");
			return VM_MAP_NULL;
		}

		entry_size = old_entry->vme_end - old_entry->vme_start;

#if PMAP_FORK_NEST
		/*
		 * Undo any unnecessary pre-nesting.
		 */
		vm_map_offset_t prev_end;
		if (old_entry == vm_map_first_entry(old_map)) {
			prev_end = vm_map_min(old_map);
		} else {
			prev_end = old_entry->vme_prev->vme_end;
		}
		if (prev_end < old_entry->vme_start) {
			/* unnest hole before this entry, if pre-nested */
			vm_map_fork_unnest(new_pmap,
			    pre_nested_start, pre_nested_end,
			    prev_end, old_entry->vme_start);
		}
		if (old_entry->is_sub_map && old_entry->use_pmap) {
			/* keep this entry nested in the child */
#if PMAP_FORK_NEST_DEBUG
			printf("PMAP_FORK_NEST %s:%d new_pmap %p keeping 0x%llx:0x%llx nested\n", __FUNCTION__, __LINE__, new_pmap, (uint64_t)old_entry->vme_start, (uint64_t)old_entry->vme_end);
#endif /* PMAP_FORK_NEST_DEBUG */
		} else {
			/* undo nesting for this entry, if pre-nested */
			vm_map_fork_unnest(new_pmap,
			    pre_nested_start, pre_nested_end,
			    old_entry->vme_start, old_entry->vme_end);
		}
#endif /* PMAP_FORK_NEST */

		old_entry_inheritance = old_entry->inheritance;
		/*
		 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
		 * share VM_INHERIT_NONE entries that are not backed by a
		 * device pager.
		 */
		if (old_entry_inheritance == VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
		    (old_entry->protection & VM_PROT_READ) &&
		    !(!old_entry->is_sub_map &&
		    VME_OBJECT(old_entry) != NULL &&
		    VME_OBJECT(old_entry)->pager != NULL &&
		    is_device_pager_ops(
			    VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
			old_entry_inheritance = VM_INHERIT_SHARE;
		}

		/* stop collecting footprint data after the first failure */
		if (old_entry_inheritance != VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
		    footprint_collect_kr == KERN_SUCCESS) {
			/*
			 * The corpse won't have old_map->pmap to query
			 * footprint information, so collect that data now
			 * and store it in new_map->vmmap_corpse_footprint
			 * for later autopsy.
			 */
			footprint_collect_kr =
			    vm_map_corpse_footprint_collect(old_map,
			    old_entry,
			    new_map);
		}

		switch (old_entry_inheritance) {
		case VM_INHERIT_NONE:
			/* entry is simply not present in the child */
			break;

		case VM_INHERIT_SHARE:
			vm_map_fork_share(old_map, old_entry, new_map);
			new_size += entry_size;
			break;

		case VM_INHERIT_COPY:

			/*
			 * Inline the copy_quickly case;
			 * upon failure, fall back on call
			 * to vm_map_fork_copy.
			 */

			if (old_entry->is_sub_map) {
				break;
			}
			if ((old_entry->wired_count != 0) ||
			    ((VME_OBJECT(old_entry) != NULL) &&
			    (VME_OBJECT(old_entry)->true_share))) {
				goto slow_vm_map_fork_copy;
			}

			new_entry = vm_map_entry_create(new_map); /* never the kernel map or descendants */
			vm_map_entry_copy(old_map, new_entry, old_entry);
			if (old_entry->vme_permanent) {
				/* inherit "permanent" on fork() */
				new_entry->vme_permanent = TRUE;
			}

			if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
				new_map->jit_entry_exists = TRUE;
			}

			if (new_entry->is_sub_map) {
				/* clear address space specifics */
				new_entry->use_pmap = FALSE;
			} else {
				/*
				 * We're dealing with a copy-on-write operation,
				 * so the resulting mapping should not inherit
				 * the original mapping's accounting settings.
				 * "iokit_acct" should have been cleared in
				 * vm_map_entry_copy().
				 * "use_pmap" should be reset to its default
				 * (TRUE) so that the new mapping gets
				 * accounted for in the task's memory footprint.
				 */
				assert(!new_entry->iokit_acct);
				new_entry->use_pmap = TRUE;
			}

			if (!vm_object_copy_quickly(
				    VME_OBJECT(new_entry),
				    VME_OFFSET(old_entry),
				    (old_entry->vme_end -
				    old_entry->vme_start),
				    &src_needs_copy,
				    &new_entry_needs_copy)) {
				vm_map_entry_dispose(new_entry);
				goto slow_vm_map_fork_copy;
			}

			/*
			 * Handle copy-on-write obligations
			 */

			if (src_needs_copy && !old_entry->needs_copy) {
				vm_prot_t prot;

				assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));

				/* write-protect the parent's mapping to force COW faults */
				prot = old_entry->protection & ~VM_PROT_WRITE;

				if (override_nx(old_map, VME_ALIAS(old_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));

				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					old_entry->vme_start),
					((old_entry->is_shared
					|| old_map->mapped_in_other_pmaps)
					? PMAP_NULL :
					old_map->pmap),
					VM_MAP_PAGE_SIZE(old_map),
					old_entry->vme_start,
					prot);

				assert(old_entry->wired_count == 0);
				old_entry->needs_copy = TRUE;
			}
			new_entry->needs_copy = new_entry_needs_copy;

			/*
			 * Insert the entry at the end
			 * of the map.
			 */

			vm_map_store_entry_link(new_map,
			    vm_map_last_entry(new_map),
			    new_entry,
			    VM_MAP_KERNEL_FLAGS_NONE);
			new_size += entry_size;
			break;

slow_vm_map_fork_copy:
			vm_map_copyin_flags = 0;
			if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
				vm_map_copyin_flags |=
				    VM_MAP_COPYIN_PRESERVE_PURGEABLE;
			}
			/* vm_map_fork_copy() advances old_entry itself */
			if (vm_map_fork_copy(old_map,
			    &old_entry,
			    new_map,
			    vm_map_copyin_flags)) {
				new_size += entry_size;
			}
			continue;
		}
		old_entry = old_entry->vme_next;
	}

#if PMAP_FORK_NEST
	/* unnest any pre-nested range beyond the child's last entry */
	new_entry = vm_map_last_entry(new_map);
	if (new_entry == vm_map_to_entry(new_map)) {
		/* unnest all that was pre-nested */
		vm_map_fork_unnest(new_pmap,
		    pre_nested_start, pre_nested_end,
		    vm_map_min(new_map), vm_map_max(new_map));
	} else if (new_entry->vme_end < vm_map_max(new_map)) {
		/* unnest hole at the end, if pre-nested */
		vm_map_fork_unnest(new_pmap,
		    pre_nested_start, pre_nested_end,
		    new_entry->vme_end, vm_map_max(new_map));
	}
#endif /* PMAP_FORK_NEST */

#if defined(__arm64__)
	pmap_insert_sharedpage(new_map->pmap);
#endif /* __arm64__ */

	new_map->size = new_size;

	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		vm_map_corpse_footprint_collect_done(new_map);
	}

	/* Propagate JIT entitlement for the pmap layer. */
	if (pmap_get_jit_entitled(old_map->pmap)) {
		/* Tell the pmap that it supports JIT. */
		pmap_set_jit_entitled(new_map->pmap);
	}

	vm_map_unlock(new_map);
	vm_map_unlock(old_map);
	/* drop the reference taken at the top; caller owns the ref on new_map */
	vm_map_deallocate(old_map);

	return new_map;
}
13367
13368 /*
13369 * vm_map_exec:
13370 *
13371 * Setup the "new_map" with the proper execution environment according
13372 * to the type of executable (platform, 64bit, chroot environment).
13373 * Map the comm page and shared region, etc...
13374 */
13375 kern_return_t
vm_map_exec(vm_map_t new_map,task_t task,boolean_t is64bit,void * fsroot,cpu_type_t cpu,cpu_subtype_t cpu_subtype,boolean_t reslide,boolean_t is_driverkit,uint32_t rsr_version)13376 vm_map_exec(
13377 vm_map_t new_map,
13378 task_t task,
13379 boolean_t is64bit,
13380 void *fsroot,
13381 cpu_type_t cpu,
13382 cpu_subtype_t cpu_subtype,
13383 boolean_t reslide,
13384 boolean_t is_driverkit,
13385 uint32_t rsr_version)
13386 {
13387 SHARED_REGION_TRACE_DEBUG(
13388 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13389 (void *)VM_KERNEL_ADDRPERM(current_task()),
13390 (void *)VM_KERNEL_ADDRPERM(new_map),
13391 (void *)VM_KERNEL_ADDRPERM(task),
13392 (void *)VM_KERNEL_ADDRPERM(fsroot),
13393 cpu,
13394 cpu_subtype));
13395 (void) vm_commpage_enter(new_map, task, is64bit);
13396
13397 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide, is_driverkit, rsr_version);
13398
13399 SHARED_REGION_TRACE_DEBUG(
13400 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13401 (void *)VM_KERNEL_ADDRPERM(current_task()),
13402 (void *)VM_KERNEL_ADDRPERM(new_map),
13403 (void *)VM_KERNEL_ADDRPERM(task),
13404 (void *)VM_KERNEL_ADDRPERM(fsroot),
13405 cpu,
13406 cpu_subtype));
13407
13408 /*
13409 * Some devices have region(s) of memory that shouldn't get allocated by
13410 * user processes. The following code creates dummy vm_map_entry_t's for each
13411 * of the regions that needs to be reserved to prevent any allocations in
13412 * those regions.
13413 */
13414 kern_return_t kr = KERN_FAILURE;
13415 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13416 vmk_flags.vmkf_permanent = TRUE;
13417 vmk_flags.vmkf_beyond_max = TRUE;
13418
13419 struct vm_reserved_region *regions = NULL;
13420 size_t num_regions = ml_get_vm_reserved_regions(is64bit, ®ions);
13421 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13422
13423 for (size_t i = 0; i < num_regions; ++i) {
13424 kr = vm_map_enter(
13425 new_map,
13426 ®ions[i].vmrr_addr,
13427 regions[i].vmrr_size,
13428 (vm_map_offset_t)0,
13429 VM_FLAGS_FIXED,
13430 vmk_flags,
13431 VM_KERN_MEMORY_NONE,
13432 VM_OBJECT_NULL,
13433 (vm_object_offset_t)0,
13434 FALSE,
13435 VM_PROT_NONE,
13436 VM_PROT_NONE,
13437 VM_INHERIT_COPY);
13438
13439 if (kr != KERN_SUCCESS) {
13440 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13441 }
13442 }
13443
13444 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13445
13446 return KERN_SUCCESS;
13447 }
13448
/*
 * Global statistics for vm_map_lookup_and_lock_object()'s COW-resolution
 * paths: counts, cumulative sizes, observed maxima, restarts and errors
 * for the copy_slowly, copy_strategically and shadow-object cases.
 * Updated without synchronization; for diagnostics only.
 */
uint64_t vm_map_lookup_and_lock_object_copy_slowly_count = 0;
uint64_t vm_map_lookup_and_lock_object_copy_slowly_size = 0;
uint64_t vm_map_lookup_and_lock_object_copy_slowly_max = 0;
uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart = 0;
uint64_t vm_map_lookup_and_lock_object_copy_slowly_error = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_count = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_size = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_max = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart = 0;
uint64_t vm_map_lookup_and_lock_object_copy_strategically_error = 0;
uint64_t vm_map_lookup_and_lock_object_copy_shadow_count = 0;
uint64_t vm_map_lookup_and_lock_object_copy_shadow_size = 0;
uint64_t vm_map_lookup_and_lock_object_copy_shadow_max = 0;
13462 /*
13463 * vm_map_lookup_and_lock_object:
13464 *
13465 * Finds the VM object, offset, and
13466 * protection for a given virtual address in the
13467 * specified map, assuming a page fault of the
13468 * type specified.
13469 *
13470 * Returns the (object, offset, protection) for
13471 * this address, whether it is wired down, and whether
13472 * this map has the only reference to the data in question.
13473 * In order to later verify this lookup, a "version"
13474 * is returned.
13475 * If contended != NULL, *contended will be set to
13476 * true iff the thread had to spin or block to acquire
13477 * an exclusive lock.
13478 *
13479 * The map MUST be locked by the caller and WILL be
13480 * locked on exit. In order to guarantee the
13481 * existence of the returned object, it is returned
13482 * locked.
13483 *
13484 * If a lookup is requested with "write protection"
13485 * specified, the map may be changed to perform virtual
13486 * copying operations, although the data referenced will
13487 * remain the same.
13488 */
13489 kern_return_t
vm_map_lookup_and_lock_object(vm_map_t * var_map,vm_map_offset_t vaddr,vm_prot_t fault_type,int object_lock_type,vm_map_version_t * out_version,vm_object_t * object,vm_object_offset_t * offset,vm_prot_t * out_prot,boolean_t * wired,vm_object_fault_info_t fault_info,vm_map_t * real_map,bool * contended)13490 vm_map_lookup_and_lock_object(
13491 vm_map_t *var_map, /* IN/OUT */
13492 vm_map_offset_t vaddr,
13493 vm_prot_t fault_type,
13494 int object_lock_type,
13495 vm_map_version_t *out_version, /* OUT */
13496 vm_object_t *object, /* OUT */
13497 vm_object_offset_t *offset, /* OUT */
13498 vm_prot_t *out_prot, /* OUT */
13499 boolean_t *wired, /* OUT */
13500 vm_object_fault_info_t fault_info, /* OUT */
13501 vm_map_t *real_map, /* OUT */
13502 bool *contended) /* OUT */
13503 {
13504 vm_map_entry_t entry;
13505 vm_map_t map = *var_map;
13506 vm_map_t old_map = *var_map;
13507 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13508 vm_map_offset_t cow_parent_vaddr = 0;
13509 vm_map_offset_t old_start = 0;
13510 vm_map_offset_t old_end = 0;
13511 vm_prot_t prot;
13512 boolean_t mask_protections;
13513 boolean_t force_copy;
13514 boolean_t no_force_copy_if_executable;
13515 boolean_t submap_needed_copy;
13516 vm_prot_t original_fault_type;
13517 vm_map_size_t fault_page_mask;
13518
13519 /*
13520 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13521 * as a mask against the mapping's actual protections, not as an
13522 * absolute value.
13523 */
13524 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13525 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13526 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13527 fault_type &= VM_PROT_ALL;
13528 original_fault_type = fault_type;
13529 if (contended) {
13530 *contended = false;
13531 }
13532
13533 *real_map = map;
13534
13535 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13536 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13537
13538 RetryLookup:
13539 fault_type = original_fault_type;
13540
13541 /*
13542 * If the map has an interesting hint, try it before calling
13543 * full blown lookup routine.
13544 */
13545 entry = map->hint;
13546
13547 if ((entry == vm_map_to_entry(map)) ||
13548 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13549 vm_map_entry_t tmp_entry;
13550
13551 /*
13552 * Entry was either not a valid hint, or the vaddr
13553 * was not contained in the entry, so do a full lookup.
13554 */
13555 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13556 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13557 vm_map_unlock(cow_sub_map_parent);
13558 }
13559 if ((*real_map != map)
13560 && (*real_map != cow_sub_map_parent)) {
13561 vm_map_unlock(*real_map);
13562 }
13563 return KERN_INVALID_ADDRESS;
13564 }
13565
13566 entry = tmp_entry;
13567 }
13568 if (map == old_map) {
13569 old_start = entry->vme_start;
13570 old_end = entry->vme_end;
13571 }
13572
13573 /*
13574 * Handle submaps. Drop lock on upper map, submap is
13575 * returned locked.
13576 */
13577
13578 submap_needed_copy = FALSE;
13579 submap_recurse:
13580 if (entry->is_sub_map) {
13581 vm_map_offset_t local_vaddr;
13582 vm_map_offset_t end_delta;
13583 vm_map_offset_t start_delta;
13584 vm_map_entry_t submap_entry, saved_submap_entry;
13585 vm_object_offset_t submap_entry_offset;
13586 vm_object_size_t submap_entry_size;
13587 vm_prot_t subentry_protection;
13588 vm_prot_t subentry_max_protection;
13589 boolean_t subentry_no_copy_on_read;
13590 boolean_t subentry_permanent;
13591 boolean_t subentry_pmap_cs_associated;
13592 boolean_t mapped_needs_copy = FALSE;
13593 vm_map_version_t version;
13594
13595 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13596 "map %p (%d) entry %p submap %p (%d)\n",
13597 map, VM_MAP_PAGE_SHIFT(map), entry,
13598 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13599
13600 local_vaddr = vaddr;
13601
13602 if ((entry->use_pmap &&
13603 !((fault_type & VM_PROT_WRITE) ||
13604 force_copy))) {
13605 /* if real_map equals map we unlock below */
13606 if ((*real_map != map) &&
13607 (*real_map != cow_sub_map_parent)) {
13608 vm_map_unlock(*real_map);
13609 }
13610 *real_map = VME_SUBMAP(entry);
13611 }
13612
13613 if (entry->needs_copy &&
13614 ((fault_type & VM_PROT_WRITE) ||
13615 force_copy)) {
13616 if (!mapped_needs_copy) {
13617 if (vm_map_lock_read_to_write(map)) {
13618 vm_map_lock_read(map);
13619 *real_map = map;
13620 goto RetryLookup;
13621 }
13622 vm_map_lock_read(VME_SUBMAP(entry));
13623 *var_map = VME_SUBMAP(entry);
13624 cow_sub_map_parent = map;
13625 /* reset base to map before cow object */
13626 /* this is the map which will accept */
13627 /* the new cow object */
13628 old_start = entry->vme_start;
13629 old_end = entry->vme_end;
13630 cow_parent_vaddr = vaddr;
13631 mapped_needs_copy = TRUE;
13632 } else {
13633 vm_map_lock_read(VME_SUBMAP(entry));
13634 *var_map = VME_SUBMAP(entry);
13635 if ((cow_sub_map_parent != map) &&
13636 (*real_map != map)) {
13637 vm_map_unlock(map);
13638 }
13639 }
13640 } else {
13641 if (entry->needs_copy) {
13642 submap_needed_copy = TRUE;
13643 }
13644 vm_map_lock_read(VME_SUBMAP(entry));
13645 *var_map = VME_SUBMAP(entry);
13646 /* leave map locked if it is a target */
13647 /* cow sub_map above otherwise, just */
13648 /* follow the maps down to the object */
13649 /* here we unlock knowing we are not */
13650 /* revisiting the map. */
13651 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13652 vm_map_unlock_read(map);
13653 }
13654 }
13655
13656 map = *var_map;
13657
13658 /* calculate the offset in the submap for vaddr */
13659 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13660 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13661 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13662 (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13663
13664 RetrySubMap:
13665 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13666 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13667 vm_map_unlock(cow_sub_map_parent);
13668 }
13669 if ((*real_map != map)
13670 && (*real_map != cow_sub_map_parent)) {
13671 vm_map_unlock(*real_map);
13672 }
13673 *real_map = map;
13674 return KERN_INVALID_ADDRESS;
13675 }
13676
13677 /* find the attenuated shadow of the underlying object */
13678 /* on our target map */
13679
13680 /* in english the submap object may extend beyond the */
13681 /* region mapped by the entry or, may only fill a portion */
13682 /* of it. For our purposes, we only care if the object */
13683 /* doesn't fill. In this case the area which will */
13684 /* ultimately be clipped in the top map will only need */
13685 /* to be as big as the portion of the underlying entry */
13686 /* which is mapped */
13687 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13688 submap_entry->vme_start - VME_OFFSET(entry) : 0;
13689
13690 end_delta =
13691 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13692 submap_entry->vme_end ?
13693 0 : (VME_OFFSET(entry) +
13694 (old_end - old_start))
13695 - submap_entry->vme_end;
13696
13697 old_start += start_delta;
13698 old_end -= end_delta;
13699
13700 if (submap_entry->is_sub_map) {
13701 entry = submap_entry;
13702 vaddr = local_vaddr;
13703 goto submap_recurse;
13704 }
13705
13706 if (((fault_type & VM_PROT_WRITE) ||
13707 force_copy)
13708 && cow_sub_map_parent) {
13709 vm_object_t sub_object, copy_object;
13710 vm_object_offset_t copy_offset;
13711 vm_map_offset_t local_start;
13712 vm_map_offset_t local_end;
13713 boolean_t object_copied = FALSE;
13714 vm_object_offset_t object_copied_offset = 0;
13715 boolean_t object_copied_needs_copy = FALSE;
13716 kern_return_t kr = KERN_SUCCESS;
13717
13718 if (vm_map_lock_read_to_write(map)) {
13719 vm_map_lock_read(map);
13720 old_start -= start_delta;
13721 old_end += end_delta;
13722 goto RetrySubMap;
13723 }
13724
13725
13726 sub_object = VME_OBJECT(submap_entry);
13727 if (sub_object == VM_OBJECT_NULL) {
13728 sub_object =
13729 vm_object_allocate(
13730 (vm_map_size_t)
13731 (submap_entry->vme_end -
13732 submap_entry->vme_start));
13733 VME_OBJECT_SET(submap_entry, sub_object, false, 0);
13734 VME_OFFSET_SET(submap_entry, 0);
13735 assert(!submap_entry->is_sub_map);
13736 assert(submap_entry->use_pmap);
13737 }
13738 local_start = local_vaddr -
13739 (cow_parent_vaddr - old_start);
13740 local_end = local_vaddr +
13741 (old_end - cow_parent_vaddr);
13742 vm_map_clip_start(map, submap_entry, local_start);
13743 vm_map_clip_end(map, submap_entry, local_end);
13744 if (submap_entry->is_sub_map) {
13745 /* unnesting was done when clipping */
13746 assert(!submap_entry->use_pmap);
13747 }
13748
13749 /* This is the COW case, lets connect */
13750 /* an entry in our space to the underlying */
13751 /* object in the submap, bypassing the */
13752 /* submap. */
13753 submap_entry_offset = VME_OFFSET(submap_entry);
13754 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13755
13756 if ((submap_entry->wired_count != 0 ||
13757 sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
13758 (submap_entry->protection & VM_PROT_EXECUTE) &&
13759 no_force_copy_if_executable) {
13760 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13761 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13762 vm_map_unlock(cow_sub_map_parent);
13763 }
13764 if ((*real_map != map)
13765 && (*real_map != cow_sub_map_parent)) {
13766 vm_map_unlock(*real_map);
13767 }
13768 *real_map = map;
13769 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_NO_COW_ON_EXECUTABLE), 0 /* arg */);
13770 vm_map_lock_write_to_read(map);
13771 kr = KERN_PROTECTION_FAILURE;
13772 DTRACE_VM4(submap_no_copy_executable,
13773 vm_map_t, map,
13774 vm_object_offset_t, submap_entry_offset,
13775 vm_object_size_t, submap_entry_size,
13776 int, kr);
13777 return kr;
13778 }
13779
13780 if (submap_entry->wired_count != 0) {
13781 vm_object_reference(sub_object);
13782
13783 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13784 "submap_entry %p offset 0x%llx\n",
13785 submap_entry, VME_OFFSET(submap_entry));
13786
13787 DTRACE_VM6(submap_copy_slowly,
13788 vm_map_t, cow_sub_map_parent,
13789 vm_map_offset_t, vaddr,
13790 vm_map_t, map,
13791 vm_object_size_t, submap_entry_size,
13792 int, submap_entry->wired_count,
13793 int, sub_object->copy_strategy);
13794
13795 saved_submap_entry = submap_entry;
13796 version.main_timestamp = map->timestamp;
13797 vm_map_unlock(map); /* Increments timestamp by 1 */
13798 submap_entry = VM_MAP_ENTRY_NULL;
13799
13800 vm_object_lock(sub_object);
13801 kr = vm_object_copy_slowly(sub_object,
13802 submap_entry_offset,
13803 submap_entry_size,
13804 FALSE,
			    &copy_object);
13806 object_copied = TRUE;
13807 object_copied_offset = 0;
13808 /* 4k: account for extra offset in physical page */
13809 object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13810 object_copied_needs_copy = FALSE;
13811 vm_object_deallocate(sub_object);
13812
13813 vm_map_lock(map);
13814
13815 if (kr != KERN_SUCCESS &&
13816 kr != KERN_MEMORY_RESTART_COPY) {
13817 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13818 vm_map_unlock(cow_sub_map_parent);
13819 }
13820 if ((*real_map != map)
13821 && (*real_map != cow_sub_map_parent)) {
13822 vm_map_unlock(*real_map);
13823 }
13824 *real_map = map;
13825 vm_object_deallocate(copy_object);
13826 copy_object = VM_OBJECT_NULL;
13827 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_SLOWLY_FAILED), 0 /* arg */);
13828 vm_map_lock_write_to_read(map);
13829 DTRACE_VM4(submap_copy_error_slowly,
13830 vm_object_t, sub_object,
13831 vm_object_offset_t, submap_entry_offset,
13832 vm_object_size_t, submap_entry_size,
13833 int, kr);
13834 vm_map_lookup_and_lock_object_copy_slowly_error++;
13835 return kr;
13836 }
13837
13838 if ((kr == KERN_SUCCESS) &&
13839 (version.main_timestamp + 1) == map->timestamp) {
13840 submap_entry = saved_submap_entry;
13841 } else {
13842 saved_submap_entry = NULL;
13843 old_start -= start_delta;
13844 old_end += end_delta;
13845 vm_object_deallocate(copy_object);
13846 copy_object = VM_OBJECT_NULL;
13847 vm_map_lock_write_to_read(map);
13848 vm_map_lookup_and_lock_object_copy_slowly_restart++;
13849 goto RetrySubMap;
13850 }
13851 vm_map_lookup_and_lock_object_copy_slowly_count++;
13852 vm_map_lookup_and_lock_object_copy_slowly_size += submap_entry_size;
13853 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_slowly_max) {
13854 vm_map_lookup_and_lock_object_copy_slowly_max = submap_entry_size;
13855 }
13856 } else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13857 submap_entry_offset = VME_OFFSET(submap_entry);
13858 copy_object = VM_OBJECT_NULL;
13859 object_copied_offset = submap_entry_offset;
13860 object_copied_needs_copy = FALSE;
13861 DTRACE_VM6(submap_copy_strategically,
13862 vm_map_t, cow_sub_map_parent,
13863 vm_map_offset_t, vaddr,
13864 vm_map_t, map,
13865 vm_object_size_t, submap_entry_size,
13866 int, submap_entry->wired_count,
13867 int, sub_object->copy_strategy);
13868 kr = vm_object_copy_strategically(
13869 sub_object,
13870 submap_entry_offset,
13871 submap_entry->vme_end - submap_entry->vme_start,
			    &copy_object,
13873 &object_copied_offset,
13874 &object_copied_needs_copy);
13875 if (kr == KERN_MEMORY_RESTART_COPY) {
13876 old_start -= start_delta;
13877 old_end += end_delta;
13878 vm_object_deallocate(copy_object);
13879 copy_object = VM_OBJECT_NULL;
13880 vm_map_lock_write_to_read(map);
13881 vm_map_lookup_and_lock_object_copy_strategically_restart++;
13882 goto RetrySubMap;
13883 }
13884 if (kr != KERN_SUCCESS) {
13885 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13886 vm_map_unlock(cow_sub_map_parent);
13887 }
13888 if ((*real_map != map)
13889 && (*real_map != cow_sub_map_parent)) {
13890 vm_map_unlock(*real_map);
13891 }
13892 *real_map = map;
13893 vm_object_deallocate(copy_object);
13894 copy_object = VM_OBJECT_NULL;
13895 ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_STRAT_FAILED), 0 /* arg */);
13896 vm_map_lock_write_to_read(map);
13897 DTRACE_VM4(submap_copy_error_strategically,
13898 vm_object_t, sub_object,
13899 vm_object_offset_t, submap_entry_offset,
13900 vm_object_size_t, submap_entry_size,
13901 int, kr);
13902 vm_map_lookup_and_lock_object_copy_strategically_error++;
13903 return kr;
13904 }
13905 assert(copy_object != VM_OBJECT_NULL);
13906 assert(copy_object != sub_object);
13907 object_copied = TRUE;
13908 vm_map_lookup_and_lock_object_copy_strategically_count++;
13909 vm_map_lookup_and_lock_object_copy_strategically_size += submap_entry_size;
13910 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_strategically_max) {
13911 vm_map_lookup_and_lock_object_copy_strategically_max = submap_entry_size;
13912 }
13913 } else {
13914 /* set up shadow object */
13915 object_copied = FALSE;
13916 copy_object = sub_object;
13917 vm_object_lock(sub_object);
13918 vm_object_reference_locked(sub_object);
13919 sub_object->shadowed = TRUE;
13920 vm_object_unlock(sub_object);
13921
13922 assert(submap_entry->wired_count == 0);
13923 submap_entry->needs_copy = TRUE;
13924
13925 prot = submap_entry->protection;
13926 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13927 prot = prot & ~VM_PROT_WRITE;
13928 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13929
13930 if (override_nx(old_map,
13931 VME_ALIAS(submap_entry))
13932 && prot) {
13933 prot |= VM_PROT_EXECUTE;
13934 }
13935
13936 vm_object_pmap_protect(
13937 sub_object,
13938 VME_OFFSET(submap_entry),
13939 submap_entry->vme_end -
13940 submap_entry->vme_start,
13941 (submap_entry->is_shared
13942 || map->mapped_in_other_pmaps) ?
13943 PMAP_NULL : map->pmap,
13944 VM_MAP_PAGE_SIZE(map),
13945 submap_entry->vme_start,
13946 prot);
13947 vm_map_lookup_and_lock_object_copy_shadow_count++;
13948 vm_map_lookup_and_lock_object_copy_shadow_size += submap_entry_size;
13949 if (submap_entry_size > vm_map_lookup_and_lock_object_copy_shadow_max) {
13950 vm_map_lookup_and_lock_object_copy_shadow_max = submap_entry_size;
13951 }
13952 }
13953
13954 /*
13955 * Adjust the fault offset to the submap entry.
13956 */
13957 copy_offset = (local_vaddr -
13958 submap_entry->vme_start +
13959 VME_OFFSET(submap_entry));
13960
			/* This works differently than the */
13962 /* normal submap case. We go back */
13963 /* to the parent of the cow map and*/
13964 /* clip out the target portion of */
13965 /* the sub_map, substituting the */
13966 /* new copy object, */
13967
13968 subentry_protection = submap_entry->protection;
13969 subentry_max_protection = submap_entry->max_protection;
13970 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13971 subentry_permanent = submap_entry->vme_permanent;
13972 subentry_pmap_cs_associated = submap_entry->pmap_cs_associated;
13973
13974 vm_map_unlock(map);
13975 submap_entry = NULL; /* not valid after map unlock */
13976
13977 local_start = old_start;
13978 local_end = old_end;
13979 map = cow_sub_map_parent;
13980 *var_map = cow_sub_map_parent;
13981 vaddr = cow_parent_vaddr;
13982 cow_sub_map_parent = NULL;
13983
13984 if (!vm_map_lookup_entry(map,
13985 vaddr, &entry)) {
13986 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13987 vm_map_unlock(cow_sub_map_parent);
13988 }
13989 if ((*real_map != map)
13990 && (*real_map != cow_sub_map_parent)) {
13991 vm_map_unlock(*real_map);
13992 }
13993 *real_map = map;
13994 vm_object_deallocate(
13995 copy_object);
13996 copy_object = VM_OBJECT_NULL;
13997 vm_map_lock_write_to_read(map);
13998 DTRACE_VM4(submap_lookup_post_unlock,
13999 uint64_t, (uint64_t)entry->vme_start,
14000 uint64_t, (uint64_t)entry->vme_end,
14001 vm_map_offset_t, vaddr,
14002 int, object_copied);
14003 return KERN_INVALID_ADDRESS;
14004 }
14005
14006 /* clip out the portion of space */
14007 /* mapped by the sub map which */
14008 /* corresponds to the underlying */
14009 /* object */
14010
14011 /*
14012 * Clip (and unnest) the smallest nested chunk
14013 * possible around the faulting address...
14014 */
14015 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
14016 local_end = local_start + pmap_shared_region_size_min(map->pmap);
14017 /*
14018 * ... but don't go beyond the "old_start" to "old_end"
14019 * range, to avoid spanning over another VM region
14020 * with a possibly different VM object and/or offset.
14021 */
14022 if (local_start < old_start) {
14023 local_start = old_start;
14024 }
14025 if (local_end > old_end) {
14026 local_end = old_end;
14027 }
14028 /*
14029 * Adjust copy_offset to the start of the range.
14030 */
14031 copy_offset -= (vaddr - local_start);
14032
14033 vm_map_clip_start(map, entry, local_start);
14034 vm_map_clip_end(map, entry, local_end);
14035 if (entry->is_sub_map) {
14036 /* unnesting was done when clipping */
14037 assert(!entry->use_pmap);
14038 }
14039
14040 /* substitute copy object for */
14041 /* shared map entry */
14042 vm_map_deallocate(VME_SUBMAP(entry));
14043 assert(!entry->iokit_acct);
14044 entry->use_pmap = TRUE;
14045 VME_OBJECT_SET(entry, copy_object, false, 0);
14046
14047 /* propagate the submap entry's protections */
14048 if (entry->protection != VM_PROT_READ) {
14049 /*
14050 * Someone has already altered the top entry's
14051 * protections via vm_protect(VM_PROT_COPY).
14052 * Respect these new values and ignore the
14053 * submap entry's protections.
14054 */
14055 } else {
14056 /*
14057 * Regular copy-on-write: propagate the submap
14058 * entry's protections to the top map entry.
14059 */
14060 entry->protection |= subentry_protection;
14061 }
14062 entry->max_protection |= subentry_max_protection;
14063 /* propagate some attributes from subentry */
14064 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
14065 entry->vme_permanent = subentry_permanent;
14066 entry->pmap_cs_associated = subentry_pmap_cs_associated;
14067
14068 if ((entry->protection & VM_PROT_WRITE) &&
14069 (entry->protection & VM_PROT_EXECUTE) &&
14070 #if XNU_TARGET_OS_OSX
14071 map->pmap != kernel_pmap &&
14072 (vm_map_cs_enforcement(map)
14073 #if __arm64__
14074 || !VM_MAP_IS_EXOTIC(map)
14075 #endif /* __arm64__ */
14076 ) &&
14077 #endif /* XNU_TARGET_OS_OSX */
14078 !(entry->used_for_jit) &&
14079 VM_MAP_POLICY_WX_STRIP_X(map)) {
14080 DTRACE_VM3(cs_wx,
14081 uint64_t, (uint64_t)entry->vme_start,
14082 uint64_t, (uint64_t)entry->vme_end,
14083 vm_prot_t, entry->protection);
14084 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
14085 proc_selfpid(),
14086 (get_bsdtask_info(current_task())
14087 ? proc_name_address(get_bsdtask_info(current_task()))
14088 : "?"),
14089 __FUNCTION__);
14090 entry->protection &= ~VM_PROT_EXECUTE;
14091 }
14092
14093 if (object_copied) {
14094 VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
14095 entry->needs_copy = object_copied_needs_copy;
14096 entry->is_shared = FALSE;
14097 } else {
14098 assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
14099 assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
14100 assert(entry->wired_count == 0);
14101 VME_OFFSET_SET(entry, copy_offset);
14102 entry->needs_copy = TRUE;
14103 if (map != old_map) {
14104 entry->is_shared = TRUE;
14105 }
14106 }
14107 if (entry->inheritance == VM_INHERIT_SHARE) {
14108 entry->inheritance = VM_INHERIT_COPY;
14109 }
14110
14111 vm_map_lock_write_to_read(map);
14112 } else {
14113 if ((cow_sub_map_parent)
14114 && (cow_sub_map_parent != *real_map)
14115 && (cow_sub_map_parent != map)) {
14116 vm_map_unlock(cow_sub_map_parent);
14117 }
14118 entry = submap_entry;
14119 vaddr = local_vaddr;
14120 }
14121 }
14122
14123 /*
14124 * Check whether this task is allowed to have
14125 * this page.
14126 */
14127
14128 prot = entry->protection;
14129
14130 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
14131 /*
14132 * HACK -- if not a stack, then allow execution
14133 */
14134 prot |= VM_PROT_EXECUTE;
14135 }
14136
14137 if (mask_protections) {
14138 fault_type &= prot;
14139 if (fault_type == VM_PROT_NONE) {
14140 goto protection_failure;
14141 }
14142 }
14143 if (((fault_type & prot) != fault_type)
14144 #if __arm64__
14145 /* prefetch abort in execute-only page */
14146 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
14147 #elif defined(__x86_64__)
14148 /* Consider the UEXEC bit when handling an EXECUTE fault */
14149 && !((fault_type & VM_PROT_EXECUTE) && !(prot & VM_PROT_EXECUTE) && (prot & VM_PROT_UEXEC))
14150 #endif
14151 ) {
14152 protection_failure:
14153 if (*real_map != map) {
14154 vm_map_unlock(*real_map);
14155 }
14156 *real_map = map;
14157
14158 if ((fault_type & VM_PROT_EXECUTE) && prot) {
14159 log_stack_execution_failure((addr64_t)vaddr, prot);
14160 }
14161
14162 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
14163 DTRACE_VM3(prot_fault_detailed, vm_prot_t, fault_type, vm_prot_t, prot, void *, vaddr);
14164 /*
14165 * Noisy (esp. internally) and can be inferred from CrashReports. So OFF for now.
14166 *
14167 * ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_PROTECTION_FAILURE), 0);
14168 */
14169 return KERN_PROTECTION_FAILURE;
14170 }
14171
14172 /*
14173 * If this page is not pageable, we have to get
14174 * it for all possible accesses.
14175 */
14176
14177 *wired = (entry->wired_count != 0);
14178 if (*wired) {
14179 fault_type = prot;
14180 }
14181
14182 /*
14183 * If the entry was copy-on-write, we either ...
14184 */
14185
14186 if (entry->needs_copy) {
14187 /*
14188 * If we want to write the page, we may as well
14189 * handle that now since we've got the map locked.
14190 *
14191 * If we don't need to write the page, we just
14192 * demote the permissions allowed.
14193 */
14194
14195 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
14196 /*
14197 * Make a new object, and place it in the
14198 * object chain. Note that no new references
14199 * have appeared -- one just moved from the
14200 * map to the new object.
14201 */
14202
14203 if (vm_map_lock_read_to_write(map)) {
14204 vm_map_lock_read(map);
14205 goto RetryLookup;
14206 }
14207
14208 if (VME_OBJECT(entry)->shadowed == FALSE) {
14209 vm_object_lock(VME_OBJECT(entry));
14210 VME_OBJECT(entry)->shadowed = TRUE;
14211 vm_object_unlock(VME_OBJECT(entry));
14212 }
14213 VME_OBJECT_SHADOW(entry,
14214 (vm_map_size_t) (entry->vme_end -
14215 entry->vme_start),
14216 vm_map_always_shadow(map));
14217 entry->needs_copy = FALSE;
14218
14219 vm_map_lock_write_to_read(map);
14220 }
14221 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
14222 /*
14223 * We're attempting to read a copy-on-write
14224 * page -- don't allow writes.
14225 */
14226
14227 prot &= (~VM_PROT_WRITE);
14228 }
14229 }
14230
14231 if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
14232 /*
14233 * We went through a "needs_copy" submap without triggering
14234 * a copy, so granting write access to the page would bypass
14235 * that submap's "needs_copy".
14236 */
14237 assert(!(fault_type & VM_PROT_WRITE));
14238 assert(!*wired);
14239 assert(!force_copy);
14240 // printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
14241 prot &= ~VM_PROT_WRITE;
14242 }
14243
14244 /*
14245 * Create an object if necessary.
14246 */
14247 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
14248 if (vm_map_lock_read_to_write(map)) {
14249 vm_map_lock_read(map);
14250 goto RetryLookup;
14251 }
14252
14253 VME_OBJECT_SET(entry,
14254 vm_object_allocate(
14255 (vm_map_size_t)(entry->vme_end -
14256 entry->vme_start)), false, 0);
14257 VME_OFFSET_SET(entry, 0);
14258 assert(entry->use_pmap);
14259 vm_map_lock_write_to_read(map);
14260 }
14261
14262 /*
14263 * Return the object/offset from this entry. If the entry
14264 * was copy-on-write or empty, it has been fixed up. Also
14265 * return the protection.
14266 */
14267
14268 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
14269 *object = VME_OBJECT(entry);
14270 *out_prot = prot;
14271 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
14272
14273 if (fault_info) {
14274 fault_info->interruptible = THREAD_UNINT; /* for now... */
14275 /* ... the caller will change "interruptible" if needed */
14276 fault_info->cluster_size = 0;
14277 fault_info->user_tag = VME_ALIAS(entry);
14278 fault_info->pmap_options = 0;
14279 if (entry->iokit_acct ||
14280 (!entry->is_sub_map && !entry->use_pmap)) {
14281 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14282 }
14283 fault_info->behavior = entry->behavior;
14284 fault_info->lo_offset = VME_OFFSET(entry);
14285 fault_info->hi_offset =
14286 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
14287 fault_info->no_cache = entry->no_cache;
14288 fault_info->stealth = FALSE;
14289 fault_info->io_sync = FALSE;
14290 if (entry->used_for_jit ||
14291 entry->vme_resilient_codesign) {
14292 fault_info->cs_bypass = TRUE;
14293 } else {
14294 fault_info->cs_bypass = FALSE;
14295 }
14296 fault_info->pmap_cs_associated = FALSE;
14297 #if CONFIG_PMAP_CS
14298 if (entry->pmap_cs_associated) {
14299 /*
14300 * The pmap layer will validate this page
14301 * before allowing it to be executed from.
14302 */
14303 fault_info->pmap_cs_associated = TRUE;
14304 }
14305 #endif /* CONFIG_PMAP_CS */
14306 fault_info->mark_zf_absent = FALSE;
14307 fault_info->batch_pmap_op = FALSE;
14308 fault_info->resilient_media = entry->vme_resilient_media;
14309 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
14310 if (entry->translated_allow_execute) {
14311 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14312 }
14313 }
14314
14315 /*
14316 * Lock the object to prevent it from disappearing
14317 */
14318 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14319 if (contended == NULL) {
14320 vm_object_lock(*object);
14321 } else {
14322 *contended = vm_object_lock_check_contended(*object);
14323 }
14324 } else {
14325 vm_object_lock_shared(*object);
14326 }
14327
14328 /*
14329 * Save the version number
14330 */
14331
14332 out_version->main_timestamp = map->timestamp;
14333
14334 return KERN_SUCCESS;
14335 }
14336
14337
14338 /*
14339 * vm_map_verify:
14340 *
14341 * Verifies that the map in question has not changed
14342 * since the given version. The map has to be locked
14343 * ("shared" mode is fine) before calling this function
14344 * and it will be returned locked too.
14345 */
14346 boolean_t
vm_map_verify(vm_map_t map,vm_map_version_t * version)14347 vm_map_verify(
14348 vm_map_t map,
14349 vm_map_version_t *version) /* REF */
14350 {
14351 boolean_t result;
14352
14353 vm_map_lock_assert_held(map);
14354 result = (map->timestamp == version->main_timestamp);
14355
14356 return result;
14357 }
14358
14359 /*
14360 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14361 * Goes away after regular vm_region_recurse function migrates to
14362 * 64 bits
14363 * vm_region_recurse: A form of vm_region which follows the
14364 * submaps in a target map
14365 *
14366 */
14367
14368 kern_return_t
vm_map_region_recurse_64(
14370 vm_map_t map,
14371 vm_map_offset_t *address, /* IN/OUT */
14372 vm_map_size_t *size, /* OUT */
14373 natural_t *nesting_depth, /* IN/OUT */
14374 vm_region_submap_info_64_t submap_info, /* IN/OUT */
14375 mach_msg_type_number_t *count) /* IN/OUT */
14376 {
14377 mach_msg_type_number_t original_count;
14378 vm_region_extended_info_data_t extended;
14379 vm_map_entry_t tmp_entry;
14380 vm_map_offset_t user_address;
14381 unsigned int user_max_depth;
14382
14383 /*
14384 * "curr_entry" is the VM map entry preceding or including the
14385 * address we're looking for.
14386 * "curr_map" is the map or sub-map containing "curr_entry".
14387 * "curr_address" is the equivalent of the top map's "user_address"
14388 * in the current map.
14389 * "curr_offset" is the cumulated offset of "curr_map" in the
14390 * target task's address space.
14391 * "curr_depth" is the depth of "curr_map" in the chain of
14392 * sub-maps.
14393 *
14394 * "curr_max_below" and "curr_max_above" limit the range (around
14395 * "curr_address") we should take into account in the current (sub)map.
14396 * They limit the range to what's visible through the map entries
14397 * we've traversed from the top map to the current map.
14398 *
14399 */
14400 vm_map_entry_t curr_entry;
14401 vm_map_address_t curr_address;
14402 vm_map_offset_t curr_offset;
14403 vm_map_t curr_map;
14404 unsigned int curr_depth;
14405 vm_map_offset_t curr_max_below, curr_max_above;
14406 vm_map_offset_t curr_skip;
14407
14408 /*
14409 * "next_" is the same as "curr_" but for the VM region immediately
14410 * after the address we're looking for. We need to keep track of this
14411 * too because we want to return info about that region if the
14412 * address we're looking for is not mapped.
14413 */
14414 vm_map_entry_t next_entry;
14415 vm_map_offset_t next_offset;
14416 vm_map_offset_t next_address;
14417 vm_map_t next_map;
14418 unsigned int next_depth;
14419 vm_map_offset_t next_max_below, next_max_above;
14420 vm_map_offset_t next_skip;
14421
14422 boolean_t look_for_pages;
14423 vm_region_submap_short_info_64_t short_info;
14424 boolean_t do_region_footprint;
14425 int effective_page_size, effective_page_shift;
14426 boolean_t submap_needed_copy;
14427
14428 if (map == VM_MAP_NULL) {
14429 /* no address space to work on */
14430 return KERN_INVALID_ARGUMENT;
14431 }
14432
14433 effective_page_shift = vm_self_region_page_shift(map);
14434 effective_page_size = (1 << effective_page_shift);
14435
14436 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14437 /*
14438 * "info" structure is not big enough and
14439 * would overflow
14440 */
14441 return KERN_INVALID_ARGUMENT;
14442 }
14443
14444 do_region_footprint = task_self_region_footprint();
14445 original_count = *count;
14446
14447 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14448 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14449 look_for_pages = FALSE;
14450 short_info = (vm_region_submap_short_info_64_t) submap_info;
14451 submap_info = NULL;
14452 } else {
14453 look_for_pages = TRUE;
14454 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14455 short_info = NULL;
14456
14457 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14458 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14459 }
14460 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14461 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14462 }
14463 }
14464
14465 user_address = *address;
14466 user_max_depth = *nesting_depth;
14467 submap_needed_copy = FALSE;
14468
14469 if (not_in_kdp) {
14470 vm_map_lock_read(map);
14471 }
14472
14473 recurse_again:
14474 curr_entry = NULL;
14475 curr_map = map;
14476 curr_address = user_address;
14477 curr_offset = 0;
14478 curr_skip = 0;
14479 curr_depth = 0;
14480 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14481 curr_max_below = curr_address;
14482
14483 next_entry = NULL;
14484 next_map = NULL;
14485 next_address = 0;
14486 next_offset = 0;
14487 next_skip = 0;
14488 next_depth = 0;
14489 next_max_above = (vm_map_offset_t) -1;
14490 next_max_below = (vm_map_offset_t) -1;
14491
14492 for (;;) {
14493 if (vm_map_lookup_entry(curr_map,
14494 curr_address,
14495 &tmp_entry)) {
14496 /* tmp_entry contains the address we're looking for */
14497 curr_entry = tmp_entry;
14498 } else {
14499 vm_map_offset_t skip;
14500 /*
14501 * The address is not mapped. "tmp_entry" is the
14502 * map entry preceding the address. We want the next
14503 * one, if it exists.
14504 */
14505 curr_entry = tmp_entry->vme_next;
14506
14507 if (curr_entry == vm_map_to_entry(curr_map) ||
14508 (curr_entry->vme_start >=
14509 curr_address + curr_max_above)) {
14510 /* no next entry at this level: stop looking */
14511 if (not_in_kdp) {
14512 vm_map_unlock_read(curr_map);
14513 }
14514 curr_entry = NULL;
14515 curr_map = NULL;
14516 curr_skip = 0;
14517 curr_offset = 0;
14518 curr_depth = 0;
14519 curr_max_above = 0;
14520 curr_max_below = 0;
14521 break;
14522 }
14523
14524 /* adjust current address and offset */
14525 skip = curr_entry->vme_start - curr_address;
14526 curr_address = curr_entry->vme_start;
14527 curr_skip += skip;
14528 curr_offset += skip;
14529 curr_max_above -= skip;
14530 curr_max_below = 0;
14531 }
14532
14533 /*
14534 * Is the next entry at this level closer to the address (or
14535 * deeper in the submap chain) than the one we had
14536 * so far ?
14537 */
14538 tmp_entry = curr_entry->vme_next;
14539 if (tmp_entry == vm_map_to_entry(curr_map)) {
14540 /* no next entry at this level */
14541 } else if (tmp_entry->vme_start >=
14542 curr_address + curr_max_above) {
14543 /*
14544 * tmp_entry is beyond the scope of what we mapped of
14545 * this submap in the upper level: ignore it.
14546 */
14547 } else if ((next_entry == NULL) ||
14548 (tmp_entry->vme_start + curr_offset <=
14549 next_entry->vme_start + next_offset)) {
14550 /*
14551 * We didn't have a "next_entry" or this one is
14552 * closer to the address we're looking for:
14553 * use this "tmp_entry" as the new "next_entry".
14554 */
14555 if (next_entry != NULL) {
14556 /* unlock the last "next_map" */
14557 if (next_map != curr_map && not_in_kdp) {
14558 vm_map_unlock_read(next_map);
14559 }
14560 }
14561 next_entry = tmp_entry;
14562 next_map = curr_map;
14563 next_depth = curr_depth;
14564 next_address = next_entry->vme_start;
14565 next_skip = curr_skip;
14566 next_skip += (next_address - curr_address);
14567 next_offset = curr_offset;
14568 next_offset += (next_address - curr_address);
14569 next_max_above = MIN(next_max_above, curr_max_above);
14570 next_max_above = MIN(next_max_above,
14571 next_entry->vme_end - next_address);
14572 next_max_below = MIN(next_max_below, curr_max_below);
14573 next_max_below = MIN(next_max_below,
14574 next_address - next_entry->vme_start);
14575 }
14576
14577 /*
14578 * "curr_max_{above,below}" allow us to keep track of the
14579 * portion of the submap that is actually mapped at this level:
14580 * the rest of that submap is irrelevant to us, since it's not
14581 * mapped here.
14582 * The relevant portion of the map starts at
14583 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14584 */
14585 curr_max_above = MIN(curr_max_above,
14586 curr_entry->vme_end - curr_address);
14587 curr_max_below = MIN(curr_max_below,
14588 curr_address - curr_entry->vme_start);
14589
14590 if (!curr_entry->is_sub_map ||
14591 curr_depth >= user_max_depth) {
14592 /*
14593 * We hit a leaf map or we reached the maximum depth
14594 * we could, so stop looking. Keep the current map
14595 * locked.
14596 */
14597 break;
14598 }
14599
14600 /*
14601 * Get down to the next submap level.
14602 */
14603
14604 if (curr_entry->needs_copy) {
14605 /* everything below this is effectively copy-on-write */
14606 submap_needed_copy = TRUE;
14607 }
14608
14609 /*
14610 * Lock the next level and unlock the current level,
14611 * unless we need to keep it locked to access the "next_entry"
14612 * later.
14613 */
14614 if (not_in_kdp) {
14615 vm_map_lock_read(VME_SUBMAP(curr_entry));
14616 }
14617 if (curr_map == next_map) {
14618 /* keep "next_map" locked in case we need it */
14619 } else {
14620 /* release this map */
14621 if (not_in_kdp) {
14622 vm_map_unlock_read(curr_map);
14623 }
14624 }
14625
14626 /*
14627 * Adjust the offset. "curr_entry" maps the submap
14628 * at relative address "curr_entry->vme_start" in the
14629 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14630 * bytes of the submap.
14631 * "curr_offset" always represents the offset of a virtual
14632 * address in the curr_map relative to the absolute address
14633 * space (i.e. the top-level VM map).
14634 */
14635 curr_offset +=
14636 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14637 curr_address = user_address + curr_offset;
14638 /* switch to the submap */
14639 curr_map = VME_SUBMAP(curr_entry);
14640 curr_depth++;
14641 curr_entry = NULL;
14642 }
14643
14644 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14645 // so probably should be a real 32b ID vs. ptr.
14646 // Current users just check for equality
14647
14648 if (curr_entry == NULL) {
14649 /* no VM region contains the address... */
14650
14651 if (do_region_footprint && /* we want footprint numbers */
14652 next_entry == NULL && /* & there are no more regions */
14653 /* & we haven't already provided our fake region: */
14654 user_address <= vm_map_last_entry(map)->vme_end) {
14655 ledger_amount_t ledger_resident, ledger_compressed;
14656
14657 /*
14658 * Add a fake memory region to account for
14659 * purgeable and/or ledger-tagged memory that
14660 * counts towards this task's memory footprint,
14661 * i.e. the resident/compressed pages of non-volatile
14662 * objects owned by that task.
14663 */
14664 task_ledgers_footprint(map->pmap->ledger,
14665 &ledger_resident,
14666 &ledger_compressed);
14667 if (ledger_resident + ledger_compressed == 0) {
14668 /* no purgeable memory usage to report */
14669 return KERN_INVALID_ADDRESS;
14670 }
14671 /* fake region to show nonvolatile footprint */
14672 if (look_for_pages) {
14673 submap_info->protection = VM_PROT_DEFAULT;
14674 submap_info->max_protection = VM_PROT_DEFAULT;
14675 submap_info->inheritance = VM_INHERIT_DEFAULT;
14676 submap_info->offset = 0;
14677 submap_info->user_tag = -1;
14678 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14679 submap_info->pages_shared_now_private = 0;
14680 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14681 submap_info->pages_dirtied = submap_info->pages_resident;
14682 submap_info->ref_count = 1;
14683 submap_info->shadow_depth = 0;
14684 submap_info->external_pager = 0;
14685 submap_info->share_mode = SM_PRIVATE;
14686 if (submap_needed_copy) {
14687 submap_info->share_mode = SM_COW;
14688 }
14689 submap_info->is_submap = 0;
14690 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14691 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14692 submap_info->user_wired_count = 0;
14693 submap_info->pages_reusable = 0;
14694 } else {
14695 short_info->user_tag = -1;
14696 short_info->offset = 0;
14697 short_info->protection = VM_PROT_DEFAULT;
14698 short_info->inheritance = VM_INHERIT_DEFAULT;
14699 short_info->max_protection = VM_PROT_DEFAULT;
14700 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14701 short_info->user_wired_count = 0;
14702 short_info->is_submap = 0;
14703 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14704 short_info->external_pager = 0;
14705 short_info->shadow_depth = 0;
14706 short_info->share_mode = SM_PRIVATE;
14707 if (submap_needed_copy) {
14708 short_info->share_mode = SM_COW;
14709 }
14710 short_info->ref_count = 1;
14711 }
14712 *nesting_depth = 0;
14713 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14714 // *address = user_address;
14715 *address = vm_map_last_entry(map)->vme_end;
14716 return KERN_SUCCESS;
14717 }
14718
14719 if (next_entry == NULL) {
14720 /* ... and no VM region follows it either */
14721 return KERN_INVALID_ADDRESS;
14722 }
14723 /* ... gather info about the next VM region */
14724 curr_entry = next_entry;
14725 curr_map = next_map; /* still locked ... */
14726 curr_address = next_address;
14727 curr_skip = next_skip;
14728 curr_offset = next_offset;
14729 curr_depth = next_depth;
14730 curr_max_above = next_max_above;
14731 curr_max_below = next_max_below;
14732 } else {
14733 /* we won't need "next_entry" after all */
14734 if (next_entry != NULL) {
14735 /* release "next_map" */
14736 if (next_map != curr_map && not_in_kdp) {
14737 vm_map_unlock_read(next_map);
14738 }
14739 }
14740 }
14741 next_entry = NULL;
14742 next_map = NULL;
14743 next_offset = 0;
14744 next_skip = 0;
14745 next_depth = 0;
14746 next_max_below = -1;
14747 next_max_above = -1;
14748
14749 if (curr_entry->is_sub_map &&
14750 curr_depth < user_max_depth) {
14751 /*
14752 * We're not as deep as we could be: we must have
14753 * gone back up after not finding anything mapped
14754 * below the original top-level map entry's.
14755 * Let's move "curr_address" forward and recurse again.
14756 */
14757 user_address = curr_address;
14758 goto recurse_again;
14759 }
14760
14761 *nesting_depth = curr_depth;
14762 *size = curr_max_above + curr_max_below;
14763 *address = user_address + curr_skip - curr_max_below;
14764
14765 if (look_for_pages) {
14766 submap_info->user_tag = VME_ALIAS(curr_entry);
14767 submap_info->offset = VME_OFFSET(curr_entry);
14768 submap_info->protection = curr_entry->protection;
14769 submap_info->inheritance = curr_entry->inheritance;
14770 submap_info->max_protection = curr_entry->max_protection;
14771 submap_info->behavior = curr_entry->behavior;
14772 submap_info->user_wired_count = curr_entry->user_wired_count;
14773 submap_info->is_submap = curr_entry->is_sub_map;
14774 if (curr_entry->is_sub_map) {
14775 submap_info->object_id = VM_OBJECT_ID(VME_SUBMAP(curr_entry));
14776 } else {
14777 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14778 }
14779 } else {
14780 short_info->user_tag = VME_ALIAS(curr_entry);
14781 short_info->offset = VME_OFFSET(curr_entry);
14782 short_info->protection = curr_entry->protection;
14783 short_info->inheritance = curr_entry->inheritance;
14784 short_info->max_protection = curr_entry->max_protection;
14785 short_info->behavior = curr_entry->behavior;
14786 short_info->user_wired_count = curr_entry->user_wired_count;
14787 short_info->is_submap = curr_entry->is_sub_map;
14788 if (curr_entry->is_sub_map) {
14789 short_info->object_id = VM_OBJECT_ID(VME_SUBMAP(curr_entry));
14790 } else {
14791 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14792 }
14793 }
14794
14795 extended.pages_resident = 0;
14796 extended.pages_swapped_out = 0;
14797 extended.pages_shared_now_private = 0;
14798 extended.pages_dirtied = 0;
14799 extended.pages_reusable = 0;
14800 extended.external_pager = 0;
14801 extended.shadow_depth = 0;
14802 extended.share_mode = SM_EMPTY;
14803 extended.ref_count = 0;
14804
14805 if (not_in_kdp) {
14806 if (!curr_entry->is_sub_map) {
14807 vm_map_offset_t range_start, range_end;
14808 range_start = MAX((curr_address - curr_max_below),
14809 curr_entry->vme_start);
14810 range_end = MIN((curr_address + curr_max_above),
14811 curr_entry->vme_end);
14812 vm_map_region_walk(curr_map,
14813 range_start,
14814 curr_entry,
14815 (VME_OFFSET(curr_entry) +
14816 (range_start -
14817 curr_entry->vme_start)),
14818 range_end - range_start,
14819 &extended,
14820 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14821 if (extended.external_pager &&
14822 extended.ref_count == 2 &&
14823 extended.share_mode == SM_SHARED) {
14824 extended.share_mode = SM_PRIVATE;
14825 }
14826 if (submap_needed_copy) {
14827 extended.share_mode = SM_COW;
14828 }
14829 } else {
14830 if (curr_entry->use_pmap) {
14831 extended.share_mode = SM_TRUESHARED;
14832 } else {
14833 extended.share_mode = SM_PRIVATE;
14834 }
14835 extended.ref_count = os_ref_get_count_raw(&VME_SUBMAP(curr_entry)->map_refcnt);
14836 }
14837 }
14838
14839 if (look_for_pages) {
14840 submap_info->pages_resident = extended.pages_resident;
14841 submap_info->pages_swapped_out = extended.pages_swapped_out;
14842 submap_info->pages_shared_now_private =
14843 extended.pages_shared_now_private;
14844 submap_info->pages_dirtied = extended.pages_dirtied;
14845 submap_info->external_pager = extended.external_pager;
14846 submap_info->shadow_depth = extended.shadow_depth;
14847 submap_info->share_mode = extended.share_mode;
14848 submap_info->ref_count = extended.ref_count;
14849
14850 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14851 submap_info->pages_reusable = extended.pages_reusable;
14852 }
14853 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14854 if (curr_entry->is_sub_map) {
14855 submap_info->object_id_full = (vm_object_id_t)VM_KERNEL_ADDRPERM(VME_SUBMAP(curr_entry));
14856 } else if (VME_OBJECT(curr_entry)) {
14857 submap_info->object_id_full = (vm_object_id_t)VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry));
14858 } else {
14859 submap_info->object_id_full = 0ull;
14860 }
14861 }
14862 } else {
14863 short_info->external_pager = extended.external_pager;
14864 short_info->shadow_depth = extended.shadow_depth;
14865 short_info->share_mode = extended.share_mode;
14866 short_info->ref_count = extended.ref_count;
14867 }
14868
14869 if (not_in_kdp) {
14870 vm_map_unlock_read(curr_map);
14871 }
14872
14873 return KERN_SUCCESS;
14874 }
14875
14876 /*
14877 * vm_region:
14878 *
14879 * User call to obtain information about a region in
14880 * a task's address map. Currently, only one flavor is
14881 * supported.
14882 *
14883 * XXX The reserved and behavior fields cannot be filled
14884 * in until the vm merge from the IK is completed, and
14885 * vm_reserve is implemented.
14886 */
14887
kern_return_t
vm_map_region(
	vm_map_t                 map,
	vm_map_offset_t         *address,               /* IN/OUT */
	vm_map_size_t           *size,                  /* OUT */
	vm_region_flavor_t       flavor,                /* IN */
	vm_region_info_t         info,                  /* OUT */
	mach_msg_type_number_t  *count,                 /* IN/OUT */
	mach_port_t             *object_name)           /* OUT */
{
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	vm_map_offset_t         start;

	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (flavor) {
	case VM_REGION_BASIC_INFO:
		/* legacy for old 32-bit objects info */
	{
		vm_region_basic_info_t  basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		/*
		 * If "start" is not mapped, report the next region instead;
		 * fail only if no region follows "start".
		 */
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		/* legacy flavor: the object offset is truncated to 32 bits */
		basic->offset = (uint32_t)VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}

	case VM_REGION_BASIC_INFO_64:
	{
		vm_region_basic_info_64_t       basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_64_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT_64;

		vm_map_lock_read(map);

		start = *address;
		/* as above: fall forward to the next region if "start" is unmapped */
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	case VM_REGION_EXTENDED_INFO:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}
		/*
		 * Falls through to the legacy case: the legacy count check
		 * below is smaller, so it also passes for the full flavor.
		 * "flavor" is re-tested further down to decide which count
		 * to report back.
		 */
		OS_FALLTHROUGH;
	case VM_REGION_EXTENDED_INFO__legacy:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
			return KERN_INVALID_ARGUMENT;
		}

		{
			vm_region_extended_info_t       extended;
			mach_msg_type_number_t original_count;
			int effective_page_size, effective_page_shift;

			extended = (vm_region_extended_info_t) info;

			effective_page_shift = vm_self_region_page_shift(map);
			effective_page_size = (1 << effective_page_shift);

			vm_map_lock_read(map);

			start = *address;
			if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
				if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
					vm_map_unlock_read(map);
					return KERN_INVALID_ADDRESS;
				}
			} else {
				entry = tmp_entry;
			}
			start = entry->vme_start;

			/* reset the counters before the walk accumulates into them */
			extended->protection = entry->protection;
			extended->user_tag = VME_ALIAS(entry);
			extended->pages_resident = 0;
			extended->pages_swapped_out = 0;
			extended->pages_shared_now_private = 0;
			extended->pages_dirtied = 0;
			extended->external_pager = 0;
			extended->shadow_depth = 0;

			original_count = *count;
			if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
				*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
			} else {
				/* pages_reusable only exists in the full (non-legacy) struct */
				extended->pages_reusable = 0;
				*count = VM_REGION_EXTENDED_INFO_COUNT;
			}

			vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);

			/*
			 * A pager-backed object whose only other reference is
			 * the pager's is effectively private to this map.
			 */
			if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
				extended->share_mode = SM_PRIVATE;
			}

			if (object_name) {
				*object_name = IP_NULL;
			}
			*address = start;
			*size = (entry->vme_end - start);

			vm_map_unlock_read(map);
			return KERN_SUCCESS;
		}
	case VM_REGION_TOP_INFO:
	{
		vm_region_top_info_t    top;

		if (*count < VM_REGION_TOP_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		top = (vm_region_top_info_t) info;
		*count = VM_REGION_TOP_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}
		start = entry->vme_start;

		top->private_pages_resident = 0;
		top->shared_pages_resident = 0;

		vm_map_region_top_walk(entry, top);

		if (object_name) {
			*object_name = IP_NULL;
		}
		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
15110
/*
 * Number of resident pages of "obj" to report against a mapping of
 * "entry_size" pages: reusable pages are excluded, and when the whole
 * object is marked all_reusable only its wired pages are counted.
 * The result is clamped to the size of the mapping.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
	MIN((entry_size),                                               \
	    ((obj)->all_reusable ?                                      \
	    (obj)->wired_page_count :                                   \
	    (obj)->resident_page_count - (obj)->reusable_page_count))
15116
/*
 * vm_map_region_top_walk:
 *
 * Fill in "top" with page-residency accounting for "entry"'s backing
 * object and, when it is a copy-on-write chain, for all of its shadow
 * objects.  Callers in this file (see vm_map_region, VM_REGION_TOP_INFO)
 * hold the map lock for reading.
 */
void
vm_map_region_top_walk(
	vm_map_entry_t          entry,
	vm_region_top_info_t    top)
{
	/* submaps and unbacked entries have nothing to account for */
	if (entry->is_sub_map || VME_OBJECT(entry) == 0) {
		top->share_mode = SM_EMPTY;
		top->ref_count = 0;
		top->obj_id = 0;
		return;
	}

	{
		struct  vm_object *obj, *tmp_obj;
		int             ref_count;
		uint32_t        entry_size;

		/* size of the mapping, in pages */
		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

		obj = VME_OBJECT(entry);

		vm_object_lock(obj);

		/* discount the reference held for in-flight paging activity */
		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}

		assert(obj->reusable_page_count <= obj->resident_page_count);
		if (obj->shadow) {
			/*
			 * Copy-on-write chain: the top object's pages are
			 * private only if we hold the sole reference.
			 */
			if (ref_count == 1) {
				top->private_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			}
			top->ref_count  = ref_count;
			top->share_mode = SM_COW;

			/* walk the shadow chain with hand-over-hand locking */
			while ((tmp_obj = obj->shadow)) {
				vm_object_lock(tmp_obj);
				vm_object_unlock(obj);
				obj = tmp_obj;

				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
					ref_count--;
				}

				assert(obj->reusable_page_count <= obj->resident_page_count);
				top->shared_pages_resident +=
				    OBJ_RESIDENT_COUNT(obj, entry_size);
				/* -1: don't double-count the ref from the shadower */
				top->ref_count += ref_count - 1;
			}
		} else {
			if (entry->superpage_size) {
				top->share_mode = SM_LARGE_PAGE;
				top->shared_pages_resident = 0;
				top->private_pages_resident = entry_size;
			} else if (entry->needs_copy) {
				top->share_mode = SM_COW;
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				/* a "named" object carries one extra reference */
				if (ref_count == 1 ||
				    (ref_count == 2 && obj->named)) {
					top->share_mode = SM_PRIVATE;
					top->private_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				} else {
					top->share_mode = SM_SHARED;
					top->shared_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				}
			}
			top->ref_count = ref_count;
		}
		/* XXX K64: obj_id will be truncated */
		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

		vm_object_unlock(obj);
	}
}
15201
/*
 * vm_map_region_walk:
 *
 * Gather extended region statistics for the range of "entry" starting
 * at map address "va" / object offset "offset" and spanning "range"
 * bytes, into "extended".  When "look_for_pages" is TRUE each page is
 * examined individually; otherwise (or for footprint queries) only
 * object-level information (shadow depth, pager, share mode) is
 * collected.  "count" tells vm_map_region_look_for_page whether the
 * caller's struct has the pages_reusable field.
 * Callers in this file hold the map lock for reading.
 */
void
vm_map_region_walk(
	vm_map_t                        map,
	vm_map_offset_t                 va,
	vm_map_entry_t                  entry,
	vm_object_offset_t              offset,
	vm_object_size_t                range,
	vm_region_extended_info_t       extended,
	boolean_t                       look_for_pages,
	mach_msg_type_number_t          count)
{
	struct vm_object *obj, *tmp_obj;
	vm_map_offset_t       last_offset;
	int               i;
	int               ref_count;
	struct vm_object        *shadow_object;
	unsigned short          shadow_depth;
	boolean_t         do_region_footprint;
	int                     effective_page_size, effective_page_shift;
	vm_map_offset_t         effective_page_mask;

	do_region_footprint = task_self_region_footprint();

	/* nothing to report for submaps, unbacked or physically
	 * contiguous (non-superpage) entries */
	if ((entry->is_sub_map) ||
	    (VME_OBJECT(entry) == 0) ||
	    (VME_OBJECT(entry)->phys_contiguous &&
	    !entry->superpage_size)) {
		extended->share_mode = SM_EMPTY;
		extended->ref_count = 0;
		return;
	}

	if (entry->superpage_size) {
		/* superpages are reported wholesale, without a page walk */
		extended->shadow_depth = 0;
		extended->share_mode = SM_LARGE_PAGE;
		extended->ref_count = 1;
		extended->external_pager = 0;

		/* TODO4K: Superpage in 4k mode? */
		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
		extended->shadow_depth = 0;
		return;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);
	effective_page_mask = effective_page_size - 1;

	offset = vm_map_trunc_page(offset, effective_page_mask);

	obj = VME_OBJECT(entry);

	vm_object_lock(obj);

	/* discount the reference held for in-flight paging activity */
	if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
		ref_count--;
	}

	if (look_for_pages) {
		/* examine every page of the requested range */
		for (last_offset = offset + range;
		    offset < last_offset;
		    offset += effective_page_size, va += effective_page_size) {
			if (do_region_footprint) {
				int disp;

				disp = 0;
				if (map->has_corpse_footprint) {
					/*
					 * Query the page info data we saved
					 * while forking the corpse.
					 */
					vm_map_corpse_footprint_query_page_info(
						map,
						va,
						&disp);
				} else {
					/*
					 * Query the pmap.
					 */
					vm_map_footprint_query_page_info(
						map,
						entry,
						va,
						&disp);
				}
				if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
					extended->pages_resident++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
					extended->pages_reusable++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
					extended->pages_dirtied++;
				}
				if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
					extended->pages_swapped_out++;
				}
				continue;
			}

			vm_map_region_look_for_page(map, va, obj,
			    vm_object_trunc_page(offset), ref_count,
			    0, extended, count);
		}

		if (do_region_footprint) {
			/*
			 * Footprint queries skipped the per-page shadow walk
			 * above, so jump into the object-level pass below.
			 */
			goto collect_object_info;
		}
	} else {
collect_object_info:
		/* object-level pass: compute shadow depth and pager status */
		shadow_object = obj->shadow;
		shadow_depth = 0;

		if (!(obj->internal)) {
			extended->external_pager = 1;
		}

		if (shadow_object != VM_OBJECT_NULL) {
			/* walk the shadow chain with hand-over-hand locking */
			vm_object_lock(shadow_object);
			for (;
			    shadow_object != VM_OBJECT_NULL;
			    shadow_depth++) {
				vm_object_t    next_shadow;

				if (!(shadow_object->internal)) {
					extended->external_pager = 1;
				}

				next_shadow = shadow_object->shadow;
				if (next_shadow) {
					vm_object_lock(next_shadow);
				}
				vm_object_unlock(shadow_object);
				shadow_object = next_shadow;
			}
		}
		extended->shadow_depth = shadow_depth;
	}

	/* classify the sharing mode from shadowing and reference counts */
	if (extended->shadow_depth || entry->needs_copy) {
		extended->share_mode = SM_COW;
	} else {
		if (ref_count == 1) {
			extended->share_mode = SM_PRIVATE;
		} else {
			if (obj->true_share) {
				extended->share_mode = SM_TRUESHARED;
			} else {
				extended->share_mode = SM_SHARED;
			}
		}
	}
	/* shadow_depth refs belong to the chain itself, not to sharers */
	extended->ref_count = ref_count - extended->shadow_depth;

	/* accumulate the reference counts of the shadow chain */
	for (i = 0; i < extended->shadow_depth; i++) {
		if ((tmp_obj = obj->shadow) == 0) {
			break;
		}
		vm_object_lock(tmp_obj);
		vm_object_unlock(obj);

		if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
			ref_count--;
		}

		extended->ref_count += ref_count;
		obj = tmp_obj;
	}
	vm_object_unlock(obj);

	if (extended->share_mode == SM_SHARED) {
		/*
		 * Check whether all the references to the object come from
		 * this very map: if so it is aliased, not truly shared.
		 */
		vm_map_entry_t       cur;
		vm_map_entry_t       last;
		int      my_refs;

		obj = VME_OBJECT(entry);
		last = vm_map_to_entry(map);
		my_refs = 0;

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}
		for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
			my_refs += vm_map_region_count_obj_refs(cur, obj);
		}

		if (my_refs == ref_count) {
			extended->share_mode = SM_PRIVATE_ALIASED;
		} else if (my_refs > 1) {
			extended->share_mode = SM_SHARED_ALIASED;
		}
	}
}
15395


/*
 * The caller's object is locked on entry and still locked on return;
 * any shadow objects visited along the chain are locked and unlocked
 * internally (hand-over-hand) and are not locked on return.
 */

/*
 * vm_map_region_look_for_page:
 *
 * Account for the page at "offset" in "object" (following the shadow
 * chain as needed) in the counters of "extended".  "max_refcnt" is the
 * largest reference count seen so far along the chain, used to decide
 * whether a found page would become private on copy-on-write.  "depth"
 * is the shadow depth at which "object" sits; the deepest level reached
 * is recorded in extended->shadow_depth.  "count" indicates whether the
 * caller's struct is large enough to carry pages_reusable.
 */
static void
vm_map_region_look_for_page(
	__unused vm_map_t               map,
	__unused vm_map_offset_t        va,
	vm_object_t                     object,
	vm_object_offset_t              offset,
	int                             max_refcnt,
	unsigned short                  depth,
	vm_region_extended_info_t       extended,
	mach_msg_type_number_t          count)
{
	vm_page_t       p;
	vm_object_t     shadow;
	int             ref_count;
	vm_object_t     caller_object;

	shadow = object->shadow;
	caller_object = object;


	while (TRUE) {
		if (!(object->internal)) {
			extended->external_pager = 1;
		}

		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			/*
			 * Page is resident at this level.  If it sits above a
			 * shadow and nobody else references the chain, a write
			 * would privatize it.
			 */
			if (shadow && (max_refcnt == 1)) {
				extended->pages_shared_now_private++;
			}

			if (!p->vmp_fictitious &&
			    (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
				extended->pages_dirtied++;
			} else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
				/* only the full (non-legacy) struct has pages_reusable */
				if (p->vmp_reusable || object->all_reusable) {
					extended->pages_reusable++;
				}
			}

			extended->pages_resident++;

			/* keep the caller's object locked; drop any shadow */
			if (object != caller_object) {
				vm_object_unlock(object);
			}

			return;
		}
		if (object->internal &&
		    object->alive &&
		    !object->terminating &&
		    object->pager_ready) {
			if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
			    == VM_EXTERNAL_STATE_EXISTS) {
				/* the pager has that page */
				extended->pages_swapped_out++;
				if (object != caller_object) {
					vm_object_unlock(object);
				}
				return;
			}
		}

		if (shadow) {
			/* descend one level down the shadow chain */
			vm_object_lock(shadow);

			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
				ref_count--;
			}

			if (++depth > extended->shadow_depth) {
				extended->shadow_depth = depth;
			}

			if (ref_count > max_refcnt) {
				max_refcnt = ref_count;
			}

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			offset = offset + object->vo_shadow_offset;
			object = shadow;
			shadow = object->shadow;
			continue;
		}
		/* bottom of the chain and no page found anywhere */
		if (object != caller_object) {
			vm_object_unlock(object);
		}
		break;
	}
}
15492
15493 static int
vm_map_region_count_obj_refs(vm_map_entry_t entry,vm_object_t object)15494 vm_map_region_count_obj_refs(
15495 vm_map_entry_t entry,
15496 vm_object_t object)
15497 {
15498 int ref_count;
15499 vm_object_t chk_obj;
15500 vm_object_t tmp_obj;
15501
15502 if (entry->is_sub_map || VME_OBJECT(entry) == VM_OBJECT_NULL) {
15503 return 0;
15504 }
15505
15506 ref_count = 0;
15507 chk_obj = VME_OBJECT(entry);
15508 vm_object_lock(chk_obj);
15509
15510 while (chk_obj) {
15511 if (chk_obj == object) {
15512 ref_count++;
15513 }
15514 tmp_obj = chk_obj->shadow;
15515 if (tmp_obj) {
15516 vm_object_lock(tmp_obj);
15517 }
15518 vm_object_unlock(chk_obj);
15519
15520 chk_obj = tmp_obj;
15521 }
15522
15523 return ref_count;
15524 }
15525
15526
15527 /*
15528 * Routine: vm_map_simplify
15529 *
15530 * Description:
15531 * Attempt to simplify the map representation in
15532 * the vicinity of the given starting address.
15533 * Note:
15534 * This routine is intended primarily to keep the
15535 * kernel maps more compact -- they generally don't
15536 * benefit from the "expand a map entry" technology
15537 * at allocation time because the adjacent entry
15538 * is often wired down.
15539 */
/*
 * vm_map_simplify_entry:
 *
 * Coalesce "this_entry" with the entry immediately preceding it when
 * the two are contiguous and agree on every attribute that matters
 * (backing object/submap, offset continuity, protections, wiring,
 * flags, ...).  On success the previous entry is unlinked and disposed
 * of and "this_entry" is extended downward to cover its range.
 * The caller must hold the map locked for writing.
 */
void
vm_map_simplify_entry(
	vm_map_t        map,
	vm_map_entry_t  this_entry)
{
	vm_map_entry_t  prev_entry;

	prev_entry = this_entry->vme_prev;

	if ((this_entry != vm_map_to_entry(map)) &&
	    (prev_entry != vm_map_to_entry(map)) &&

	    /* the two entries must be contiguous ... */
	    (prev_entry->vme_end == this_entry->vme_start) &&

	    /* ... backed by the same object/submap at contiguous offsets ... */
	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
	    (prev_entry->vme_object_value == this_entry->vme_object_value) &&
	    (prev_entry->vme_kernel_object == this_entry->vme_kernel_object) &&
	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
	    prev_entry->vme_start))
	    == VME_OFFSET(this_entry)) &&

	    /* ... and identical in every other relevant attribute */
	    (prev_entry->behavior == this_entry->behavior) &&
	    (prev_entry->needs_copy == this_entry->needs_copy) &&
	    (prev_entry->protection == this_entry->protection) &&
	    (prev_entry->max_protection == this_entry->max_protection) &&
	    (prev_entry->inheritance == this_entry->inheritance) &&
	    (prev_entry->use_pmap == this_entry->use_pmap) &&
	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
	    (prev_entry->no_cache == this_entry->no_cache) &&
	    (prev_entry->vme_permanent == this_entry->vme_permanent) &&
	    (prev_entry->map_aligned == this_entry->map_aligned) &&
	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
	    (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
	    (prev_entry->vme_resilient_codesign ==
	    this_entry->vme_resilient_codesign) &&
	    (prev_entry->vme_resilient_media ==
	    this_entry->vme_resilient_media) &&
	    (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&

	    (prev_entry->wired_count == this_entry->wired_count) &&
	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&

	    /* entries in a transient state are never merged */
	    ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
	    (prev_entry->in_transition == FALSE) &&
	    (this_entry->in_transition == FALSE) &&
	    (prev_entry->needs_wakeup == FALSE) &&
	    (this_entry->needs_wakeup == FALSE) &&
	    (prev_entry->is_shared == this_entry->is_shared) &&
	    (prev_entry->superpage_size == FALSE) &&
	    (this_entry->superpage_size == FALSE)
	    ) {
		if (prev_entry->vme_permanent) {
			/* clear the flag so the unlink below is allowed */
			assert(this_entry->vme_permanent);
			prev_entry->vme_permanent = false;
		}
		vm_map_store_entry_unlink(map, prev_entry, true);
		assert(prev_entry->vme_start < this_entry->vme_end);
		if (prev_entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
			    VM_MAP_PAGE_MASK(map)));
		}
		/* extend "this_entry" downward over the merged range */
		this_entry->vme_start = prev_entry->vme_start;
		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, this_entry, TRUE);
		}

		/* drop the merged entry's reference on its backing store */
		if (prev_entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(prev_entry));
		} else {
			vm_object_deallocate(VME_OBJECT(prev_entry));
		}
		vm_map_entry_dispose(prev_entry);
		SAVE_HINT_MAP_WRITE(map, this_entry);
	}
}
15619
15620 void
vm_map_simplify(vm_map_t map,vm_map_offset_t start)15621 vm_map_simplify(
15622 vm_map_t map,
15623 vm_map_offset_t start)
15624 {
15625 vm_map_entry_t this_entry;
15626
15627 vm_map_lock(map);
15628 if (vm_map_lookup_entry(map, start, &this_entry)) {
15629 vm_map_simplify_entry(map, this_entry);
15630 vm_map_simplify_entry(map, this_entry->vme_next);
15631 }
15632 vm_map_unlock(map);
15633 }
15634
15635 static void
vm_map_simplify_range(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)15636 vm_map_simplify_range(
15637 vm_map_t map,
15638 vm_map_offset_t start,
15639 vm_map_offset_t end)
15640 {
15641 vm_map_entry_t entry;
15642
15643 /*
15644 * The map should be locked (for "write") by the caller.
15645 */
15646
15647 if (start >= end) {
15648 /* invalid address range */
15649 return;
15650 }
15651
15652 start = vm_map_trunc_page(start,
15653 VM_MAP_PAGE_MASK(map));
15654 end = vm_map_round_page(end,
15655 VM_MAP_PAGE_MASK(map));
15656
15657 if (!vm_map_lookup_entry(map, start, &entry)) {
15658 /* "start" is not mapped and "entry" ends before "start" */
15659 if (entry == vm_map_to_entry(map)) {
15660 /* start with first entry in the map */
15661 entry = vm_map_first_entry(map);
15662 } else {
15663 /* start with next entry */
15664 entry = entry->vme_next;
15665 }
15666 }
15667
15668 while (entry != vm_map_to_entry(map) &&
15669 entry->vme_start <= end) {
15670 /* try and coalesce "entry" with its previous entry */
15671 vm_map_simplify_entry(map, entry);
15672 entry = entry->vme_next;
15673 }
15674 }
15675
15676
15677 /*
15678 * Routine: vm_map_machine_attribute
15679 * Purpose:
15680 * Provide machine-specific attributes to mappings,
15681 * such as cachability etc. for machines that provide
15682 * them. NUMA architectures and machines with big/strange
15683 * caches will use this.
15684 * Note:
15685 * Responsibilities for locking and checking are handled here,
15686 * everything else in the pmap module. If any non-volatile
15687 * information must be kept, the pmap module should handle
15688 * it itself. [This assumes that attributes do not
15689 * need to be inherited, which seems ok to me]
15690 */
kern_return_t
vm_map_machine_attribute(
	vm_map_t                        map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_machine_attribute_t  attribute,
	vm_machine_attribute_val_t* value)              /* IN/OUT */
{
	kern_return_t   ret;
	vm_map_size_t sync_size;
	vm_map_entry_t entry;

	if (start < vm_map_min(map) || end > vm_map_max(map)) {
		return KERN_INVALID_ADDRESS;
	}

	/* Figure how much memory we need to flush (in page increments) */
	sync_size = end - start;

	vm_map_lock(map);

	if (attribute != MATTR_CACHE) {
		/* If we don't have to find physical addresses, we */
		/* don't have to do an explicit traversal here. */
		ret = pmap_attribute(map->pmap, start, end - start,
		    attribute, value);
		vm_map_unlock(map);
		return ret;
	}

	/* MATTR_CACHE: walk the entries to sync each physical page */

	ret = KERN_SUCCESS;                                             /* Assume it all worked */

	while (sync_size) {
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_map_size_t   sub_size;
			/* clip this pass to the end of the current entry */
			if ((entry->vme_end - start) > sync_size) {
				sub_size = sync_size;
				sync_size = 0;
			} else {
				sub_size = entry->vme_end - start;
				sync_size -= sub_size;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;

				sub_start = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				sub_end = sub_start + sub_size;
				/*
				 * NOTE(review): the recursive call's return
				 * value is discarded, so a failure inside a
				 * submap is not reported to the caller —
				 * confirm whether this is intentional.
				 */
				vm_map_machine_attribute(
					VME_SUBMAP(entry),
					sub_start,
					sub_end,
					attribute, value);
			} else if (VME_OBJECT(entry)) {
				vm_page_t               m;
				vm_object_t             object;
				vm_object_t             base_object;
				vm_object_t             last_object;
				vm_object_offset_t      offset;
				vm_object_offset_t      base_offset;
				vm_map_size_t           range;
				range = sub_size;
				offset = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				offset = vm_object_trunc_page(offset);
				base_offset = offset;
				object = VME_OBJECT(entry);
				base_object = object;
				last_object = NULL;

				vm_object_lock(object);

				/* sync each page, descending the shadow
				 * chain until the page is found */
				while (range) {
					m = vm_page_lookup(
						object, offset);

					if (m && !m->vmp_fictitious) {
						ret =
						    pmap_attribute_cache_sync(
							VM_PAGE_GET_PHYS_PAGE(m),
							PAGE_SIZE,
							attribute, value);
					} else if (object->shadow) {
						/* not here: look one shadow level down */
						offset = offset + object->vo_shadow_offset;
						last_object = object;
						object = object->shadow;
						vm_object_lock(last_object->shadow);
						vm_object_unlock(last_object);
						continue;
					}
					if (range < PAGE_SIZE) {
						range = 0;
					} else {
						range -= PAGE_SIZE;
					}

					/* return to the top object for the next page */
					if (base_object != object) {
						vm_object_unlock(object);
						vm_object_lock(base_object);
						object = base_object;
					}
					/* Bump to the next page */
					base_offset += PAGE_SIZE;
					offset = base_offset;
				}
				vm_object_unlock(object);
			}
			start += sub_size;
		} else {
			/* hole in the range: give up */
			vm_map_unlock(map);
			return KERN_FAILURE;
		}
	}

	vm_map_unlock(map);

	return ret;
}
15810
15811 /*
15812 * vm_map_behavior_set:
15813 *
15814 * Sets the paging reference behavior of the specified address
15815 * range in the target map. Paging reference behavior affects
15816 * how pagein operations resulting from faults on the map will be
15817 * clustered.
15818 */
15819 kern_return_t
vm_map_behavior_set(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_behavior_t new_behavior)15820 vm_map_behavior_set(
15821 vm_map_t map,
15822 vm_map_offset_t start,
15823 vm_map_offset_t end,
15824 vm_behavior_t new_behavior)
15825 {
15826 vm_map_entry_t entry;
15827 vm_map_entry_t temp_entry;
15828
15829 if (start > end ||
15830 start < vm_map_min(map) ||
15831 end > vm_map_max(map)) {
15832 return KERN_NO_SPACE;
15833 }
15834
15835 switch (new_behavior) {
15836 /*
15837 * This first block of behaviors all set a persistent state on the specified
15838 * memory range. All we have to do here is to record the desired behavior
15839 * in the vm_map_entry_t's.
15840 */
15841
15842 case VM_BEHAVIOR_DEFAULT:
15843 case VM_BEHAVIOR_RANDOM:
15844 case VM_BEHAVIOR_SEQUENTIAL:
15845 case VM_BEHAVIOR_RSEQNTL:
15846 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15847 vm_map_lock(map);
15848
15849 /*
15850 * The entire address range must be valid for the map.
15851 * Note that vm_map_range_check() does a
15852 * vm_map_lookup_entry() internally and returns the
15853 * entry containing the start of the address range if
15854 * the entire range is valid.
15855 */
15856 if (vm_map_range_check(map, start, end, &temp_entry)) {
15857 entry = temp_entry;
15858 vm_map_clip_start(map, entry, start);
15859 } else {
15860 vm_map_unlock(map);
15861 return KERN_INVALID_ADDRESS;
15862 }
15863
15864 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15865 vm_map_clip_end(map, entry, end);
15866 if (entry->is_sub_map) {
15867 assert(!entry->use_pmap);
15868 }
15869
15870 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
15871 entry->zero_wired_pages = TRUE;
15872 } else {
15873 entry->behavior = new_behavior;
15874 }
15875 entry = entry->vme_next;
15876 }
15877
15878 vm_map_unlock(map);
15879 break;
15880
15881 /*
15882 * The rest of these are different from the above in that they cause
15883 * an immediate action to take place as opposed to setting a behavior that
15884 * affects future actions.
15885 */
15886
15887 case VM_BEHAVIOR_WILLNEED:
15888 return vm_map_willneed(map, start, end);
15889
15890 case VM_BEHAVIOR_DONTNEED:
15891 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
15892
15893 case VM_BEHAVIOR_FREE:
15894 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
15895
15896 case VM_BEHAVIOR_REUSABLE:
15897 return vm_map_reusable_pages(map, start, end);
15898
15899 case VM_BEHAVIOR_REUSE:
15900 return vm_map_reuse_pages(map, start, end);
15901
15902 case VM_BEHAVIOR_CAN_REUSE:
15903 return vm_map_can_reuse(map, start, end);
15904
15905 #if MACH_ASSERT
15906 case VM_BEHAVIOR_PAGEOUT:
15907 return vm_map_pageout(map, start, end);
15908 #endif /* MACH_ASSERT */
15909
15910 default:
15911 return KERN_INVALID_ARGUMENT;
15912 }
15913
15914 return KERN_SUCCESS;
15915 }
15916
15917
15918 /*
15919 * Internals for madvise(MADV_WILLNEED) system call.
15920 *
 * The implementation does one of the following:
 *  a) read-ahead, if the mapping corresponds to a mapped regular file
 *  b) or, faults in the pages (zero-fill, decompress, etc.) if it's an anonymous mapping
15924 */
15925
15926
static kern_return_t
vm_map_willneed(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end
	)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	memory_object_t                 pager;
	struct vm_object_fault_info     fault_info = {};
	kern_return_t                   kr;
	vm_object_size_t                len;
	vm_object_offset_t              offset;

	fault_info.interruptible = THREAD_UNINT;        /* ignored value */
	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.stealth = TRUE;

	/*
	 * The MADV_WILLNEED operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && start < end;) {
		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.  After that, the offset will always be zero to
		 * correspond to the beginning of the current vm_map_entry.
		 */
		offset = (start - entry->vme_start) + VME_OFFSET(entry);

		/*
		 * Set the length so we don't go beyond the end of the
		 * map_entry or beyond the end of the range we were given.
		 * This range could span also multiple map entries all of which
		 * map different files, so make sure we only do the right amount
		 * of I/O for each object.  Note that it's possible for there
		 * to be multiple map entries all referring to the same object
		 * but with different page permissions, but it's not worth
		 * trying to optimize that case.
		 */
		len = MIN(entry->vme_end - start, end - start);

		if ((vm_size_t) len != len) {
			/* 32-bit overflow: clamp to the largest page-aligned vm_size_t */
			len = (vm_size_t) (0 - PAGE_SIZE);
		}
		fault_info.cluster_size = (vm_size_t) len;
		fault_info.lo_offset = offset;
		fault_info.hi_offset = offset + len;
		fault_info.user_tag = VME_ALIAS(entry);
		fault_info.pmap_options = 0;
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			/* entry uses alternate accounting */
			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}

		/*
		 * If the entry is a submap OR there's no read permission
		 * to this mapping, then just skip it.
		 */
		if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
			entry = entry->vme_next;
			start = entry->vme_start;
			continue;
		}

		object = VME_OBJECT(entry);

		if (object == NULL ||
		    (object && object->internal)) {
			/*
			 * Memory range backed by anonymous memory: fault the
			 * pages in directly (zero-fill, decompress, ...).
			 */
			vm_size_t region_size = 0, effective_page_size = 0;
			vm_map_offset_t addr = 0, effective_page_mask = 0;

			region_size = len;
			addr = start;

			effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
			effective_page_size = effective_page_mask + 1;

			/* Drop the map lock: vm_pre_fault() faults on its own. */
			vm_map_unlock_read(map);

			while (region_size) {
				vm_pre_fault(
					vm_map_trunc_page(addr, effective_page_mask),
					VM_PROT_READ | VM_PROT_WRITE);

				region_size -= effective_page_size;
				addr += effective_page_size;
			}
		} else {
			/*
			 * Find the file object backing this map entry.  If there is
			 * none, then we simply ignore the "will need" advice for this
			 * entry and go on to the next one.
			 */
			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
				entry = entry->vme_next;
				start = entry->vme_start;
				continue;
			}

			/*
			 * NOTE(review): the object is unlocked below without being
			 * locked here, so find_vnode_object() presumably returns
			 * it locked -- confirm against its definition.
			 */
			vm_object_paging_begin(object);
			pager = object->pager;
			vm_object_unlock(object);

			/*
			 * The data_request() could take a long time, so let's
			 * release the map lock to avoid blocking other threads.
			 */
			vm_map_unlock_read(map);

			/*
			 * Get the data from the object asynchronously.
			 *
			 * Note that memory_object_data_request() places limits on the
			 * amount of I/O it will do.  Regardless of the len we
			 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
			 * silently truncates the len to that size.  This isn't
			 * necessarily bad since madvise shouldn't really be used to
			 * page in unlimited amounts of data.  Other Unix variants
			 * limit the willneed case as well.  If this turns out to be an
			 * issue for developers, then we can always adjust the policy
			 * here and still be backwards compatible since this is all
			 * just "advice".
			 */
			kr = memory_object_data_request(
				pager,
				vm_object_trunc_page(offset) + object->paging_offset,
				0,      /* ignored */
				VM_PROT_READ,
				(memory_object_fault_info_t)&fault_info);

			vm_object_lock(object);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			/*
			 * If we couldn't do the I/O for some reason, just give up on
			 * the madvise.  We still return success to the user since
			 * madvise isn't supposed to fail when the advice can't be
			 * taken.  (The map lock was already dropped above.)
			 */

			if (kr != KERN_SUCCESS) {
				return KERN_SUCCESS;
			}
		}

		start += len;
		if (start >= end) {
			/* done */
			return KERN_SUCCESS;
		}

		/* look up next entry; the map lock was dropped in both branches above */
		vm_map_lock_read(map);
		if (!vm_map_lookup_entry(map, start, &entry)) {
			/*
			 * There's a new hole in the address range.
			 */
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
16117
16118 static boolean_t
vm_map_entry_is_reusable(vm_map_entry_t entry)16119 vm_map_entry_is_reusable(
16120 vm_map_entry_t entry)
16121 {
16122 /* Only user map entries */
16123
16124 vm_object_t object;
16125
16126 if (entry->is_sub_map) {
16127 return FALSE;
16128 }
16129
16130 switch (VME_ALIAS(entry)) {
16131 case VM_MEMORY_MALLOC:
16132 case VM_MEMORY_MALLOC_SMALL:
16133 case VM_MEMORY_MALLOC_LARGE:
16134 case VM_MEMORY_REALLOC:
16135 case VM_MEMORY_MALLOC_TINY:
16136 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
16137 case VM_MEMORY_MALLOC_LARGE_REUSED:
16138 /*
16139 * This is a malloc() memory region: check if it's still
16140 * in its original state and can be re-used for more
16141 * malloc() allocations.
16142 */
16143 break;
16144 default:
16145 /*
16146 * Not a malloc() memory region: let the caller decide if
16147 * it's re-usable.
16148 */
16149 return TRUE;
16150 }
16151
16152 if (/*entry->is_shared ||*/
16153 entry->is_sub_map ||
16154 entry->in_transition ||
16155 entry->protection != VM_PROT_DEFAULT ||
16156 entry->max_protection != VM_PROT_ALL ||
16157 entry->inheritance != VM_INHERIT_DEFAULT ||
16158 entry->no_cache ||
16159 entry->vme_permanent ||
16160 entry->superpage_size != FALSE ||
16161 entry->zero_wired_pages ||
16162 entry->wired_count != 0 ||
16163 entry->user_wired_count != 0) {
16164 return FALSE;
16165 }
16166
16167 object = VME_OBJECT(entry);
16168 if (object == VM_OBJECT_NULL) {
16169 return TRUE;
16170 }
16171 if (
16172 #if 0
16173 /*
16174 * Let's proceed even if the VM object is potentially
16175 * shared.
16176 * We check for this later when processing the actual
16177 * VM pages, so the contents will be safe if shared.
16178 *
16179 * But we can still mark this memory region as "reusable" to
16180 * acknowledge that the caller did let us know that the memory
16181 * could be re-used and should not be penalized for holding
16182 * on to it. This allows its "resident size" to not include
16183 * the reusable range.
16184 */
16185 object->ref_count == 1 &&
16186 #endif
16187 object->wired_page_count == 0 &&
16188 object->copy == VM_OBJECT_NULL &&
16189 object->shadow == VM_OBJECT_NULL &&
16190 object->internal &&
16191 object->purgable == VM_PURGABLE_DENY &&
16192 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
16193 !object->code_signed) {
16194 return TRUE;
16195 }
16196 return FALSE;
16197 }
16198
16199 static kern_return_t
vm_map_reuse_pages(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)16200 vm_map_reuse_pages(
16201 vm_map_t map,
16202 vm_map_offset_t start,
16203 vm_map_offset_t end)
16204 {
16205 vm_map_entry_t entry;
16206 vm_object_t object;
16207 vm_object_offset_t start_offset, end_offset;
16208
16209 /*
16210 * The MADV_REUSE operation doesn't require any changes to the
16211 * vm_map_entry_t's, so the read lock is sufficient.
16212 */
16213
16214 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16215 /*
16216 * XXX TODO4K
16217 * need to figure out what reusable means for a
16218 * portion of a native page.
16219 */
16220 return KERN_SUCCESS;
16221 }
16222
16223 vm_map_lock_read(map);
16224 assert(map->pmap != kernel_pmap); /* protect alias access */
16225
16226 /*
16227 * The madvise semantics require that the address range be fully
16228 * allocated with no holes. Otherwise, we're required to return
16229 * an error.
16230 */
16231
16232 if (!vm_map_range_check(map, start, end, &entry)) {
16233 vm_map_unlock_read(map);
16234 vm_page_stats_reusable.reuse_pages_failure++;
16235 return KERN_INVALID_ADDRESS;
16236 }
16237
16238 /*
16239 * Examine each vm_map_entry_t in the range.
16240 */
16241 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16242 entry = entry->vme_next) {
16243 /*
16244 * Sanity check on the VM map entry.
16245 */
16246 if (!vm_map_entry_is_reusable(entry)) {
16247 vm_map_unlock_read(map);
16248 vm_page_stats_reusable.reuse_pages_failure++;
16249 return KERN_INVALID_ADDRESS;
16250 }
16251
16252 /*
16253 * The first time through, the start address could be anywhere
16254 * within the vm_map_entry we found. So adjust the offset to
16255 * correspond.
16256 */
16257 if (entry->vme_start < start) {
16258 start_offset = start - entry->vme_start;
16259 } else {
16260 start_offset = 0;
16261 }
16262 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
16263 start_offset += VME_OFFSET(entry);
16264 end_offset += VME_OFFSET(entry);
16265
16266 object = VME_OBJECT(entry);
16267 if (object != VM_OBJECT_NULL) {
16268 vm_object_lock(object);
16269 vm_object_reuse_pages(object, start_offset, end_offset,
16270 TRUE);
16271 vm_object_unlock(object);
16272 }
16273
16274 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
16275 /*
16276 * XXX
16277 * We do not hold the VM map exclusively here.
16278 * The "alias" field is not that critical, so it's
16279 * safe to update it here, as long as it is the only
16280 * one that can be modified while holding the VM map
16281 * "shared".
16282 */
16283 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
16284 }
16285 }
16286
16287 vm_map_unlock_read(map);
16288 vm_page_stats_reusable.reuse_pages_success++;
16289 return KERN_SUCCESS;
16290 }
16291
16292
static kern_return_t
vm_map_reusable_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	vm_object_offset_t      start_offset, end_offset;
	vm_map_offset_t         pmap_offset;

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a portion
		 * of a native page.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reusable_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * kill_pages: 1 = pages may be discarded, -1 = object is
		 * shared in a way that forbids discarding (account only).
		 */
		int kill_pages = 0;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
			/* not writable: can't discard contents */
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_nonwritable++;
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.  pmap_offset tracks the corresponding VA for
		 * the pmap-level accounting.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
			pmap_offset = start;
		} else {
			start_offset = 0;
			pmap_offset = entry->vme_start;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			/* nothing mapped here: nothing to deactivate */
			continue;
		}


		vm_object_lock(object);
		/*
		 * Pages may only be discarded when no other mapping can
		 * observe the contents: either we hold the sole reference,
		 * or the copy strategy guarantees no symmetric copy exists.
		 */
		if (((object->ref_count == 1) ||
		    (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
		    object->copy == VM_OBJECT_NULL)) &&
		    object->shadow == VM_OBJECT_NULL &&
		    /*
		     * "iokit_acct" entries are billed for their virtual size
		     * (rather than for their resident pages only), so they
		     * wouldn't benefit from making pages reusable, and it
		     * would be hard to keep track of pages that are both
		     * "iokit_acct" and "reusable" in the pmap stats and
		     * ledgers.
		     */
		    !(entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap))) {
			if (object->ref_count != 1) {
				vm_page_stats_reusable.reusable_shared++;
			}
			kill_pages = 1;
		} else {
			kill_pages = -1;
		}
		if (kill_pages != -1) {
			vm_object_deactivate_pages(object,
			    start_offset,
			    end_offset - start_offset,
			    kill_pages,
			    TRUE /*reusable_pages*/,
			    map->pmap,
			    pmap_offset);
		} else {
			/* shared object: only record the attempt */
			vm_page_stats_reusable.reusable_pages_shared++;
			DTRACE_VM4(vm_map_reusable_pages_shared,
			    unsigned int, VME_ALIAS(entry),
			    vm_map_t, map,
			    vm_map_entry_t, entry,
			    vm_object_t, object);
		}
		vm_object_unlock(object);

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
		    VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reusable_pages_success++;
	return KERN_SUCCESS;
}
16437
16438
16439 static kern_return_t
vm_map_can_reuse(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)16440 vm_map_can_reuse(
16441 vm_map_t map,
16442 vm_map_offset_t start,
16443 vm_map_offset_t end)
16444 {
16445 vm_map_entry_t entry;
16446
16447 /*
16448 * The MADV_REUSABLE operation doesn't require any changes to the
16449 * vm_map_entry_t's, so the read lock is sufficient.
16450 */
16451
16452 vm_map_lock_read(map);
16453 assert(map->pmap != kernel_pmap); /* protect alias access */
16454
16455 /*
16456 * The madvise semantics require that the address range be fully
16457 * allocated with no holes. Otherwise, we're required to return
16458 * an error.
16459 */
16460
16461 if (!vm_map_range_check(map, start, end, &entry)) {
16462 vm_map_unlock_read(map);
16463 vm_page_stats_reusable.can_reuse_failure++;
16464 return KERN_INVALID_ADDRESS;
16465 }
16466
16467 /*
16468 * Examine each vm_map_entry_t in the range.
16469 */
16470 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16471 entry = entry->vme_next) {
16472 /*
16473 * Sanity check on the VM map entry.
16474 */
16475 if (!vm_map_entry_is_reusable(entry)) {
16476 vm_map_unlock_read(map);
16477 vm_page_stats_reusable.can_reuse_failure++;
16478 return KERN_INVALID_ADDRESS;
16479 }
16480 }
16481
16482 vm_map_unlock_read(map);
16483 vm_page_stats_reusable.can_reuse_success++;
16484 return KERN_SUCCESS;
16485 }
16486
16487
16488 #if MACH_ASSERT
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	/*
	 * The MADV_PAGEOUT operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		vm_object_t     object;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (entry->is_sub_map) {
			/*
			 * Descend one level into the submap; the submap's
			 * lock is taken shared while the parent map lock is
			 * still held.
			 */
			vm_map_t        submap;
			vm_map_offset_t submap_start;
			vm_map_offset_t submap_end;
			vm_map_entry_t  submap_entry;

			submap = VME_SUBMAP(entry);
			submap_start = VME_OFFSET(entry);
			submap_end = submap_start + (entry->vme_end -
			    entry->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
			    submap_start,
			    submap_end,
			    &submap_entry)) {
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			/*
			 * NOTE(review): only the first entry of the submap
			 * range is examined; nested submaps below one level
			 * are skipped -- presumably acceptable for this
			 * debug-only (MACH_ASSERT) path.
			 */
			if (submap_entry->is_sub_map) {
				vm_map_unlock_read(submap);
				continue;
			}

			object = VME_OBJECT(submap_entry);
			if (object == VM_OBJECT_NULL || !object->internal) {
				/* only anonymous memory can be paged out here */
				vm_map_unlock_read(submap);
				continue;
			}

			vm_object_pageout(object);

			vm_map_unlock_read(submap);
			submap = VM_MAP_NULL;
			submap_entry = VM_MAP_ENTRY_NULL;
			continue;
		}

		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL || !object->internal) {
			/* only anonymous memory can be paged out here */
			continue;
		}

		vm_object_pageout(object);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
16577 #endif /* MACH_ASSERT */
16578
16579
16580 /*
16581 * Routine: vm_map_entry_insert
16582 *
16583 * Description: This routine inserts a new vm_entry in a locked map.
16584 */
static vm_map_entry_t
vm_map_entry_insert(
	vm_map_t                map,
	vm_map_entry_t          insp_entry,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_map_kernel_flags_t   vmk_flags,
	boolean_t               needs_copy,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance,
	boolean_t               no_cache,
	boolean_t               permanent,
	unsigned int            superpage_size,
	boolean_t               clear_map_aligned,
	int                     alias)
{
	vm_map_entry_t  new_entry;
	boolean_t       map_aligned = FALSE;

	/* The new entry is linked after "insp_entry"; caller holds the map lock. */
	assert(insp_entry != (vm_map_entry_t)0);
	vm_map_lock_assert_exclusive(map);

#if DEVELOPMENT || DEBUG
	/* Catch (end - start) + offset overflowing the object offset space. */
	vm_object_offset_t end_offset = 0;
	assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * Decide whether the entry is aligned to the map's (possibly
	 * sub-PAGE_SIZE) page size or only to the native page size.
	 */
	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		map_aligned = TRUE;
	}
	if (clear_map_aligned &&
	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
		map_aligned = FALSE;
	}
	if (map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
	} else {
		assert(page_aligned(start));
		assert(page_aligned(end));
	}
	assert(start < end);

	new_entry = vm_map_entry_create(map);

	new_entry->vme_start = start;
	new_entry->vme_end = end;

	/* "object" doubles as the submap when vmkf_submap is set. */
	if (vmk_flags.vmkf_submap) {
		new_entry->vme_atomic = vmk_flags.vmkf_submap_atomic;
		VME_SUBMAP_SET(new_entry, (vm_map_t)object);
	} else {
		VME_OBJECT_SET(new_entry, object, false, 0);
	}
	VME_OFFSET_SET(new_entry, offset);
	VME_ALIAS_SET(new_entry, alias);

	new_entry->map_aligned = map_aligned;
	new_entry->needs_copy = needs_copy;
	new_entry->inheritance = inheritance;
	new_entry->protection = cur_protection;
	new_entry->max_protection = max_protection;
	/*
	 * submap: "use_pmap" means "nested".
	 * default: false.
	 *
	 * object: "use_pmap" means "use pmap accounting" for footprint.
	 * default: true.
	 */
	new_entry->use_pmap = !vmk_flags.vmkf_submap;
	new_entry->no_cache = no_cache;
	new_entry->vme_permanent = permanent;
	new_entry->translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
	new_entry->vme_no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
	new_entry->superpage_size = (superpage_size != 0);

	if (vmk_flags.vmkf_map_jit) {
		/* Only one JIT entry per map unless policy allows several. */
		if (!(map->jit_entry_exists) ||
		    VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
			new_entry->used_for_jit = TRUE;
			map->jit_entry_exists = TRUE;
		}
	}

	/*
	 * Insert the new entry into the list.
	 */

	vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
	map->size += end - start;

	/*
	 * Update the free space hint and the lookup hint.
	 */

	SAVE_HINT_MAP_WRITE(map, new_entry);
	return new_entry;
}
16687
16688 /*
16689 * Routine: vm_map_remap_extract
16690 *
16691 * Description: This routine returns a vm_entry list from a map.
16692 */
16693 static kern_return_t
vm_map_remap_extract(vm_map_t map,vm_map_offset_t addr,vm_map_size_t size,boolean_t copy,struct vm_map_header * map_header,vm_prot_t * cur_protection,vm_prot_t * max_protection,vm_inherit_t inheritance,vm_map_kernel_flags_t vmk_flags)16694 vm_map_remap_extract(
16695 vm_map_t map,
16696 vm_map_offset_t addr,
16697 vm_map_size_t size,
16698 boolean_t copy,
16699 struct vm_map_header *map_header,
16700 vm_prot_t *cur_protection, /* IN/OUT */
16701 vm_prot_t *max_protection, /* IN/OUT */
16702 /* What, no behavior? */
16703 vm_inherit_t inheritance,
16704 vm_map_kernel_flags_t vmk_flags)
16705 {
16706 kern_return_t result;
16707 vm_map_size_t mapped_size;
16708 vm_map_size_t tmp_size;
16709 vm_map_entry_t src_entry; /* result of last map lookup */
16710 vm_map_entry_t new_entry;
16711 vm_object_offset_t offset;
16712 vm_map_offset_t map_address;
16713 vm_map_offset_t src_start; /* start of entry to map */
16714 vm_map_offset_t src_end; /* end of region to be mapped */
16715 vm_object_t object;
16716 vm_map_version_t version;
16717 boolean_t src_needs_copy;
16718 boolean_t new_entry_needs_copy;
16719 vm_map_entry_t saved_src_entry;
16720 boolean_t src_entry_was_wired;
16721 vm_prot_t max_prot_for_prot_copy;
16722 vm_map_offset_t effective_page_mask;
16723 boolean_t pageable, same_map;
16724 boolean_t vm_remap_legacy;
16725 vm_prot_t required_cur_prot, required_max_prot;
16726 vm_object_t new_copy_object; /* vm_object_copy_* result */
16727 boolean_t saved_used_for_jit; /* Saved used_for_jit. */
16728
16729 pageable = vmk_flags.vmkf_copy_pageable;
16730 same_map = vmk_flags.vmkf_copy_same_map;
16731
16732 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16733
16734 assert(map != VM_MAP_NULL);
16735 assert(size != 0);
16736 assert(size == vm_map_round_page(size, effective_page_mask));
16737 assert(inheritance == VM_INHERIT_NONE ||
16738 inheritance == VM_INHERIT_COPY ||
16739 inheritance == VM_INHERIT_SHARE);
16740 assert(!(*cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16741 assert(!(*max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16742 assert((*cur_protection & *max_protection) == *cur_protection);
16743
16744 /*
16745 * Compute start and end of region.
16746 */
16747 src_start = vm_map_trunc_page(addr, effective_page_mask);
16748 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16749
16750 /*
16751 * Initialize map_header.
16752 */
16753 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16754 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16755 map_header->nentries = 0;
16756 map_header->entries_pageable = pageable;
16757 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16758 map_header->page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(map);
16759 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16760
16761 vm_map_store_init( map_header );
16762
16763 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16764 /*
16765 * Special case for vm_map_protect(VM_PROT_COPY):
16766 * we want to set the new mappings' max protection to the
16767 * specified *max_protection...
16768 */
16769 max_prot_for_prot_copy = *max_protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
16770 /* ... but we want to use the vm_remap() legacy mode */
16771 *max_protection = VM_PROT_NONE;
16772 *cur_protection = VM_PROT_NONE;
16773 } else {
16774 max_prot_for_prot_copy = VM_PROT_NONE;
16775 }
16776
16777 if (*cur_protection == VM_PROT_NONE &&
16778 *max_protection == VM_PROT_NONE) {
16779 /*
16780 * vm_remap() legacy mode:
16781 * Extract all memory regions in the specified range and
16782 * collect the strictest set of protections allowed on the
16783 * entire range, so the caller knows what they can do with
16784 * the remapped range.
16785 * We start with VM_PROT_ALL and we'll remove the protections
16786 * missing from each memory region.
16787 */
16788 vm_remap_legacy = TRUE;
16789 *cur_protection = VM_PROT_ALL;
16790 *max_protection = VM_PROT_ALL;
16791 required_cur_prot = VM_PROT_NONE;
16792 required_max_prot = VM_PROT_NONE;
16793 } else {
16794 /*
16795 * vm_remap_new() mode:
16796 * Extract all memory regions in the specified range and
16797 * ensure that they have at least the protections specified
16798 * by the caller via *cur_protection and *max_protection.
16799 * The resulting mapping should have these protections.
16800 */
16801 vm_remap_legacy = FALSE;
16802 if (copy) {
16803 required_cur_prot = VM_PROT_NONE;
16804 required_max_prot = VM_PROT_READ;
16805 } else {
16806 required_cur_prot = *cur_protection;
16807 required_max_prot = *max_protection;
16808 }
16809 }
16810
16811 map_address = 0;
16812 mapped_size = 0;
16813 result = KERN_SUCCESS;
16814
16815 /*
16816 * The specified source virtual space might correspond to
16817 * multiple map entries, need to loop on them.
16818 */
16819 vm_map_lock(map);
16820 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16821 /*
16822 * This address space uses sub-pages so the range might
16823 * not be re-mappable in an address space with larger
16824 * pages. Re-assemble any broken-up VM map entries to
16825 * improve our chances of making it work.
16826 */
16827 vm_map_simplify_range(map, src_start, src_end);
16828 }
16829 while (mapped_size != size) {
16830 vm_map_size_t entry_size;
16831
16832 /*
16833 * Find the beginning of the region.
16834 */
16835 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16836 result = KERN_INVALID_ADDRESS;
16837 break;
16838 }
16839
16840 if (src_start < src_entry->vme_start ||
16841 (mapped_size && src_start != src_entry->vme_start)) {
16842 result = KERN_INVALID_ADDRESS;
16843 break;
16844 }
16845
16846 tmp_size = size - mapped_size;
16847 if (src_end > src_entry->vme_end) {
16848 tmp_size -= (src_end - src_entry->vme_end);
16849 }
16850
16851 entry_size = (vm_map_size_t)(src_entry->vme_end -
16852 src_entry->vme_start);
16853
16854 if (src_entry->is_sub_map &&
16855 vmk_flags.vmkf_copy_single_object) {
16856 vm_map_t submap;
16857 vm_map_offset_t submap_start;
16858 vm_map_size_t submap_size;
16859 boolean_t submap_needs_copy;
16860
16861 /*
16862 * No check for "required protection" on "src_entry"
16863 * because the protections that matter are the ones
16864 * on the submap's VM map entry, which will be checked
16865 * during the call to vm_map_remap_extract() below.
16866 */
16867 submap_size = src_entry->vme_end - src_start;
16868 if (submap_size > size) {
16869 submap_size = size;
16870 }
16871 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16872 submap = VME_SUBMAP(src_entry);
16873 if (copy) {
16874 /*
16875 * The caller wants a copy-on-write re-mapping,
16876 * so let's extract from the submap accordingly.
16877 */
16878 submap_needs_copy = TRUE;
16879 } else if (src_entry->needs_copy) {
16880 /*
16881 * The caller wants a shared re-mapping but the
16882 * submap is mapped with "needs_copy", so its
16883 * contents can't be shared as is. Extract the
16884 * contents of the submap as "copy-on-write".
16885 * The re-mapping won't be shared with the
16886 * original mapping but this is equivalent to
16887 * what happened with the original "remap from
16888 * submap" code.
16889 * The shared region is mapped "needs_copy", for
16890 * example.
16891 */
16892 submap_needs_copy = TRUE;
16893 } else {
16894 /*
16895 * The caller wants a shared re-mapping and
16896 * this mapping can be shared (no "needs_copy"),
16897 * so let's extract from the submap accordingly.
16898 * Kernel submaps are mapped without
16899 * "needs_copy", for example.
16900 */
16901 submap_needs_copy = FALSE;
16902 }
16903 vm_map_reference(submap);
16904 vm_map_unlock(map);
16905 src_entry = NULL;
16906 if (vm_remap_legacy) {
16907 *cur_protection = VM_PROT_NONE;
16908 *max_protection = VM_PROT_NONE;
16909 }
16910
16911 DTRACE_VM7(remap_submap_recurse,
16912 vm_map_t, map,
16913 vm_map_offset_t, addr,
16914 vm_map_size_t, size,
16915 boolean_t, copy,
16916 vm_map_offset_t, submap_start,
16917 vm_map_size_t, submap_size,
16918 boolean_t, submap_needs_copy);
16919
16920 result = vm_map_remap_extract(submap,
16921 submap_start,
16922 submap_size,
16923 submap_needs_copy,
16924 map_header,
16925 cur_protection,
16926 max_protection,
16927 inheritance,
16928 vmk_flags);
16929 vm_map_deallocate(submap);
16930 return result;
16931 }
16932
16933 if (src_entry->is_sub_map) {
16934 /* protections for submap mapping are irrelevant here */
16935 } else if (((src_entry->protection & required_cur_prot) !=
16936 required_cur_prot) ||
16937 ((src_entry->max_protection & required_max_prot) !=
16938 required_max_prot)) {
16939 if (vmk_flags.vmkf_copy_single_object &&
16940 mapped_size != 0) {
16941 /*
16942 * Single object extraction.
16943 * We can't extract more with the required
16944 * protection but we've extracted some, so
16945 * stop there and declare success.
16946 * The caller should check the size of
16947 * the copy entry we've extracted.
16948 */
16949 result = KERN_SUCCESS;
16950 } else {
16951 /*
16952 * VM range extraction.
16953 * Required proctection is not available
16954 * for this part of the range: fail.
16955 */
16956 result = KERN_PROTECTION_FAILURE;
16957 }
16958 break;
16959 }
16960
16961 if (src_entry->is_sub_map) {
16962 vm_map_t submap;
16963 vm_map_offset_t submap_start;
16964 vm_map_size_t submap_size;
16965 vm_map_copy_t submap_copy;
16966 vm_prot_t submap_curprot, submap_maxprot;
16967 boolean_t submap_needs_copy;
16968
16969 /*
16970 * No check for "required protection" on "src_entry"
16971 * because the protections that matter are the ones
16972 * on the submap's VM map entry, which will be checked
16973 * during the call to vm_map_copy_extract() below.
16974 */
16975 object = VM_OBJECT_NULL;
16976 submap_copy = VM_MAP_COPY_NULL;
16977
16978 /* find equivalent range in the submap */
16979 submap = VME_SUBMAP(src_entry);
16980 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16981 submap_size = tmp_size;
16982 if (copy) {
16983 /*
16984 * The caller wants a copy-on-write re-mapping,
16985 * so let's extract from the submap accordingly.
16986 */
16987 submap_needs_copy = TRUE;
16988 } else if (src_entry->needs_copy) {
16989 /*
16990 * The caller wants a shared re-mapping but the
16991 * submap is mapped with "needs_copy", so its
16992 * contents can't be shared as is. Extract the
16993 * contents of the submap as "copy-on-write".
16994 * The re-mapping won't be shared with the
16995 * original mapping but this is equivalent to
16996 * what happened with the original "remap from
16997 * submap" code.
16998 * The shared region is mapped "needs_copy", for
16999 * example.
17000 */
17001 submap_needs_copy = TRUE;
17002 } else {
17003 /*
17004 * The caller wants a shared re-mapping and
17005 * this mapping can be shared (no "needs_copy"),
17006 * so let's extract from the submap accordingly.
17007 * Kernel submaps are mapped without
17008 * "needs_copy", for example.
17009 */
17010 submap_needs_copy = FALSE;
17011 }
17012 /* extra ref to keep submap alive */
17013 vm_map_reference(submap);
17014
17015 DTRACE_VM7(remap_submap_recurse,
17016 vm_map_t, map,
17017 vm_map_offset_t, addr,
17018 vm_map_size_t, size,
17019 boolean_t, copy,
17020 vm_map_offset_t, submap_start,
17021 vm_map_size_t, submap_size,
17022 boolean_t, submap_needs_copy);
17023
17024 /*
17025 * The map can be safely unlocked since we
17026 * already hold a reference on the submap.
17027 *
17028 * No timestamp since we don't care if the map
17029 * gets modified while we're down in the submap.
17030 * We'll resume the extraction at src_start + tmp_size
17031 * anyway.
17032 */
17033 vm_map_unlock(map);
17034 src_entry = NULL; /* not valid once map is unlocked */
17035
17036 if (vm_remap_legacy) {
17037 submap_curprot = VM_PROT_NONE;
17038 submap_maxprot = VM_PROT_NONE;
17039 if (max_prot_for_prot_copy) {
17040 submap_maxprot = max_prot_for_prot_copy;
17041 }
17042 } else {
17043 assert(!max_prot_for_prot_copy);
17044 submap_curprot = *cur_protection;
17045 submap_maxprot = *max_protection;
17046 }
17047 result = vm_map_copy_extract(submap,
17048 submap_start,
17049 submap_size,
17050 submap_needs_copy,
17051 &submap_copy,
17052 &submap_curprot,
17053 &submap_maxprot,
17054 inheritance,
17055 vmk_flags);
17056
17057 /* release extra ref on submap */
17058 vm_map_deallocate(submap);
17059 submap = VM_MAP_NULL;
17060
17061 if (result != KERN_SUCCESS) {
17062 vm_map_lock(map);
17063 break;
17064 }
17065
17066 /* transfer submap_copy entries to map_header */
17067 while (vm_map_copy_first_entry(submap_copy) !=
17068 vm_map_copy_to_entry(submap_copy)) {
17069 vm_map_entry_t copy_entry;
17070 vm_map_size_t copy_entry_size;
17071
17072 copy_entry = vm_map_copy_first_entry(submap_copy);
17073
17074 /*
17075 * Prevent kernel_object from being exposed to
17076 * user space.
17077 */
17078 if (__improbable(copy_entry->vme_kernel_object)) {
17079 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
17080 proc_selfpid(),
17081 (get_bsdtask_info(current_task())
17082 ? proc_name_address(get_bsdtask_info(current_task()))
17083 : "?"));
17084 DTRACE_VM(extract_kernel_only);
17085 result = KERN_INVALID_RIGHT;
17086 vm_map_copy_discard(submap_copy);
17087 submap_copy = VM_MAP_COPY_NULL;
17088 vm_map_lock(map);
17089 break;
17090 }
17091
17092 vm_map_copy_entry_unlink(submap_copy, copy_entry);
17093 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
17094 copy_entry->vme_start = map_address;
17095 copy_entry->vme_end = map_address + copy_entry_size;
17096 map_address += copy_entry_size;
17097 mapped_size += copy_entry_size;
17098 src_start += copy_entry_size;
17099 assert(src_start <= src_end);
17100 _vm_map_store_entry_link(map_header,
17101 map_header->links.prev,
17102 copy_entry);
17103 }
17104 /* done with submap_copy */
17105 vm_map_copy_discard(submap_copy);
17106
17107 if (vm_remap_legacy) {
17108 *cur_protection &= submap_curprot;
17109 *max_protection &= submap_maxprot;
17110 }
17111
17112 /* re-acquire the map lock and continue to next entry */
17113 vm_map_lock(map);
17114 continue;
17115 } else {
17116 object = VME_OBJECT(src_entry);
17117
17118 /*
17119 * Prevent kernel_object from being exposed to
17120 * user space.
17121 */
17122 if (__improbable(object == kernel_object)) {
17123 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
17124 proc_selfpid(),
17125 (get_bsdtask_info(current_task())
17126 ? proc_name_address(get_bsdtask_info(current_task()))
17127 : "?"));
17128 DTRACE_VM(extract_kernel_only);
17129 result = KERN_INVALID_RIGHT;
17130 break;
17131 }
17132
17133 if (src_entry->iokit_acct) {
17134 /*
17135 * This entry uses "IOKit accounting".
17136 */
17137 } else if (object != VM_OBJECT_NULL &&
17138 (object->purgable != VM_PURGABLE_DENY ||
17139 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
17140 /*
17141 * Purgeable objects have their own accounting:
17142 * no pmap accounting for them.
17143 */
17144 assertf(!src_entry->use_pmap,
17145 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
17146 map,
17147 src_entry,
17148 (uint64_t)src_entry->vme_start,
17149 (uint64_t)src_entry->vme_end,
17150 src_entry->protection,
17151 src_entry->max_protection,
17152 VME_ALIAS(src_entry));
17153 } else {
17154 /*
17155 * Not IOKit or purgeable:
17156 * must be accounted by pmap stats.
17157 */
17158 assertf(src_entry->use_pmap,
17159 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
17160 map,
17161 src_entry,
17162 (uint64_t)src_entry->vme_start,
17163 (uint64_t)src_entry->vme_end,
17164 src_entry->protection,
17165 src_entry->max_protection,
17166 VME_ALIAS(src_entry));
17167 }
17168
17169 if (object == VM_OBJECT_NULL) {
17170 assert(!src_entry->needs_copy);
17171 if (src_entry->max_protection == VM_PROT_NONE) {
17172 assert(src_entry->protection == VM_PROT_NONE);
17173 /*
17174 * No VM object and no permissions:
17175 * this must be a reserved range with
17176 * nothing to share or copy.
17177 * There could also be all sorts of
17178 * pmap shenanigans within that reserved
17179 * range, so let's just copy the map
17180 * entry as is to remap a similar
17181 * reserved range.
17182 */
17183 offset = 0; /* no object => no offset */
17184 goto copy_src_entry;
17185 }
17186 object = vm_object_allocate(entry_size);
17187 VME_OFFSET_SET(src_entry, 0);
17188 VME_OBJECT_SET(src_entry, object, false, 0);
17189 assert(src_entry->use_pmap);
17190 assert(!map->mapped_in_other_pmaps);
17191 } else if (src_entry->wired_count ||
17192 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
17193 /*
17194 * A wired memory region should not have
17195 * any pending copy-on-write and needs to
17196 * keep pointing at the VM object that
17197 * contains the wired pages.
17198 * If we're sharing this memory (copy=false),
17199 * we'll share this VM object.
17200 * If we're copying this memory (copy=true),
17201 * we'll call vm_object_copy_slowly() below
17202 * and use the new VM object for the remapping.
17203 *
17204 * Or, we are already using an asymmetric
17205 * copy, and therefore we already have
17206 * the right object.
17207 */
17208 assert(!src_entry->needs_copy);
17209 } else if (src_entry->needs_copy || object->shadowed ||
17210 (object->internal && !object->true_share &&
17211 !src_entry->is_shared &&
17212 object->vo_size > entry_size)) {
17213 VME_OBJECT_SHADOW(src_entry, entry_size,
17214 vm_map_always_shadow(map));
17215 assert(src_entry->use_pmap);
17216
17217 if (!src_entry->needs_copy &&
17218 (src_entry->protection & VM_PROT_WRITE)) {
17219 vm_prot_t prot;
17220
17221 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17222
17223 prot = src_entry->protection & ~VM_PROT_WRITE;
17224
17225 if (override_nx(map,
17226 VME_ALIAS(src_entry))
17227 && prot) {
17228 prot |= VM_PROT_EXECUTE;
17229 }
17230
17231 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17232
17233 if (map->mapped_in_other_pmaps) {
17234 vm_object_pmap_protect(
17235 VME_OBJECT(src_entry),
17236 VME_OFFSET(src_entry),
17237 entry_size,
17238 PMAP_NULL,
17239 PAGE_SIZE,
17240 src_entry->vme_start,
17241 prot);
17242 #if MACH_ASSERT
17243 } else if (__improbable(map->pmap == PMAP_NULL)) {
17244 extern boolean_t vm_tests_in_progress;
17245 assert(vm_tests_in_progress);
17246 /*
17247 * Some VM tests (in vm_tests.c)
17248 * sometimes want to use a VM
17249 * map without a pmap.
17250 * Otherwise, this should never
17251 * happen.
17252 */
17253 #endif /* MACH_ASSERT */
17254 } else {
17255 pmap_protect(vm_map_pmap(map),
17256 src_entry->vme_start,
17257 src_entry->vme_end,
17258 prot);
17259 }
17260 }
17261
17262 object = VME_OBJECT(src_entry);
17263 src_entry->needs_copy = FALSE;
17264 }
17265
17266
17267 vm_object_lock(object);
17268 vm_object_reference_locked(object); /* object ref. for new entry */
17269 assert(!src_entry->needs_copy);
17270 if (object->copy_strategy ==
17271 MEMORY_OBJECT_COPY_SYMMETRIC) {
17272 /*
17273 * If we want to share this object (copy==0),
17274 * it needs to be COPY_DELAY.
17275 * If we want to copy this object (copy==1),
17276 * we can't just set "needs_copy" on our side
17277 * and expect the other side to do the same
17278 * (symmetrically), so we can't let the object
17279 * stay COPY_SYMMETRIC.
17280 * So we always switch from COPY_SYMMETRIC to
17281 * COPY_DELAY.
17282 */
17283 object->copy_strategy =
17284 MEMORY_OBJECT_COPY_DELAY;
17285 object->true_share = TRUE;
17286 }
17287 vm_object_unlock(object);
17288 }
17289
17290 offset = (VME_OFFSET(src_entry) +
17291 (src_start - src_entry->vme_start));
17292
17293 copy_src_entry:
17294 new_entry = _vm_map_entry_create(map_header);
17295 vm_map_entry_copy(map, new_entry, src_entry);
17296 if (new_entry->is_sub_map) {
17297 /* clr address space specifics */
17298 new_entry->use_pmap = FALSE;
17299 } else if (copy) {
17300 /*
17301 * We're dealing with a copy-on-write operation,
17302 * so the resulting mapping should not inherit the
17303 * original mapping's accounting settings.
17304 * "use_pmap" should be reset to its default (TRUE)
17305 * so that the new mapping gets accounted for in
17306 * the task's memory footprint.
17307 */
17308 new_entry->use_pmap = TRUE;
17309 }
17310 /* "iokit_acct" was cleared in vm_map_entry_copy() */
17311 assert(!new_entry->iokit_acct);
17312
17313 new_entry->map_aligned = FALSE;
17314
17315 new_entry->vme_start = map_address;
17316 new_entry->vme_end = map_address + tmp_size;
17317 assert(new_entry->vme_start < new_entry->vme_end);
17318 if (copy && vmk_flags.vmkf_remap_prot_copy) {
17319 /* security: keep "permanent" and "pmap_cs_associated" */
17320 new_entry->vme_permanent = src_entry->vme_permanent;
17321 new_entry->pmap_cs_associated = src_entry->pmap_cs_associated;
17322 /*
17323 * Remapping for vm_map_protect(VM_PROT_COPY)
17324 * to convert a read-only mapping into a
17325 * copy-on-write version of itself but
17326 * with write access:
17327 * keep the original inheritance but let's not
17328 * add VM_PROT_WRITE to the max protection yet
17329 * since we want to do more security checks against
17330 * the target map.
17331 */
17332 new_entry->inheritance = src_entry->inheritance;
17333 new_entry->protection &= max_prot_for_prot_copy;
17334 } else {
17335 new_entry->inheritance = inheritance;
17336 if (!vm_remap_legacy) {
17337 new_entry->protection = *cur_protection;
17338 new_entry->max_protection = *max_protection;
17339 }
17340 }
17341 VME_OFFSET_SET(new_entry, offset);
17342
17343 /*
17344 * The new region has to be copied now if required.
17345 */
17346 RestartCopy:
17347 if (!copy) {
17348 if (src_entry->used_for_jit == TRUE) {
17349 if (same_map) {
17350 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
17351 /*
17352 * Cannot allow an entry describing a JIT
17353 * region to be shared across address spaces.
17354 */
17355 result = KERN_INVALID_ARGUMENT;
17356 vm_object_deallocate(object);
17357 vm_map_entry_dispose(new_entry);
17358 new_entry = VM_MAP_ENTRY_NULL;
17359 break;
17360 }
17361 }
17362
17363 src_entry->is_shared = TRUE;
17364 new_entry->is_shared = TRUE;
17365 if (!(new_entry->is_sub_map)) {
17366 new_entry->needs_copy = FALSE;
17367 }
17368 } else if (src_entry->is_sub_map) {
17369 /* make this a COW sub_map if not already */
17370 assert(new_entry->wired_count == 0);
17371 new_entry->needs_copy = TRUE;
17372 object = VM_OBJECT_NULL;
17373 } else if (src_entry->wired_count == 0 &&
17374 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
17375 vm_object_copy_quickly(VME_OBJECT(new_entry),
17376 VME_OFFSET(new_entry),
17377 (new_entry->vme_end -
17378 new_entry->vme_start),
17379 &src_needs_copy,
17380 &new_entry_needs_copy)) {
17381 new_entry->needs_copy = new_entry_needs_copy;
17382 new_entry->is_shared = FALSE;
17383 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17384
17385 /*
17386 * Handle copy_on_write semantics.
17387 */
17388 if (src_needs_copy && !src_entry->needs_copy) {
17389 vm_prot_t prot;
17390
17391 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17392
17393 prot = src_entry->protection & ~VM_PROT_WRITE;
17394
17395 if (override_nx(map,
17396 VME_ALIAS(src_entry))
17397 && prot) {
17398 prot |= VM_PROT_EXECUTE;
17399 }
17400
17401 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17402
17403 vm_object_pmap_protect(object,
17404 offset,
17405 entry_size,
17406 ((src_entry->is_shared
17407 || map->mapped_in_other_pmaps) ?
17408 PMAP_NULL : map->pmap),
17409 VM_MAP_PAGE_SIZE(map),
17410 src_entry->vme_start,
17411 prot);
17412
17413 assert(src_entry->wired_count == 0);
17414 src_entry->needs_copy = TRUE;
17415 }
17416 /*
17417 * Throw away the old object reference of the new entry.
17418 */
17419 vm_object_deallocate(object);
17420 } else {
17421 new_entry->is_shared = FALSE;
17422 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17423
17424 src_entry_was_wired = (src_entry->wired_count > 0);
17425 saved_src_entry = src_entry;
17426 src_entry = VM_MAP_ENTRY_NULL;
17427
17428 /*
17429 * The map can be safely unlocked since we
17430 * already hold a reference on the object.
17431 *
17432 * Record the timestamp of the map for later
17433 * verification, and unlock the map.
17434 */
17435 version.main_timestamp = map->timestamp;
17436 vm_map_unlock(map); /* Increments timestamp once! */
17437
17438 /*
17439 * Perform the copy.
17440 */
17441 if (src_entry_was_wired > 0 ||
17442 (debug4k_no_cow_copyin &&
17443 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
17444 vm_object_lock(object);
17445 result = vm_object_copy_slowly(
17446 object,
17447 offset,
17448 (new_entry->vme_end -
17449 new_entry->vme_start),
17450 THREAD_UNINT,
17451 &new_copy_object);
17452 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17453 saved_used_for_jit = new_entry->used_for_jit;
17454 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
17455 new_entry->used_for_jit = saved_used_for_jit;
17456 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
17457 new_entry->needs_copy = FALSE;
17458 } else {
17459 vm_object_offset_t new_offset;
17460
17461 new_offset = VME_OFFSET(new_entry);
17462 result = vm_object_copy_strategically(
17463 object,
17464 offset,
17465 (new_entry->vme_end -
17466 new_entry->vme_start),
17467 &new_copy_object,
17468 &new_offset,
17469 &new_entry_needs_copy);
17470 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17471 saved_used_for_jit = new_entry->used_for_jit;
17472 VME_OBJECT_SET(new_entry, new_copy_object, false, 0);
17473 new_entry->used_for_jit = saved_used_for_jit;
17474 if (new_offset != VME_OFFSET(new_entry)) {
17475 VME_OFFSET_SET(new_entry, new_offset);
17476 }
17477
17478 new_entry->needs_copy = new_entry_needs_copy;
17479 }
17480
17481 /*
17482 * Throw away the old object reference of the new entry.
17483 */
17484 vm_object_deallocate(object);
17485
17486 if (result != KERN_SUCCESS &&
17487 result != KERN_MEMORY_RESTART_COPY) {
17488 vm_map_entry_dispose(new_entry);
17489 vm_map_lock(map);
17490 break;
17491 }
17492
17493 /*
17494 * Verify that the map has not substantially
17495 * changed while the copy was being made.
17496 */
17497
17498 vm_map_lock(map);
17499 if (version.main_timestamp + 1 != map->timestamp) {
17500 /*
17501 * Simple version comparison failed.
17502 *
17503 * Retry the lookup and verify that the
17504 * same object/offset are still present.
17505 */
17506 saved_src_entry = VM_MAP_ENTRY_NULL;
17507 vm_object_deallocate(VME_OBJECT(new_entry));
17508 vm_map_entry_dispose(new_entry);
17509 if (result == KERN_MEMORY_RESTART_COPY) {
17510 result = KERN_SUCCESS;
17511 }
17512 continue;
17513 }
17514 /* map hasn't changed: src_entry is still valid */
17515 src_entry = saved_src_entry;
17516 saved_src_entry = VM_MAP_ENTRY_NULL;
17517
17518 if (result == KERN_MEMORY_RESTART_COPY) {
17519 vm_object_reference(object);
17520 goto RestartCopy;
17521 }
17522 }
17523
17524 _vm_map_store_entry_link(map_header,
17525 map_header->links.prev, new_entry);
17526
17527 /* protections for submap mapping are irrelevant here */
17528 if (vm_remap_legacy && !src_entry->is_sub_map) {
17529 *cur_protection &= src_entry->protection;
17530 *max_protection &= src_entry->max_protection;
17531 }
17532
17533 map_address += tmp_size;
17534 mapped_size += tmp_size;
17535 src_start += tmp_size;
17536
17537 if (vmk_flags.vmkf_copy_single_object) {
17538 if (mapped_size != size) {
17539 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n",
17540 map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17541 if (src_entry->vme_next != vm_map_to_entry(map) &&
17542 src_entry->vme_next->vme_object_value ==
17543 src_entry->vme_object_value) {
17544 /* XXX TODO4K */
17545 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17546 }
17547 }
17548 break;
17549 }
17550 } /* end while */
17551
17552 vm_map_unlock(map);
17553 if (result != KERN_SUCCESS) {
17554 /*
17555 * Free all allocated elements.
17556 */
17557 for (src_entry = map_header->links.next;
17558 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17559 src_entry = new_entry) {
17560 new_entry = src_entry->vme_next;
17561 _vm_map_store_entry_unlink(map_header, src_entry, false);
17562 if (src_entry->is_sub_map) {
17563 vm_map_deallocate(VME_SUBMAP(src_entry));
17564 } else {
17565 vm_object_deallocate(VME_OBJECT(src_entry));
17566 }
17567 vm_map_entry_dispose(src_entry);
17568 }
17569 }
17570 return result;
17571 }
17572
17573 bool
vm_map_is_exotic(vm_map_t map)17574 vm_map_is_exotic(
17575 vm_map_t map)
17576 {
17577 return VM_MAP_IS_EXOTIC(map);
17578 }
17579
17580 bool
vm_map_is_alien(vm_map_t map)17581 vm_map_is_alien(
17582 vm_map_t map)
17583 {
17584 return VM_MAP_IS_ALIEN(map);
17585 }
17586
17587 #if XNU_TARGET_OS_OSX
void
vm_map_mark_alien(
	vm_map_t map)
{
	/*
	 * Mark "map" as "alien" (the flag is read elsewhere via
	 * VM_MAP_IS_ALIEN(); see vm_map_is_alien() above).
	 * The map lock is taken so the store is serialized with other
	 * lock-holding readers/writers of the map's flags.
	 * Note: the flag is only ever set here, never cleared.
	 */
	vm_map_lock(map);
	map->is_alien = true;
	vm_map_unlock(map);
}
17596
void
vm_map_single_jit(
	vm_map_t map)
{
	/*
	 * Set the map's "single_jit" flag under the map lock.
	 * NOTE(review): presumably restricts the task to a single JIT
	 * region; the enforcement is not visible in this file — confirm
	 * against the consumers of map->single_jit.
	 * Like is_alien above, the flag is set-only: there is no
	 * corresponding clear operation.
	 */
	vm_map_lock(map);
	map->single_jit = true;
	vm_map_unlock(map);
}
17605 #endif /* XNU_TARGET_OS_OSX */
17606
17607 /*
17608 * Callers of this function must call vm_map_copy_require on
17609 * previously created vm_map_copy_t or pass a newly created
17610 * one to ensure that it hasn't been forged.
17611 */
17612 static kern_return_t
vm_map_copy_to_physcopy(vm_map_copy_t copy_map,vm_map_t target_map)17613 vm_map_copy_to_physcopy(
17614 vm_map_copy_t copy_map,
17615 vm_map_t target_map)
17616 {
17617 vm_map_size_t size;
17618 vm_map_entry_t entry;
17619 vm_map_entry_t new_entry;
17620 vm_object_t new_object;
17621 unsigned int pmap_flags;
17622 pmap_t new_pmap;
17623 vm_map_t new_map;
17624 vm_map_address_t src_start, src_end, src_cur;
17625 vm_map_address_t dst_start, dst_end, dst_cur;
17626 kern_return_t kr;
17627 void *kbuf;
17628
17629 /*
17630 * Perform the equivalent of vm_allocate() and memcpy().
17631 * Replace the mappings in "copy_map" with the newly allocated mapping.
17632 */
17633 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17634
17635 assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17636
17637 /* create a new pmap to map "copy_map" */
17638 pmap_flags = 0;
17639 assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17640 #if PMAP_CREATE_FORCE_4K_PAGES
17641 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17642 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17643 pmap_flags |= PMAP_CREATE_64BIT;
17644 new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17645 if (new_pmap == NULL) {
17646 return KERN_RESOURCE_SHORTAGE;
17647 }
17648
17649 /* allocate new VM object */
17650 size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17651 new_object = vm_object_allocate(size);
17652 assert(new_object);
17653
17654 /* allocate new VM map entry */
17655 new_entry = vm_map_copy_entry_create(copy_map);
17656 assert(new_entry);
17657
17658 /* finish initializing new VM map entry */
17659 new_entry->protection = VM_PROT_DEFAULT;
17660 new_entry->max_protection = VM_PROT_DEFAULT;
17661 new_entry->use_pmap = TRUE;
17662
17663 /* make new VM map entry point to new VM object */
17664 new_entry->vme_start = 0;
17665 new_entry->vme_end = size;
17666 VME_OBJECT_SET(new_entry, new_object, false, 0);
17667 VME_OFFSET_SET(new_entry, 0);
17668
17669 /* create a new pageable VM map to map "copy_map" */
17670 new_map = vm_map_create_options(new_pmap, 0, MACH_VM_MAX_ADDRESS,
17671 VM_MAP_CREATE_PAGEABLE);
17672 assert(new_map);
17673 vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17674
17675 /* map "copy_map" in the new VM map */
17676 src_start = 0;
17677 kr = vm_map_copyout_internal(
17678 new_map,
17679 &src_start,
17680 copy_map,
17681 copy_map->size,
17682 FALSE, /* consume_on_success */
17683 VM_PROT_DEFAULT,
17684 VM_PROT_DEFAULT,
17685 VM_INHERIT_DEFAULT);
17686 assert(kr == KERN_SUCCESS);
17687 src_end = src_start + copy_map->size;
17688
17689 /* map "new_object" in the new VM map */
17690 vm_object_reference(new_object);
17691 dst_start = 0;
17692 kr = vm_map_enter(new_map,
17693 &dst_start,
17694 size,
17695 0, /* mask */
17696 VM_FLAGS_ANYWHERE,
17697 VM_MAP_KERNEL_FLAGS_NONE,
17698 VM_KERN_MEMORY_OSFMK,
17699 new_object,
17700 0, /* offset */
17701 FALSE, /* needs copy */
17702 VM_PROT_DEFAULT,
17703 VM_PROT_DEFAULT,
17704 VM_INHERIT_DEFAULT);
17705 assert(kr == KERN_SUCCESS);
17706 dst_end = dst_start + size;
17707
17708 /* get a kernel buffer */
17709 kbuf = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_NOFAIL);
17710
17711 /* physically copy "copy_map" mappings to new VM object */
17712 for (src_cur = src_start, dst_cur = dst_start;
17713 src_cur < src_end;
17714 src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17715 vm_size_t bytes;
17716
17717 bytes = PAGE_SIZE;
17718 if (src_cur + PAGE_SIZE > src_end) {
17719 /* partial copy for last page */
17720 bytes = src_end - src_cur;
17721 assert(bytes > 0 && bytes < PAGE_SIZE);
17722 /* rest of dst page should be zero-filled */
17723 }
17724 /* get bytes from src mapping */
17725 kr = copyinmap(new_map, src_cur, kbuf, bytes);
17726 if (kr != KERN_SUCCESS) {
17727 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17728 }
17729 /* put bytes in dst mapping */
17730 assert(dst_cur < dst_end);
17731 assert(dst_cur + bytes <= dst_end);
17732 kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17733 if (kr != KERN_SUCCESS) {
17734 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17735 }
17736 }
17737
17738 /* free kernel buffer */
17739 kfree_data(kbuf, PAGE_SIZE);
17740
17741 /* destroy new map */
17742 vm_map_destroy(new_map);
17743 new_map = VM_MAP_NULL;
17744
17745 /* dispose of the old map entries in "copy_map" */
17746 while (vm_map_copy_first_entry(copy_map) !=
17747 vm_map_copy_to_entry(copy_map)) {
17748 entry = vm_map_copy_first_entry(copy_map);
17749 vm_map_copy_entry_unlink(copy_map, entry);
17750 if (entry->is_sub_map) {
17751 vm_map_deallocate(VME_SUBMAP(entry));
17752 } else {
17753 vm_object_deallocate(VME_OBJECT(entry));
17754 }
17755 vm_map_copy_entry_dispose(entry);
17756 }
17757
17758 /* change "copy_map"'s page_size to match "target_map" */
17759 copy_map->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
17760 copy_map->offset = 0;
17761 copy_map->size = size;
17762
17763 /* insert new map entry in "copy_map" */
17764 assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17765 vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17766
17767 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17768 return KERN_SUCCESS;
17769 }
17770
17771 void
17772 vm_map_copy_adjust_get_target_copy_map(
17773 vm_map_copy_t copy_map,
17774 vm_map_copy_t *target_copy_map_p);
17775 void
vm_map_copy_adjust_get_target_copy_map(vm_map_copy_t copy_map,vm_map_copy_t * target_copy_map_p)17776 vm_map_copy_adjust_get_target_copy_map(
17777 vm_map_copy_t copy_map,
17778 vm_map_copy_t *target_copy_map_p)
17779 {
17780 vm_map_copy_t target_copy_map;
17781 vm_map_entry_t entry, target_entry;
17782
17783 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17784 /* the caller already has a "target_copy_map": use it */
17785 return;
17786 }
17787
17788 /* the caller wants us to create a new copy of "copy_map" */
17789 target_copy_map = vm_map_copy_allocate();
17790 target_copy_map->type = copy_map->type;
17791 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17792 target_copy_map->offset = copy_map->offset;
17793 target_copy_map->size = copy_map->size;
17794 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17795 vm_map_store_init(&target_copy_map->cpy_hdr);
17796 for (entry = vm_map_copy_first_entry(copy_map);
17797 entry != vm_map_copy_to_entry(copy_map);
17798 entry = entry->vme_next) {
17799 target_entry = vm_map_copy_entry_create(target_copy_map);
17800 vm_map_entry_copy_full(target_entry, entry);
17801 if (target_entry->is_sub_map) {
17802 vm_map_reference(VME_SUBMAP(target_entry));
17803 } else {
17804 vm_object_reference(VME_OBJECT(target_entry));
17805 }
17806 vm_map_copy_entry_link(
17807 target_copy_map,
17808 vm_map_copy_last_entry(target_copy_map),
17809 target_entry);
17810 }
17811 entry = VM_MAP_ENTRY_NULL;
17812 *target_copy_map_p = target_copy_map;
17813 }
17814
17815 /*
17816 * Callers of this function must call vm_map_copy_require on
17817 * previously created vm_map_copy_t or pass a newly created
17818 * one to ensure that it hasn't been forged.
17819 */
17820 static void
vm_map_copy_trim(vm_map_copy_t copy_map,uint16_t new_page_shift,vm_map_offset_t trim_start,vm_map_offset_t trim_end)17821 vm_map_copy_trim(
17822 vm_map_copy_t copy_map,
17823 uint16_t new_page_shift,
17824 vm_map_offset_t trim_start,
17825 vm_map_offset_t trim_end)
17826 {
17827 uint16_t copy_page_shift;
17828 vm_map_entry_t entry, next_entry;
17829
17830 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17831 assert(copy_map->cpy_hdr.nentries > 0);
17832
17833 trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17834 trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17835
17836 /* use the new page_shift to do the clipping */
17837 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17838 copy_map->cpy_hdr.page_shift = new_page_shift;
17839
17840 for (entry = vm_map_copy_first_entry(copy_map);
17841 entry != vm_map_copy_to_entry(copy_map);
17842 entry = next_entry) {
17843 next_entry = entry->vme_next;
17844 if (entry->vme_end <= trim_start) {
17845 /* entry fully before trim range: skip */
17846 continue;
17847 }
17848 if (entry->vme_start >= trim_end) {
17849 /* entry fully after trim range: done */
17850 break;
17851 }
17852 /* clip entry if needed */
17853 vm_map_copy_clip_start(copy_map, entry, trim_start);
17854 vm_map_copy_clip_end(copy_map, entry, trim_end);
17855 /* dispose of entry */
17856 copy_map->size -= entry->vme_end - entry->vme_start;
17857 vm_map_copy_entry_unlink(copy_map, entry);
17858 if (entry->is_sub_map) {
17859 vm_map_deallocate(VME_SUBMAP(entry));
17860 } else {
17861 vm_object_deallocate(VME_OBJECT(entry));
17862 }
17863 vm_map_copy_entry_dispose(entry);
17864 entry = VM_MAP_ENTRY_NULL;
17865 }
17866
17867 /* restore copy_map's original page_shift */
17868 copy_map->cpy_hdr.page_shift = copy_page_shift;
17869 }
17870
17871 /*
17872 * Make any necessary adjustments to "copy_map" to allow it to be
17873 * mapped into "target_map".
17874 * If no changes were necessary, "target_copy_map" points to the
17875 * untouched "copy_map".
17876 * If changes are necessary, changes will be made to "target_copy_map".
17877 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17878 * copy the original "copy_map" to it before applying the changes.
17879 * The caller should discard "target_copy_map" if it's not the same as
17880 * the original "copy_map".
17881 */
17882 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
kern_return_t
vm_map_copy_adjust_to_target(
	vm_map_copy_t           src_copy_map,
	vm_map_offset_t         offset,
	vm_map_size_t           size,
	vm_map_t                target_map,
	boolean_t               copy,
	vm_map_copy_t           *target_copy_map_p,
	vm_map_offset_t         *overmap_start_p,
	vm_map_offset_t         *overmap_end_p,
	vm_map_offset_t         *trimmed_start_p)
{
	vm_map_copy_t           copy_map, target_copy_map;
	vm_map_size_t           target_size;
	vm_map_size_t           src_copy_map_size;
	vm_map_size_t           overmap_start, overmap_end;
	int                     misalignments;
	vm_map_entry_t          entry, target_entry;
	vm_map_offset_t         addr_adjustment;
	vm_map_offset_t         new_start, new_end;
	int                     copy_page_mask, target_page_mask;
	uint16_t                copy_page_shift, target_page_shift;
	vm_map_offset_t         trimmed_end;

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(src_copy_map);
	assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);

	/*
	 * Start working with "src_copy_map" but we'll switch
	 * to "target_copy_map" as soon as we start making adjustments.
	 */
	copy_map = src_copy_map;
	src_copy_map_size = src_copy_map->size;

	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
	target_page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
	target_page_mask = VM_MAP_PAGE_MASK(target_map);

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);

	target_copy_map = *target_copy_map_p;
	if (target_copy_map != VM_MAP_COPY_NULL) {
		vm_map_copy_require(target_copy_map);
	}

	/* the requested sub-range must lie within "copy_map" */
	if (offset + size > copy_map->size) {
		DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
		return KERN_INVALID_ARGUMENT;
	}

	/* trim the end */
	trimmed_end = 0;
	new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
	if (new_end < copy_map->size) {
		trimmed_end = src_copy_map_size - new_end;
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		/* from here on we only modify the (possibly cloned) target */
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    new_end, copy_map->size);
	}

	/* trim the start */
	new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
	if (new_start != 0) {
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    0, new_start);
	}
	*trimmed_start_p = new_start;

	/* target_size starts with what's left after trimming */
	target_size = copy_map->size;
	assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
	    "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
	    (uint64_t)target_size, (uint64_t)src_copy_map_size,
	    (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);

	/* check for misalignments but don't adjust yet */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	if (copy_page_shift < target_page_shift) {
		/*
		 * Remapping from 4K to 16K: check the VM object alignments
		 * throughout the range.
		 * If the start and end of the range are mis-aligned, we can
		 * over-map to re-align, and adjust the "overmap" start/end
		 * and "target_size" of the range accordingly.
		 * If there is any mis-alignment within the range:
		 *       if "copy":
		 *               we can do immediate-copy instead of copy-on-write,
		 *       else:
		 *               no way to remap and share; fail.
		 */
		for (entry = vm_map_copy_first_entry(copy_map);
		    entry != vm_map_copy_to_entry(copy_map);
		    entry = entry->vme_next) {
			vm_object_offset_t object_offset_start, object_offset_end;

			object_offset_start = VME_OFFSET(entry);
			object_offset_end = object_offset_start;
			object_offset_end += entry->vme_end - entry->vme_start;
			/*
			 * In this first pass, "overmap_start"/"overmap_end" are
			 * used as counters (0 or 1); they become byte amounts in
			 * the adjustment pass below.
			 */
			if (object_offset_start & target_page_mask) {
				if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
					overmap_start++;
				} else {
					misalignments++;
				}
			}
			if (object_offset_end & target_page_mask) {
				if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
					overmap_end++;
				} else {
					misalignments++;
				}
			}
		}
	}
	entry = VM_MAP_ENTRY_NULL;

	/* decide how to deal with misalignments */
	assert(overmap_start <= 1);
	assert(overmap_end <= 1);
	if (!overmap_start && !overmap_end && !misalignments) {
		/* copy_map is properly aligned for target_map ... */
		if (*trimmed_start_p) {
			/* ... but we trimmed it, so still need to adjust */
		} else {
			/* ... and we didn't trim anything: we're done */
			if (target_copy_map == VM_MAP_COPY_NULL) {
				/* no clone was needed: hand back the original */
				target_copy_map = copy_map;
			}
			*target_copy_map_p = target_copy_map;
			*overmap_start_p = 0;
			*overmap_end_p = 0;
			DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
			return KERN_SUCCESS;
		}
	} else if (misalignments && !copy) {
		/* can't "share" if misaligned */
		DEBUG4K_ADJUST("unsupported sharing\n");
#if MACH_ASSERT
		if (debug4k_panic_on_misaligned_sharing) {
			panic("DEBUG4k %s:%d unsupported sharing", __FUNCTION__, __LINE__);
		}
#endif /* MACH_ASSERT */
		DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
		return KERN_NOT_SUPPORTED;
	} else {
		/* can't virtual-copy if misaligned (but can physical-copy) */
		DEBUG4K_ADJUST("mis-aligned copying\n");
	}

	/* get a "target_copy_map" if needed and switch to it */
	vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
	copy_map = target_copy_map;

	if (misalignments && copy) {
		vm_map_size_t target_copy_map_size;

		/*
		 * Can't do copy-on-write with misaligned mappings.
		 * Replace the mappings with a physical copy of the original
		 * mappings' contents.
		 */
		target_copy_map_size = target_copy_map->size;
		kern_return_t kr = vm_map_copy_to_physcopy(target_copy_map, target_map);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		*target_copy_map_p = target_copy_map;
		*overmap_start_p = 0;
		/* the physcopy may have rounded the size up to target pages */
		*overmap_end_p = target_copy_map->size - target_copy_map_size;
		DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
		return KERN_SUCCESS;
	}

	/* apply the adjustments */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	/* remove copy_map->offset, so that everything starts at offset 0 */
	addr_adjustment = copy_map->offset;
	/* also remove whatever we trimmed from the start */
	addr_adjustment += *trimmed_start_p;
	for (target_entry = vm_map_copy_first_entry(target_copy_map);
	    target_entry != vm_map_copy_to_entry(target_copy_map);
	    target_entry = target_entry->vme_next) {
		vm_object_offset_t object_offset_start, object_offset_end;

		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
		object_offset_start = VME_OFFSET(target_entry);
		if (object_offset_start & target_page_mask) {
			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
			if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
				/*
				 * start of 1st entry is mis-aligned:
				 * re-adjust by over-mapping.
				 */
				/* overmap_start = distance back to a target-page boundary */
				overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
				VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
			} else {
				misalignments++;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
				assert(copy);
			}
		}

		if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
			/* growing the first entry backwards grows the total size */
			target_size += overmap_start;
		} else {
			/* shift subsequent entries to account for the over-mapping */
			target_entry->vme_start += overmap_start;
		}
		target_entry->vme_end += overmap_start;

		object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
		if (object_offset_end & target_page_mask) {
			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
			if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
				/*
				 * end of last entry is mis-aligned: re-adjust by over-mapping.
				 */
				/* overmap_end = distance forward to a target-page boundary */
				overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
				target_entry->vme_end += overmap_end;
				target_size += overmap_end;
			} else {
				misalignments++;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
				assert(copy);
			}
		}
		/* rebase the entry so the first entry starts at address 0 */
		target_entry->vme_start -= addr_adjustment;
		target_entry->vme_end -= addr_adjustment;
		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
	}

	/* reflect the adjustments in the copy's header */
	target_copy_map->size = target_size;
	target_copy_map->offset += overmap_start;
	target_copy_map->offset -= addr_adjustment;
	target_copy_map->cpy_hdr.page_shift = target_page_shift;

//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
	assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
	assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));

	*target_copy_map_p = target_copy_map;
	*overmap_start_p = overmap_start;
	*overmap_end_p = overmap_end;

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
	return KERN_SUCCESS;
}
18150
18151 kern_return_t
vm_map_range_physical_size(vm_map_t map,vm_map_address_t start,mach_vm_size_t size,mach_vm_size_t * phys_size)18152 vm_map_range_physical_size(
18153 vm_map_t map,
18154 vm_map_address_t start,
18155 mach_vm_size_t size,
18156 mach_vm_size_t * phys_size)
18157 {
18158 kern_return_t kr;
18159 vm_map_copy_t copy_map, target_copy_map;
18160 vm_map_offset_t adjusted_start, adjusted_end;
18161 vm_map_size_t adjusted_size;
18162 vm_prot_t cur_prot, max_prot;
18163 vm_map_offset_t overmap_start, overmap_end, trimmed_start, end;
18164 vm_map_kernel_flags_t vmk_flags;
18165
18166 if (size == 0) {
18167 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size);
18168 *phys_size = 0;
18169 return KERN_SUCCESS;
18170 }
18171
18172 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
18173 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
18174 if (__improbable(os_add_overflow(start, size, &end) ||
18175 adjusted_end <= adjusted_start)) {
18176 /* wraparound */
18177 printf("%s:%d(start=0x%llx, size=0x%llx) pgmask 0x%x: wraparound\n", __FUNCTION__, __LINE__, (uint64_t)start, (uint64_t)size, VM_MAP_PAGE_MASK(map));
18178 *phys_size = 0;
18179 return KERN_INVALID_ARGUMENT;
18180 }
18181 assert(adjusted_end > adjusted_start);
18182 adjusted_size = adjusted_end - adjusted_start;
18183 *phys_size = adjusted_size;
18184 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
18185 return KERN_SUCCESS;
18186 }
18187 if (start == 0) {
18188 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
18189 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
18190 if (__improbable(adjusted_end <= adjusted_start)) {
18191 /* wraparound */
18192 printf("%s:%d(start=0x%llx, size=0x%llx) pgmask 0x%x: wraparound\n", __FUNCTION__, __LINE__, (uint64_t)start, (uint64_t)size, PAGE_MASK);
18193 *phys_size = 0;
18194 return KERN_INVALID_ARGUMENT;
18195 }
18196 assert(adjusted_end > adjusted_start);
18197 adjusted_size = adjusted_end - adjusted_start;
18198 *phys_size = adjusted_size;
18199 return KERN_SUCCESS;
18200 }
18201
18202 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
18203 vmk_flags.vmkf_copy_pageable = TRUE;
18204 vmk_flags.vmkf_copy_same_map = TRUE;
18205 assert(adjusted_size != 0);
18206 cur_prot = VM_PROT_NONE; /* legacy mode */
18207 max_prot = VM_PROT_NONE; /* legacy mode */
18208 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
18209 FALSE /* copy */,
18210 ©_map,
18211 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
18212 vmk_flags);
18213 if (kr != KERN_SUCCESS) {
18214 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
18215 //assert(0);
18216 *phys_size = 0;
18217 return kr;
18218 }
18219 assert(copy_map != VM_MAP_COPY_NULL);
18220 target_copy_map = copy_map;
18221 DEBUG4K_ADJUST("adjusting...\n");
18222 kr = vm_map_copy_adjust_to_target(
18223 copy_map,
18224 start - adjusted_start, /* offset */
18225 size, /* size */
18226 kernel_map,
18227 FALSE, /* copy */
18228 &target_copy_map,
18229 &overmap_start,
18230 &overmap_end,
18231 &trimmed_start);
18232 if (kr == KERN_SUCCESS) {
18233 if (target_copy_map->size != *phys_size) {
18234 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
18235 }
18236 *phys_size = target_copy_map->size;
18237 } else {
18238 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
18239 //assert(0);
18240 *phys_size = 0;
18241 }
18242 vm_map_copy_discard(copy_map);
18243 copy_map = VM_MAP_COPY_NULL;
18244
18245 return kr;
18246 }
18247
18248
18249 kern_return_t
memory_entry_check_for_adjustment(vm_map_t src_map,ipc_port_t port,vm_map_offset_t * overmap_start,vm_map_offset_t * overmap_end)18250 memory_entry_check_for_adjustment(
18251 vm_map_t src_map,
18252 ipc_port_t port,
18253 vm_map_offset_t *overmap_start,
18254 vm_map_offset_t *overmap_end)
18255 {
18256 kern_return_t kr = KERN_SUCCESS;
18257 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
18258
18259 assert(port);
18260 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
18261
18262 vm_named_entry_t named_entry;
18263
18264 named_entry = mach_memory_entry_from_port(port);
18265 named_entry_lock(named_entry);
18266 copy_map = named_entry->backing.copy;
18267 target_copy_map = copy_map;
18268
18269 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
18270 vm_map_offset_t trimmed_start;
18271
18272 trimmed_start = 0;
18273 DEBUG4K_ADJUST("adjusting...\n");
18274 kr = vm_map_copy_adjust_to_target(
18275 copy_map,
18276 0, /* offset */
18277 copy_map->size, /* size */
18278 src_map,
18279 FALSE, /* copy */
18280 &target_copy_map,
18281 overmap_start,
18282 overmap_end,
18283 &trimmed_start);
18284 assert(trimmed_start == 0);
18285 }
18286 named_entry_unlock(named_entry);
18287
18288 return kr;
18289 }
18290
18291
18292 /*
18293 * Routine: vm_remap
18294 *
18295 * Map portion of a task's address space.
18296 * Mapped region must not overlap more than
18297 * one vm memory object. Protections and
18298 * inheritance attributes remain the same
18299 * as in the original task and are out parameters.
18300 * Source and Target task can be identical
18301 * Other attributes are identical as for vm_map()
18302 */
18303 kern_return_t
vm_map_remap(vm_map_t target_map,vm_map_address_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,vm_map_t src_map,vm_map_offset_t memory_address,boolean_t copy,vm_prot_t * cur_protection,vm_prot_t * max_protection,vm_inherit_t inheritance)18304 vm_map_remap(
18305 vm_map_t target_map,
18306 vm_map_address_t *address,
18307 vm_map_size_t size,
18308 vm_map_offset_t mask,
18309 int flags,
18310 vm_map_kernel_flags_t vmk_flags,
18311 vm_tag_t tag,
18312 vm_map_t src_map,
18313 vm_map_offset_t memory_address,
18314 boolean_t copy,
18315 vm_prot_t *cur_protection, /* IN/OUT */
18316 vm_prot_t *max_protection, /* IN/OUT */
18317 vm_inherit_t inheritance)
18318 {
18319 kern_return_t result;
18320 vm_map_entry_t entry;
18321 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
18322 vm_map_entry_t new_entry;
18323 vm_map_copy_t copy_map;
18324 vm_map_offset_t offset_in_mapping;
18325 vm_map_size_t target_size = 0;
18326 vm_map_size_t src_page_mask, target_page_mask;
18327 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
18328 vm_map_offset_t initial_memory_address;
18329 vm_map_size_t initial_size;
18330 VM_MAP_ZAP_DECLARE(zap_list);
18331
18332 if (target_map == VM_MAP_NULL) {
18333 return KERN_INVALID_ARGUMENT;
18334 }
18335
18336 initial_memory_address = memory_address;
18337 initial_size = size;
18338 src_page_mask = VM_MAP_PAGE_MASK(src_map);
18339 target_page_mask = VM_MAP_PAGE_MASK(target_map);
18340
18341 switch (inheritance) {
18342 case VM_INHERIT_NONE:
18343 case VM_INHERIT_COPY:
18344 case VM_INHERIT_SHARE:
18345 if (size != 0 && src_map != VM_MAP_NULL) {
18346 break;
18347 }
18348 OS_FALLTHROUGH;
18349 default:
18350 return KERN_INVALID_ARGUMENT;
18351 }
18352
18353 if (src_page_mask != target_page_mask) {
18354 if (copy) {
18355 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18356 } else {
18357 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18358 }
18359 }
18360
18361 /*
18362 * If the user is requesting that we return the address of the
18363 * first byte of the data (rather than the base of the page),
18364 * then we use different rounding semantics: specifically,
18365 * we assume that (memory_address, size) describes a region
18366 * all of whose pages we must cover, rather than a base to be truncated
18367 * down and a size to be added to that base. So we figure out
18368 * the highest page that the requested region includes and make
18369 * sure that the size will cover it.
18370 *
18371 * The key example we're worried about it is of the form:
18372 *
18373 * memory_address = 0x1ff0, size = 0x20
18374 *
18375 * With the old semantics, we round down the memory_address to 0x1000
18376 * and round up the size to 0x1000, resulting in our covering *only*
18377 * page 0x1000. With the new semantics, we'd realize that the region covers
18378 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
18379 * 0x1000 and page 0x2000 in the region we remap.
18380 */
18381 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18382 vm_map_offset_t range_start, range_end;
18383
18384 range_start = vm_map_trunc_page(memory_address, src_page_mask);
18385 range_end = vm_map_round_page(memory_address + size, src_page_mask);
18386 memory_address = range_start;
18387 size = range_end - range_start;
18388 offset_in_mapping = initial_memory_address - memory_address;
18389 } else {
18390 /*
18391 * IMPORTANT:
18392 * This legacy code path is broken: for the range mentioned
18393 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
18394 * two 4k pages, it yields [ memory_address = 0x1000,
18395 * size = 0x1000 ], which covers only the first 4k page.
18396 * BUT some code unfortunately depends on this bug, so we
18397 * can't fix it without breaking something.
18398 * New code should get automatically opted in the new
18399 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
18400 */
18401 offset_in_mapping = 0;
18402 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
18403 size = vm_map_round_page(size, src_page_mask);
18404 initial_memory_address = memory_address;
18405 initial_size = size;
18406 }
18407
18408
18409 if (size == 0) {
18410 return KERN_INVALID_ARGUMENT;
18411 }
18412
18413 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
18414 /* must be copy-on-write to be "media resilient" */
18415 if (!copy) {
18416 return KERN_INVALID_ARGUMENT;
18417 }
18418 }
18419
18420 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
18421 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
18422
18423 assert(size != 0);
18424 result = vm_map_copy_extract(src_map,
18425 memory_address,
18426 size,
18427 copy, ©_map,
18428 cur_protection, /* IN/OUT */
18429 max_protection, /* IN/OUT */
18430 inheritance,
18431 vmk_flags);
18432 if (result != KERN_SUCCESS) {
18433 return result;
18434 }
18435 assert(copy_map != VM_MAP_COPY_NULL);
18436
18437 overmap_start = 0;
18438 overmap_end = 0;
18439 trimmed_start = 0;
18440 target_size = size;
18441 if (src_page_mask != target_page_mask) {
18442 vm_map_copy_t target_copy_map;
18443
18444 target_copy_map = copy_map; /* can modify "copy_map" itself */
18445 DEBUG4K_ADJUST("adjusting...\n");
18446 result = vm_map_copy_adjust_to_target(
18447 copy_map,
18448 offset_in_mapping, /* offset */
18449 initial_size,
18450 target_map,
18451 copy,
18452 &target_copy_map,
18453 &overmap_start,
18454 &overmap_end,
18455 &trimmed_start);
18456 if (result != KERN_SUCCESS) {
18457 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
18458 vm_map_copy_discard(copy_map);
18459 return result;
18460 }
18461 if (trimmed_start == 0) {
18462 /* nothing trimmed: no adjustment needed */
18463 } else if (trimmed_start >= offset_in_mapping) {
18464 /* trimmed more than offset_in_mapping: nothing left */
18465 assert(overmap_start == 0);
18466 assert(overmap_end == 0);
18467 offset_in_mapping = 0;
18468 } else {
18469 /* trimmed some of offset_in_mapping: adjust */
18470 assert(overmap_start == 0);
18471 assert(overmap_end == 0);
18472 offset_in_mapping -= trimmed_start;
18473 }
18474 offset_in_mapping += overmap_start;
18475 target_size = target_copy_map->size;
18476 }
18477
18478 /*
18479 * Allocate/check a range of free virtual address
18480 * space for the target
18481 */
18482 *address = vm_map_trunc_page(*address, target_page_mask);
18483 vm_map_lock(target_map);
18484 target_size = vm_map_round_page(target_size, target_page_mask);
18485 result = vm_map_remap_range_allocate(target_map, address,
18486 target_size, mask, flags, vmk_flags, tag,
18487 &insp_entry, &zap_list);
18488
18489 for (entry = vm_map_copy_first_entry(copy_map);
18490 entry != vm_map_copy_to_entry(copy_map);
18491 entry = new_entry) {
18492 new_entry = entry->vme_next;
18493 vm_map_copy_entry_unlink(copy_map, entry);
18494 if (result == KERN_SUCCESS) {
18495 if (vmk_flags.vmkf_remap_prot_copy) {
18496 /*
18497 * This vm_map_remap() is for a
18498 * vm_protect(VM_PROT_COPY), so the caller
18499 * expects to be allowed to add write access
18500 * to this new mapping. This is done by
18501 * adding VM_PROT_WRITE to each entry's
18502 * max_protection... unless some security
18503 * settings disallow it.
18504 */
18505 bool allow_write = false;
18506 if (entry->vme_permanent) {
18507 /* immutable mapping... */
18508 if ((entry->max_protection & VM_PROT_EXECUTE) &&
18509 developer_mode_state()) {
18510 /*
18511 * ... but executable and
18512 * possibly being debugged,
18513 * so let's allow it to become
18514 * writable, for breakpoints
18515 * and dtrace probes, for
18516 * example.
18517 */
18518 allow_write = true;
18519 } else {
18520 printf("%d[%s] vm_remap(0x%llx,0x%llx) VM_PROT_COPY denied on permanent mapping prot 0x%x/0x%x developer %d\n",
18521 proc_selfpid(),
18522 (get_bsdtask_info(current_task())
18523 ? proc_name_address(get_bsdtask_info(current_task()))
18524 : "?"),
18525 (uint64_t)memory_address,
18526 (uint64_t)size,
18527 entry->protection,
18528 entry->max_protection,
18529 developer_mode_state());
18530 DTRACE_VM6(vm_map_delete_permanent_deny_protcopy,
18531 vm_map_entry_t, entry,
18532 vm_map_offset_t, entry->vme_start,
18533 vm_map_offset_t, entry->vme_end,
18534 vm_prot_t, entry->protection,
18535 vm_prot_t, entry->max_protection,
18536 int, VME_ALIAS(entry));
18537 }
18538 } else {
18539 allow_write = true;
18540 }
18541
18542 /*
18543 * VM_PROT_COPY: allow this mapping to become
18544 * writable, unless it was "permanent".
18545 */
18546 if (allow_write) {
18547 entry->max_protection |= VM_PROT_WRITE;
18548 }
18549 }
18550 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18551 /* no codesigning -> read-only access */
18552 entry->max_protection = VM_PROT_READ;
18553 entry->protection = VM_PROT_READ;
18554 entry->vme_resilient_codesign = TRUE;
18555 }
18556 entry->vme_start += *address;
18557 entry->vme_end += *address;
18558 assert(!entry->map_aligned);
18559 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
18560 !entry->is_sub_map &&
18561 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
18562 VME_OBJECT(entry)->internal)) {
18563 entry->vme_resilient_media = TRUE;
18564 }
18565 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
18566 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
18567 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
18568 vm_map_store_entry_link(target_map, insp_entry, entry,
18569 vmk_flags);
18570 insp_entry = entry;
18571 } else {
18572 if (!entry->is_sub_map) {
18573 vm_object_deallocate(VME_OBJECT(entry));
18574 } else {
18575 vm_map_deallocate(VME_SUBMAP(entry));
18576 }
18577 vm_map_copy_entry_dispose(entry);
18578 }
18579 }
18580
18581 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18582 *cur_protection = VM_PROT_READ;
18583 *max_protection = VM_PROT_READ;
18584 }
18585
18586 if (result == KERN_SUCCESS) {
18587 target_map->size += target_size;
18588 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18589
18590 }
18591 vm_map_unlock(target_map);
18592
18593 vm_map_zap_dispose(&zap_list);
18594
18595 if (result == KERN_SUCCESS && target_map->wiring_required) {
18596 result = vm_map_wire_kernel(target_map, *address,
18597 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18598 TRUE);
18599 }
18600
18601 /*
18602 * If requested, return the address of the data pointed to by the
18603 * request, rather than the base of the resulting page.
18604 */
18605 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18606 *address += offset_in_mapping;
18607 }
18608
18609 if (src_page_mask != target_page_mask) {
18610 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18611 }
18612 vm_map_copy_discard(copy_map);
18613 copy_map = VM_MAP_COPY_NULL;
18614
18615 return result;
18616 }
18617
18618 /*
18619 * Routine: vm_map_remap_range_allocate
18620 *
18621 * Description:
18622 * Allocate a range in the specified virtual address map.
18623 * returns the address and the map entry just before the allocated
18624 * range
18625 *
18626 * Map must be locked.
18627 */
18628
static kern_return_t
vm_map_remap_range_allocate(
	vm_map_t                map,
	vm_map_address_t        *address,       /* IN/OUT */
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	__unused vm_tag_t       tag,
	vm_map_entry_t          *map_entry,     /* OUT: entry just before the range */
	vm_map_zap_t            zap_list)
{
	vm_map_entry_t  entry;
	vm_map_offset_t start;
	kern_return_t   kr;

	/* caller passes the requested (or hint) address in *address */
	start = *address;

	if (flags & VM_FLAGS_ANYWHERE) {
		/*
		 * "Anywhere" allocation: let the map choose a free range.
		 */
		if (flags & VM_FLAGS_RANDOM_ADDR) {
			vmk_flags.vmkf_random_address = true;
		}

		if (start) {
			/* override the target range if a hint has been provided */
			vmk_flags.vmkf_range_id = (map == kernel_map ?
			    kmem_addr_get_range(start, size) :
			    VM_MAP_REMAP_RANGE_ID(map, NULL, start, size));
		}

		/*
		 * Find a hole of "size" bytes satisfying "mask";
		 * on success "start" is the chosen address and "entry"
		 * the entry preceding it.
		 */
		kr = vm_map_locate_space(map, size, mask, vmk_flags,
		    &start, &entry);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		*address = start;
	} else {
		/*
		 * Fixed-address allocation: validate the caller's range.
		 */
		vm_map_entry_t  temp_entry;
		vm_map_offset_t end;

		/*
		 * Verify that:
		 * the address doesn't itself violate
		 * the mask requirement.
		 */

		if ((start & mask) != 0) {
			return KERN_NO_SPACE;
		}


		/*
		 * ... the address is within bounds
		 */

		end = start + size;

		if ((start < map->min_offset) ||
		    (end > map->max_offset) ||
		    (start >= end)) {
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * If we're asked to overwrite whatever was mapped in that
		 * range, first deallocate that range.
		 */
		if (flags & VM_FLAGS_OVERWRITE) {
			vmr_flags_t remove_flags = VM_MAP_REMOVE_NO_MAP_ALIGN;

			/*
			 * We use a "zap_list" to avoid having to unlock
			 * the "map" in vm_map_delete(), which would compromise
			 * the atomicity of the "deallocate" and then "remap"
			 * combination.
			 */
			remove_flags |= VM_MAP_REMOVE_NO_YIELD;

			if (vmk_flags.vmkf_overwrite_immutable) {
				/* caller explicitly allowed overwriting immutable mappings */
				remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
			}
			if (vmk_flags.vmkf_remap_prot_copy) {
				remove_flags |= VM_MAP_REMOVE_IMMUTABLE_CODE;
			}
			/* removed entries are parked on zap_list; caller disposes later */
			kr = vm_map_delete(map, start, end, remove_flags,
			    KMEM_GUARD_NONE, zap_list).kmr_return;
			if (kr != KERN_SUCCESS) {
				/* XXX FBDP restore zap_list? */
				return kr;
			}
		}

		/*
		 * ... the starting address isn't allocated
		 */

		if (vm_map_lookup_entry(map, start, &temp_entry)) {
			return KERN_NO_SPACE;
		}

		entry = temp_entry;

		/*
		 * ... the next region doesn't overlap the
		 * end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end)) {
			return KERN_NO_SPACE;
		}
	}
	*map_entry = entry;
	return KERN_SUCCESS;
}
18744
18745 /*
18746 * vm_map_switch:
18747 *
18748 * Set the address map for the current thread to the specified map
18749 */
18750
18751 vm_map_t
vm_map_switch(vm_map_t map)18752 vm_map_switch(
18753 vm_map_t map)
18754 {
18755 int mycpu;
18756 thread_t thread = current_thread();
18757 vm_map_t oldmap = thread->map;
18758
18759 mp_disable_preemption();
18760 mycpu = cpu_number();
18761
18762 /*
18763 * Deactivate the current map and activate the requested map
18764 */
18765 PMAP_SWITCH_USER(thread, map, mycpu);
18766
18767 mp_enable_preemption();
18768 return oldmap;
18769 }
18770
18771
18772 /*
18773 * Routine: vm_map_write_user
18774 *
18775 * Description:
18776 * Copy out data from a kernel space into space in the
18777 * destination map. The space must already exist in the
18778 * destination map.
18779 * NOTE: This routine should only be called by threads
18780 * which can block on a page fault. i.e. kernel mode user
18781 * threads.
18782 *
18783 */
18784 kern_return_t
vm_map_write_user(vm_map_t map,void * src_p,vm_map_address_t dst_addr,vm_size_t size)18785 vm_map_write_user(
18786 vm_map_t map,
18787 void *src_p,
18788 vm_map_address_t dst_addr,
18789 vm_size_t size)
18790 {
18791 kern_return_t kr = KERN_SUCCESS;
18792
18793 if (current_map() == map) {
18794 if (copyout(src_p, dst_addr, size)) {
18795 kr = KERN_INVALID_ADDRESS;
18796 }
18797 } else {
18798 vm_map_t oldmap;
18799
18800 /* take on the identity of the target map while doing */
18801 /* the transfer */
18802
18803 vm_map_reference(map);
18804 oldmap = vm_map_switch(map);
18805 if (copyout(src_p, dst_addr, size)) {
18806 kr = KERN_INVALID_ADDRESS;
18807 }
18808 vm_map_switch(oldmap);
18809 vm_map_deallocate(map);
18810 }
18811 return kr;
18812 }
18813
18814 /*
18815 * Routine: vm_map_read_user
18816 *
18817 * Description:
18818 * Copy in data from a user space source map into the
18819 * kernel map. The space must already exist in the
18820 * kernel map.
18821 * NOTE: This routine should only be called by threads
18822 * which can block on a page fault. i.e. kernel mode user
18823 * threads.
18824 *
18825 */
18826 kern_return_t
vm_map_read_user(vm_map_t map,vm_map_address_t src_addr,void * dst_p,vm_size_t size)18827 vm_map_read_user(
18828 vm_map_t map,
18829 vm_map_address_t src_addr,
18830 void *dst_p,
18831 vm_size_t size)
18832 {
18833 kern_return_t kr = KERN_SUCCESS;
18834
18835 if (current_map() == map) {
18836 if (copyin(src_addr, dst_p, size)) {
18837 kr = KERN_INVALID_ADDRESS;
18838 }
18839 } else {
18840 vm_map_t oldmap;
18841
18842 /* take on the identity of the target map while doing */
18843 /* the transfer */
18844
18845 vm_map_reference(map);
18846 oldmap = vm_map_switch(map);
18847 if (copyin(src_addr, dst_p, size)) {
18848 kr = KERN_INVALID_ADDRESS;
18849 }
18850 vm_map_switch(oldmap);
18851 vm_map_deallocate(map);
18852 }
18853 return kr;
18854 }
18855
18856
18857 /*
18858 * vm_map_check_protection:
18859 *
18860 * Assert that the target map allows the specified
18861 * privilege on the entire address region given.
18862 * The entire region must be allocated.
18863 */
18864 boolean_t
vm_map_check_protection(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t protection)18865 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18866 vm_map_offset_t end, vm_prot_t protection)
18867 {
18868 vm_map_entry_t entry;
18869 vm_map_entry_t tmp_entry;
18870
18871 vm_map_lock(map);
18872
18873 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18874 vm_map_unlock(map);
18875 return FALSE;
18876 }
18877
18878 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18879 vm_map_unlock(map);
18880 return FALSE;
18881 }
18882
18883 entry = tmp_entry;
18884
18885 while (start < end) {
18886 if (entry == vm_map_to_entry(map)) {
18887 vm_map_unlock(map);
18888 return FALSE;
18889 }
18890
18891 /*
18892 * No holes allowed!
18893 */
18894
18895 if (start < entry->vme_start) {
18896 vm_map_unlock(map);
18897 return FALSE;
18898 }
18899
18900 /*
18901 * Check protection associated with entry.
18902 */
18903
18904 if ((entry->protection & protection) != protection) {
18905 vm_map_unlock(map);
18906 return FALSE;
18907 }
18908
18909 /* go to next entry */
18910
18911 start = entry->vme_end;
18912 entry = entry->vme_next;
18913 }
18914 vm_map_unlock(map);
18915 return TRUE;
18916 }
18917
kern_return_t
vm_map_purgable_control(
	vm_map_t                map,
	vm_map_offset_t         address,
	vm_purgable_t           control,
	int                     *state)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	kern_return_t           kr;
	boolean_t               was_nonvolatile;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object. Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_GET_STATE &&
	    control != VM_PURGABLE_PURGE_ALL &&
	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control == VM_PURGABLE_PURGE_ALL) {
		/* global operation: no address lookup needed */
		vm_purgeable_object_purge_all();
		return KERN_SUCCESS;
	}

	/* for SET operations, the requested state itself must be valid */
	if ((control == VM_PURGABLE_SET_STATE ||
	    control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
	    ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->protection & VM_PROT_WRITE) == 0 &&
	    control != VM_PURGABLE_GET_STATE) {
		/*
		 * Can't apply purgable controls to something you can't write.
		 */
		vm_map_unlock_read(map);
		return KERN_PROTECTION_FAILURE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL ||
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Object must already be present and be purgeable.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/* take the object lock before dropping the map lock below */
	vm_object_lock(object);

#if 00
	if (VME_OFFSET(entry) != 0 ||
	    entry->vme_end - entry->vme_start != object->vo_size) {
		/*
		 * Can only apply purgable controls to the whole (existing)
		 * object at once.
		 */
		vm_map_unlock_read(map);
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}
#endif

	assert(!entry->is_sub_map);
	assert(!entry->use_pmap); /* purgeable has its own accounting */

	/*
	 * The object lock is held, so the map lock can be released;
	 * "entry" must not be dereferenced past this point.
	 */
	vm_map_unlock_read(map);

	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);

	kr = vm_object_purgable_control(object, control, state);

	if (was_nonvolatile &&
	    object->purgable != VM_PURGABLE_NONVOLATILE &&
	    map->pmap == kernel_pmap) {
#if DEBUG
		/* record who volatilized a kernel-owned object (debug aid) */
		object->vo_purgeable_volatilizer = kernel_task;
#endif /* DEBUG */
	}

	vm_object_unlock(object);

	return kr;
}
19022
/*
 * vm_map_footprint_query_page_info:
 *
 * Compute the footprint "disposition" (VM_PAGE_QUERY_PAGE_* bits) of the
 * page at "curr_s_offset" inside "map_entry", combining ownership /
 * purgeability attributes of the backing object with what the pmap
 * reports for that address.  Result is returned through *disposition_p.
 *
 * The map must be locked and must not be using corpse footprint data.
 */
void
vm_map_footprint_query_page_info(
	vm_map_t        map,
	vm_map_entry_t  map_entry,
	vm_map_offset_t curr_s_offset,
	int             *disposition_p)
{
	int             pmap_disp;
	vm_object_t     object = VM_OBJECT_NULL;
	int             disposition;
	int             effective_page_size;

	vm_map_lock_assert_held(map);
	assert(!map->has_corpse_footprint);
	assert(curr_s_offset >= map_entry->vme_start);
	assert(curr_s_offset < map_entry->vme_end);

	if (map_entry->is_sub_map) {
		if (!map_entry->use_pmap) {
			/* nested pmap: no footprint */
			*disposition_p = 0;
			return;
		}
	} else {
		object = VME_OBJECT(map_entry);
		if (object == VM_OBJECT_NULL) {
			/* nothing mapped here: no need to ask */
			*disposition_p = 0;
			return;
		}
	}

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

	pmap_disp = 0;

	/*
	 * Query the pmap.
	 */
	pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);

	/*
	 * Compute this page's disposition.
	 */
	disposition = 0;

	/* deal with "alternate accounting" first */
	if (!map_entry->is_sub_map &&
	    object->vo_no_footprint) {
		/* does not count in footprint */
		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
	} else if (!map_entry->is_sub_map &&
	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
	    (object->purgable == VM_PURGABLE_DENY &&
	    object->vo_ledger_tag)) &&
	    VM_OBJECT_OWNER(object) != NULL &&
	    VM_OBJECT_OWNER(object)->map == map) {
		/* non-volatile purgeable (or ledger-tagged) object owned by this map */
		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
		if ((((curr_s_offset
		    - map_entry->vme_start
		    + VME_OFFSET(map_entry))
		    / effective_page_size) <
		    (object->resident_page_count +
		    vm_compressor_pager_get_count(object->pager)))) {
			/*
			 * Non-volatile purgeable object owned
			 * by this task: report the first
			 * "#resident + #compressed" pages as
			 * "resident" (to show that they
			 * contribute to the footprint) but not
			 * "dirty" (to avoid double-counting
			 * with the fake "non-volatile" region
			 * we'll report at the end of the
			 * address space to account for all
			 * (mapped or not) non-volatile memory
			 * owned by this task.
			 */
			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
		}
	} else if (!map_entry->is_sub_map &&
	    (object->purgable == VM_PURGABLE_VOLATILE ||
	    object->purgable == VM_PURGABLE_EMPTY) &&
	    VM_OBJECT_OWNER(object) != NULL &&
	    VM_OBJECT_OWNER(object)->map == map) {
		/* volatile/empty purgeable object owned by this map */
		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
		if ((((curr_s_offset
		    - map_entry->vme_start
		    + VME_OFFSET(map_entry))
		    / effective_page_size) <
		    object->wired_page_count)) {
			/*
			 * Volatile|empty purgeable object owned
			 * by this task: report the first
			 * "#wired" pages as "resident" (to
			 * show that they contribute to the
			 * footprint) but not "dirty" (to avoid
			 * double-counting with the fake
			 * "non-volatile" region we'll report
			 * at the end of the address space to
			 * account for all (mapped or not)
			 * non-volatile memory owned by this
			 * task.
			 */
			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
		}
	} else if (!map_entry->is_sub_map &&
	    map_entry->iokit_acct &&
	    object->internal &&
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Non-purgeable IOKit memory: phys_footprint
		 * includes the entire virtual mapping.
		 */
		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
		disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
		disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
	} else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
	    PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
		/* alternate accounting */
#if __arm64__ && (DEVELOPMENT || DEBUG)
		if (map->pmap->footprint_was_suspended) {
			/*
			 * The assertion below can fail if dyld
			 * suspended footprint accounting
			 * while doing some adjustments to
			 * this page; the mapping would say
			 * "use pmap accounting" but the page
			 * would be marked "alternate
			 * accounting".
			 */
		} else
#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
		{
			assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
		}
		disposition = 0;
	} else {
		/* default: trust what the pmap reported for this page */
		if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
			assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
			disposition |= VM_PAGE_QUERY_PAGE_REF;
			if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
			} else {
				disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
			}
			if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
				disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
			}
		} else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
			assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
			disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
		}
	}

	*disposition_p = disposition;
}
19180
19181 kern_return_t
vm_map_page_query_internal(vm_map_t target_map,vm_map_offset_t offset,int * disposition,int * ref_count)19182 vm_map_page_query_internal(
19183 vm_map_t target_map,
19184 vm_map_offset_t offset,
19185 int *disposition,
19186 int *ref_count)
19187 {
19188 kern_return_t kr;
19189 vm_page_info_basic_data_t info;
19190 mach_msg_type_number_t count;
19191
19192 count = VM_PAGE_INFO_BASIC_COUNT;
19193 kr = vm_map_page_info(target_map,
19194 offset,
19195 VM_PAGE_INFO_BASIC,
19196 (vm_page_info_t) &info,
19197 &count);
19198 if (kr == KERN_SUCCESS) {
19199 *disposition = info.disposition;
19200 *ref_count = info.ref_count;
19201 } else {
19202 *disposition = 0;
19203 *ref_count = 0;
19204 }
19205
19206 return kr;
19207 }
19208
19209 kern_return_t
vm_map_page_info(vm_map_t map,vm_map_offset_t offset,vm_page_info_flavor_t flavor,vm_page_info_t info,mach_msg_type_number_t * count)19210 vm_map_page_info(
19211 vm_map_t map,
19212 vm_map_offset_t offset,
19213 vm_page_info_flavor_t flavor,
19214 vm_page_info_t info,
19215 mach_msg_type_number_t *count)
19216 {
19217 return vm_map_page_range_info_internal(map,
19218 offset, /* start of range */
19219 (offset + 1), /* this will get rounded in the call to the page boundary */
19220 (int)-1, /* effective_page_shift: unspecified */
19221 flavor,
19222 info,
19223 count);
19224 }
19225
19226 kern_return_t
vm_map_page_range_info_internal(vm_map_t map,vm_map_offset_t start_offset,vm_map_offset_t end_offset,int effective_page_shift,vm_page_info_flavor_t flavor,vm_page_info_t info,mach_msg_type_number_t * count)19227 vm_map_page_range_info_internal(
19228 vm_map_t map,
19229 vm_map_offset_t start_offset,
19230 vm_map_offset_t end_offset,
19231 int effective_page_shift,
19232 vm_page_info_flavor_t flavor,
19233 vm_page_info_t info,
19234 mach_msg_type_number_t *count)
19235 {
19236 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
19237 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
19238 vm_page_t m = VM_PAGE_NULL;
19239 kern_return_t retval = KERN_SUCCESS;
19240 int disposition = 0;
19241 int ref_count = 0;
19242 int depth = 0, info_idx = 0;
19243 vm_page_info_basic_t basic_info = 0;
19244 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
19245 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
19246 boolean_t do_region_footprint;
19247 ledger_amount_t ledger_resident, ledger_compressed;
19248 int effective_page_size;
19249 vm_map_offset_t effective_page_mask;
19250
19251 switch (flavor) {
19252 case VM_PAGE_INFO_BASIC:
19253 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
19254 /*
19255 * The "vm_page_info_basic_data" structure was not
19256 * properly padded, so allow the size to be off by
19257 * one to maintain backwards binary compatibility...
19258 */
19259 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
19260 return KERN_INVALID_ARGUMENT;
19261 }
19262 }
19263 break;
19264 default:
19265 return KERN_INVALID_ARGUMENT;
19266 }
19267
19268 if (effective_page_shift == -1) {
19269 effective_page_shift = vm_self_region_page_shift_safely(map);
19270 if (effective_page_shift == -1) {
19271 return KERN_INVALID_ARGUMENT;
19272 }
19273 }
19274 effective_page_size = (1 << effective_page_shift);
19275 effective_page_mask = effective_page_size - 1;
19276
19277 do_region_footprint = task_self_region_footprint();
19278 disposition = 0;
19279 ref_count = 0;
19280 depth = 0;
19281 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
19282 retval = KERN_SUCCESS;
19283
19284 offset_in_page = start_offset & effective_page_mask;
19285 start = vm_map_trunc_page(start_offset, effective_page_mask);
19286 end = vm_map_round_page(end_offset, effective_page_mask);
19287
19288 if (end < start) {
19289 return KERN_INVALID_ARGUMENT;
19290 }
19291
19292 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
19293
19294 vm_map_lock_read(map);
19295
19296 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
19297
19298 for (curr_s_offset = start; curr_s_offset < end;) {
19299 /*
19300 * New lookup needs reset of these variables.
19301 */
19302 curr_object = object = VM_OBJECT_NULL;
19303 offset_in_object = 0;
19304 ref_count = 0;
19305 depth = 0;
19306
19307 if (do_region_footprint &&
19308 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
19309 /*
19310 * Request for "footprint" info about a page beyond
19311 * the end of address space: this must be for
19312 * the fake region vm_map_region_recurse_64()
19313 * reported to account for non-volatile purgeable
19314 * memory owned by this task.
19315 */
19316 disposition = 0;
19317
19318 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
19319 (unsigned) ledger_compressed) {
19320 /*
19321 * We haven't reported all the "non-volatile
19322 * compressed" pages yet, so report this fake
19323 * page as "compressed".
19324 */
19325 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19326 } else {
19327 /*
19328 * We've reported all the non-volatile
19329 * compressed page but not all the non-volatile
19330 * pages , so report this fake page as
19331 * "resident dirty".
19332 */
19333 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19334 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19335 disposition |= VM_PAGE_QUERY_PAGE_REF;
19336 }
19337 switch (flavor) {
19338 case VM_PAGE_INFO_BASIC:
19339 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19340 basic_info->disposition = disposition;
19341 basic_info->ref_count = 1;
19342 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19343 basic_info->offset = 0;
19344 basic_info->depth = 0;
19345
19346 info_idx++;
19347 break;
19348 }
19349 curr_s_offset += effective_page_size;
19350 continue;
19351 }
19352
19353 /*
19354 * First, find the map entry covering "curr_s_offset", going down
19355 * submaps if necessary.
19356 */
19357 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
19358 /* no entry -> no object -> no page */
19359
19360 if (curr_s_offset < vm_map_min(map)) {
19361 /*
19362 * Illegal address that falls below map min.
19363 */
19364 curr_e_offset = MIN(end, vm_map_min(map));
19365 } else if (curr_s_offset >= vm_map_max(map)) {
19366 /*
19367 * Illegal address that falls on/after map max.
19368 */
19369 curr_e_offset = end;
19370 } else if (map_entry == vm_map_to_entry(map)) {
19371 /*
19372 * Hit a hole.
19373 */
19374 if (map_entry->vme_next == vm_map_to_entry(map)) {
19375 /*
19376 * Empty map.
19377 */
19378 curr_e_offset = MIN(map->max_offset, end);
19379 } else {
19380 /*
19381 * Hole at start of the map.
19382 */
19383 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19384 }
19385 } else {
19386 if (map_entry->vme_next == vm_map_to_entry(map)) {
19387 /*
19388 * Hole at the end of the map.
19389 */
19390 curr_e_offset = MIN(map->max_offset, end);
19391 } else {
19392 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19393 }
19394 }
19395
19396 assert(curr_e_offset >= curr_s_offset);
19397
19398 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19399
19400 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19401
19402 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19403
19404 curr_s_offset = curr_e_offset;
19405
19406 info_idx += num_pages;
19407
19408 continue;
19409 }
19410
19411 /* compute offset from this map entry's start */
19412 offset_in_object = curr_s_offset - map_entry->vme_start;
19413
19414 /* compute offset into this map entry's object (or submap) */
19415 offset_in_object += VME_OFFSET(map_entry);
19416
19417 if (map_entry->is_sub_map) {
19418 vm_map_t sub_map = VM_MAP_NULL;
19419 vm_page_info_t submap_info = 0;
19420 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
19421
19422 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
19423
19424 submap_s_offset = offset_in_object;
19425 submap_e_offset = submap_s_offset + range_len;
19426
19427 sub_map = VME_SUBMAP(map_entry);
19428
19429 vm_map_reference(sub_map);
19430 vm_map_unlock_read(map);
19431
19432 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19433
19434 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
19435 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
19436
19437 retval = vm_map_page_range_info_internal(sub_map,
19438 submap_s_offset,
19439 submap_e_offset,
19440 effective_page_shift,
19441 VM_PAGE_INFO_BASIC,
19442 (vm_page_info_t) submap_info,
19443 count);
19444
19445 assert(retval == KERN_SUCCESS);
19446
19447 vm_map_lock_read(map);
19448 vm_map_deallocate(sub_map);
19449
19450 /* Move the "info" index by the number of pages we inspected.*/
19451 info_idx += range_len >> effective_page_shift;
19452
19453 /* Move our current offset by the size of the range we inspected.*/
19454 curr_s_offset += range_len;
19455
19456 continue;
19457 }
19458
19459 object = VME_OBJECT(map_entry);
19460
19461 if (object == VM_OBJECT_NULL) {
19462 /*
19463 * We don't have an object here and, hence,
19464 * no pages to inspect. We'll fill up the
19465 * info structure appropriately.
19466 */
19467
19468 curr_e_offset = MIN(map_entry->vme_end, end);
19469
19470 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19471
19472 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19473
19474 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19475
19476 curr_s_offset = curr_e_offset;
19477
19478 info_idx += num_pages;
19479
19480 continue;
19481 }
19482
19483 if (do_region_footprint) {
19484 disposition = 0;
19485 if (map->has_corpse_footprint) {
19486 /*
19487 * Query the page info data we saved
19488 * while forking the corpse.
19489 */
19490 vm_map_corpse_footprint_query_page_info(
19491 map,
19492 curr_s_offset,
19493 &disposition);
19494 } else {
19495 /*
19496 * Query the live pmap for footprint info
19497 * about this page.
19498 */
19499 vm_map_footprint_query_page_info(
19500 map,
19501 map_entry,
19502 curr_s_offset,
19503 &disposition);
19504 }
19505 switch (flavor) {
19506 case VM_PAGE_INFO_BASIC:
19507 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19508 basic_info->disposition = disposition;
19509 basic_info->ref_count = 1;
19510 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19511 basic_info->offset = 0;
19512 basic_info->depth = 0;
19513
19514 info_idx++;
19515 break;
19516 }
19517 curr_s_offset += effective_page_size;
19518 continue;
19519 }
19520
19521 vm_object_reference(object);
19522 /*
19523 * Shared mode -- so we can allow other readers
19524 * to grab the lock too.
19525 */
19526 vm_object_lock_shared(object);
19527
19528 curr_e_offset = MIN(map_entry->vme_end, end);
19529
19530 vm_map_unlock_read(map);
19531
19532 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19533
19534 curr_object = object;
19535
19536 for (; curr_s_offset < curr_e_offset;) {
19537 if (object == curr_object) {
19538 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19539 } else {
19540 ref_count = curr_object->ref_count;
19541 }
19542
19543 curr_offset_in_object = offset_in_object;
19544
19545 for (;;) {
19546 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19547
19548 if (m != VM_PAGE_NULL) {
19549 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19550 break;
19551 } else {
19552 if (curr_object->internal &&
19553 curr_object->alive &&
19554 !curr_object->terminating &&
19555 curr_object->pager_ready) {
19556 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19557 == VM_EXTERNAL_STATE_EXISTS) {
19558 /* the pager has that page */
19559 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19560 break;
19561 }
19562 }
19563
19564 /*
19565 * Go down the VM object shadow chain until we find the page
19566 * we're looking for.
19567 */
19568
19569 if (curr_object->shadow != VM_OBJECT_NULL) {
19570 vm_object_t shadow = VM_OBJECT_NULL;
19571
19572 curr_offset_in_object += curr_object->vo_shadow_offset;
19573 shadow = curr_object->shadow;
19574
19575 vm_object_lock_shared(shadow);
19576 vm_object_unlock(curr_object);
19577
19578 curr_object = shadow;
19579 depth++;
19580 continue;
19581 } else {
19582 break;
19583 }
19584 }
19585 }
19586
19587 /* The ref_count is not strictly accurate, it measures the number */
19588 /* of entities holding a ref on the object, they may not be mapping */
19589 /* the object or may not be mapping the section holding the */
19590 /* target page but its still a ball park number and though an over- */
19591 /* count, it picks up the copy-on-write cases */
19592
19593 /* We could also get a picture of page sharing from pmap_attributes */
19594 /* but this would under count as only faulted-in mappings would */
19595 /* show up. */
19596
19597 if ((curr_object == object) && curr_object->shadow) {
19598 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19599 }
19600
19601 if (!curr_object->internal) {
19602 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19603 }
19604
19605 if (m != VM_PAGE_NULL) {
19606 if (m->vmp_fictitious) {
19607 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19608 } else {
19609 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19610 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19611 }
19612
19613 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19614 disposition |= VM_PAGE_QUERY_PAGE_REF;
19615 }
19616
19617 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19618 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19619 }
19620
19621 /*
19622 * XXX TODO4K:
19623 * when this routine deals with 4k
19624 * pages, check the appropriate CS bit
19625 * here.
19626 */
19627 if (m->vmp_cs_validated) {
19628 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19629 }
19630 if (m->vmp_cs_tainted) {
19631 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19632 }
19633 if (m->vmp_cs_nx) {
19634 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19635 }
19636 if (m->vmp_reusable || curr_object->all_reusable) {
19637 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19638 }
19639 }
19640 }
19641
19642 switch (flavor) {
19643 case VM_PAGE_INFO_BASIC:
19644 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19645 basic_info->disposition = disposition;
19646 basic_info->ref_count = ref_count;
19647 basic_info->object_id = (vm_object_id_t) (uintptr_t)
19648 VM_KERNEL_ADDRPERM(curr_object);
19649 basic_info->offset =
19650 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19651 basic_info->depth = depth;
19652
19653 info_idx++;
19654 break;
19655 }
19656
19657 disposition = 0;
19658 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19659
19660 /*
19661 * Move to next offset in the range and in our object.
19662 */
19663 curr_s_offset += effective_page_size;
19664 offset_in_object += effective_page_size;
19665 curr_offset_in_object = offset_in_object;
19666
19667 if (curr_object != object) {
19668 vm_object_unlock(curr_object);
19669
19670 curr_object = object;
19671
19672 vm_object_lock_shared(curr_object);
19673 } else {
19674 vm_object_lock_yield_shared(curr_object);
19675 }
19676 }
19677
19678 vm_object_unlock(curr_object);
19679 vm_object_deallocate(curr_object);
19680
19681 vm_map_lock_read(map);
19682 }
19683
19684 vm_map_unlock_read(map);
19685 return retval;
19686 }
19687
19688 /*
19689 * vm_map_msync
19690 *
19691 * Synchronises the memory range specified with its backing store
19692 * image by either flushing or cleaning the contents to the appropriate
19693 * memory manager engaging in a memory object synchronize dialog with
19694 * the manager. The client doesn't return until the manager issues
19695 * m_o_s_completed message. MIG Magically converts user task parameter
19696 * to the task's address map.
19697 *
19698 * interpretation of sync_flags
19699 * VM_SYNC_INVALIDATE - discard pages, only return precious
19700 * pages to manager.
19701 *
19702 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19703 * - discard pages, write dirty or precious
19704 * pages back to memory manager.
19705 *
19706 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19707 * - write dirty or precious pages back to
19708 * the memory manager.
19709 *
19710 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19711 * is a hole in the region, and we would
19712 * have returned KERN_SUCCESS, return
19713 * KERN_INVALID_ADDRESS instead.
19714 *
19715 * NOTE
19716 * The memory object attributes have not yet been implemented, this
19717 * function will have to deal with the invalidate attribute
19718 *
19719 * RETURNS
19720 * KERN_INVALID_TASK Bad task parameter
19721 * KERN_INVALID_ARGUMENT both sync and async were specified.
19722 * KERN_SUCCESS The usual.
19723 * KERN_INVALID_ADDRESS There was a hole in the region.
19724 */
19725
19726 kern_return_t
vm_map_msync(vm_map_t map,vm_map_address_t address,vm_map_size_t size,vm_sync_t sync_flags)19727 vm_map_msync(
19728 vm_map_t map,
19729 vm_map_address_t address,
19730 vm_map_size_t size,
19731 vm_sync_t sync_flags)
19732 {
19733 vm_map_entry_t entry;
19734 vm_map_size_t amount_left;
19735 vm_object_offset_t offset;
19736 vm_object_offset_t start_offset, end_offset;
19737 boolean_t do_sync_req;
19738 boolean_t had_hole = FALSE;
19739 vm_map_offset_t pmap_offset;
19740
19741 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19742 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19743 return KERN_INVALID_ARGUMENT;
19744 }
19745
19746 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19747 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19748 }
19749
19750 /*
19751 * align address and size on page boundaries
19752 */
19753 size = (vm_map_round_page(address + size,
19754 VM_MAP_PAGE_MASK(map)) -
19755 vm_map_trunc_page(address,
19756 VM_MAP_PAGE_MASK(map)));
19757 address = vm_map_trunc_page(address,
19758 VM_MAP_PAGE_MASK(map));
19759
19760 if (map == VM_MAP_NULL) {
19761 return KERN_INVALID_TASK;
19762 }
19763
19764 if (size == 0) {
19765 return KERN_SUCCESS;
19766 }
19767
19768 amount_left = size;
19769
19770 while (amount_left > 0) {
19771 vm_object_size_t flush_size;
19772 vm_object_t object;
19773
19774 vm_map_lock(map);
19775 if (!vm_map_lookup_entry(map,
19776 address,
19777 &entry)) {
19778 vm_map_size_t skip;
19779
19780 /*
19781 * hole in the address map.
19782 */
19783 had_hole = TRUE;
19784
19785 if (sync_flags & VM_SYNC_KILLPAGES) {
19786 /*
19787 * For VM_SYNC_KILLPAGES, there should be
19788 * no holes in the range, since we couldn't
19789 * prevent someone else from allocating in
19790 * that hole and we wouldn't want to "kill"
19791 * their pages.
19792 */
19793 vm_map_unlock(map);
19794 break;
19795 }
19796
19797 /*
19798 * Check for empty map.
19799 */
19800 if (entry == vm_map_to_entry(map) &&
19801 entry->vme_next == entry) {
19802 vm_map_unlock(map);
19803 break;
19804 }
19805 /*
19806 * Check that we don't wrap and that
19807 * we have at least one real map entry.
19808 */
19809 if ((map->hdr.nentries == 0) ||
19810 (entry->vme_next->vme_start < address)) {
19811 vm_map_unlock(map);
19812 break;
19813 }
19814 /*
19815 * Move up to the next entry if needed
19816 */
19817 skip = (entry->vme_next->vme_start - address);
19818 if (skip >= amount_left) {
19819 amount_left = 0;
19820 } else {
19821 amount_left -= skip;
19822 }
19823 address = entry->vme_next->vme_start;
19824 vm_map_unlock(map);
19825 continue;
19826 }
19827
19828 offset = address - entry->vme_start;
19829 pmap_offset = address;
19830
19831 /*
19832 * do we have more to flush than is contained in this
19833 * entry ?
19834 */
19835 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19836 flush_size = entry->vme_end -
19837 (entry->vme_start + offset);
19838 } else {
19839 flush_size = amount_left;
19840 }
19841 amount_left -= flush_size;
19842 address += flush_size;
19843
19844 if (entry->is_sub_map == TRUE) {
19845 vm_map_t local_map;
19846 vm_map_offset_t local_offset;
19847
19848 local_map = VME_SUBMAP(entry);
19849 local_offset = VME_OFFSET(entry);
19850 vm_map_reference(local_map);
19851 vm_map_unlock(map);
19852 if (vm_map_msync(
19853 local_map,
19854 local_offset,
19855 flush_size,
19856 sync_flags) == KERN_INVALID_ADDRESS) {
19857 had_hole = TRUE;
19858 }
19859 vm_map_deallocate(local_map);
19860 continue;
19861 }
19862 object = VME_OBJECT(entry);
19863
19864 /*
19865 * We can't sync this object if the object has not been
19866 * created yet
19867 */
19868 if (object == VM_OBJECT_NULL) {
19869 vm_map_unlock(map);
19870 continue;
19871 }
19872 offset += VME_OFFSET(entry);
19873
19874 vm_object_lock(object);
19875
19876 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19877 int kill_pages = 0;
19878 boolean_t reusable_pages = FALSE;
19879
19880 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19881 /*
19882 * This is a destructive operation and so we
19883 * err on the side of limiting the range of
19884 * the operation.
19885 */
19886 start_offset = vm_object_round_page(offset);
19887 end_offset = vm_object_trunc_page(offset + flush_size);
19888
19889 if (end_offset <= start_offset) {
19890 vm_object_unlock(object);
19891 vm_map_unlock(map);
19892 continue;
19893 }
19894
19895 pmap_offset += start_offset - offset;
19896 } else {
19897 start_offset = offset;
19898 end_offset = offset + flush_size;
19899 }
19900
19901 if (sync_flags & VM_SYNC_KILLPAGES) {
19902 if (((object->ref_count == 1) ||
19903 ((object->copy_strategy !=
19904 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19905 (object->copy == VM_OBJECT_NULL))) &&
19906 (object->shadow == VM_OBJECT_NULL)) {
19907 if (object->ref_count != 1) {
19908 vm_page_stats_reusable.free_shared++;
19909 }
19910 kill_pages = 1;
19911 } else {
19912 kill_pages = -1;
19913 }
19914 }
19915 if (kill_pages != -1) {
19916 vm_object_deactivate_pages(
19917 object,
19918 start_offset,
19919 (vm_object_size_t) (end_offset - start_offset),
19920 kill_pages,
19921 reusable_pages,
19922 map->pmap,
19923 pmap_offset);
19924 }
19925 vm_object_unlock(object);
19926 vm_map_unlock(map);
19927 continue;
19928 }
19929 /*
19930 * We can't sync this object if there isn't a pager.
19931 * Don't bother to sync internal objects, since there can't
19932 * be any "permanent" storage for these objects anyway.
19933 */
19934 if ((object->pager == MEMORY_OBJECT_NULL) ||
19935 (object->internal) || (object->private)) {
19936 vm_object_unlock(object);
19937 vm_map_unlock(map);
19938 continue;
19939 }
19940 /*
19941 * keep reference on the object until syncing is done
19942 */
19943 vm_object_reference_locked(object);
19944 vm_object_unlock(object);
19945
19946 vm_map_unlock(map);
19947
19948 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19949 start_offset = vm_object_trunc_page(offset);
19950 end_offset = vm_object_round_page(offset + flush_size);
19951 } else {
19952 start_offset = offset;
19953 end_offset = offset + flush_size;
19954 }
19955
19956 do_sync_req = vm_object_sync(object,
19957 start_offset,
19958 (end_offset - start_offset),
19959 sync_flags & VM_SYNC_INVALIDATE,
19960 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19961 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19962 sync_flags & VM_SYNC_SYNCHRONOUS);
19963
19964 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19965 /*
19966 * clear out the clustering and read-ahead hints
19967 */
19968 vm_object_lock(object);
19969
19970 object->pages_created = 0;
19971 object->pages_used = 0;
19972 object->sequential = 0;
19973 object->last_alloc = 0;
19974
19975 vm_object_unlock(object);
19976 }
19977 vm_object_deallocate(object);
19978 } /* while */
19979
19980 /* for proper msync() behaviour */
19981 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19982 return KERN_INVALID_ADDRESS;
19983 }
19984
19985 return KERN_SUCCESS;
19986 }/* vm_msync */
19987
void
vm_named_entry_associate_vm_object(
	vm_named_entry_t named_entry,
	vm_object_t object,
	vm_object_offset_t offset,
	vm_object_size_t size,
	vm_prot_t prot)
{
	vm_map_copy_t copy;
	vm_map_entry_t copy_entry;

	/* the named entry must not already be backed by anything */
	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(!named_entry->is_object);
	assert(!named_entry->internal);
	assert(named_entry->backing.copy == VM_MAP_COPY_NULL);

	/*
	 * Build a single-entry vm_map_copy describing
	 * [offset, offset + size) of "object" with protection "prot".
	 */
	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->offset = offset;
	copy->size = size;
	copy->cpy_hdr.page_shift = (uint16_t)PAGE_SHIFT;
	vm_map_store_init(&copy->cpy_hdr);

	copy_entry = vm_map_copy_entry_create(copy);
	copy_entry->protection = prot;
	copy_entry->max_protection = prot;
	copy_entry->use_pmap = TRUE;
	/* entry bounds are page-rounded; the sub-page offset lives in VME_OFFSET */
	copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
	copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
	VME_OBJECT_SET(copy_entry, object, false, 0);
	VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
	vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);

	/* hand the copy to the named entry; it now owns the backing */
	named_entry->backing.copy = copy;
	named_entry->is_object = TRUE;
	if (object->internal) {
		named_entry->internal = TRUE;
	}

	DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n",
	    named_entry, copy, object, offset, size, prot);
}
20031
vm_object_t
vm_named_entry_to_vm_object(
	vm_named_entry_t named_entry)
{
	vm_map_copy_t copy;
	vm_map_entry_t copy_entry;
	vm_object_t object;

	/* only valid for object-backed named entries (see the associate routine) */
	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(named_entry->is_object);
	copy = named_entry->backing.copy;
	assert(copy != VM_MAP_COPY_NULL);
	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);
	/* an object-backed named entry always wraps exactly one entry */
	assert(copy->cpy_hdr.nentries == 1);
	copy_entry = vm_map_copy_first_entry(copy);
	object = VME_OBJECT(copy_entry);

	DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);

	return object;
}
20058
20059 /*
20060 * Routine: convert_port_entry_to_map
20061 * Purpose:
20062 * Convert from a port specifying an entry or a task
20063 * to a map. Doesn't consume the port ref; produces a map ref,
20064 * which may be null. Unlike convert_port_to_map, the
20065 * port may be task or a named entry backed.
20066 * Conditions:
20067 * Nothing locked.
20068 */
20069
vm_map_t
convert_port_entry_to_map(
	ipc_port_t port)
{
	vm_map_t map = VM_MAP_NULL;
	vm_named_entry_t named_entry;

	if (!IP_VALID(port)) {
		return VM_MAP_NULL;
	}

	/* not a named entry: fall back to treating the port as a task port */
	if (ip_kotype(port) != IKOT_NAMED_ENTRY) {
		return convert_port_to_map(port);
	}

	named_entry = mach_memory_entry_from_port(port);

	/* only a writable, submap-backed named entry yields a map */
	if ((named_entry->is_sub_map) &&
	    (named_entry->protection & VM_PROT_WRITE)) {
		map = named_entry->backing.map;
		if (map->pmap != PMAP_NULL) {
			if (map->pmap == kernel_pmap) {
				/* never hand the kernel's own map to userspace */
				panic("userspace has access "
				    "to a kernel map %p", map);
			}
			pmap_require(map->pmap);
		}
		/* produce a map reference for the caller (port ref not consumed) */
		vm_map_reference(map);
	}

	return map;
}
20102
20103 /*
20104 * Export routines to other components for the things we access locally through
20105 * macros.
20106 */
20107 #undef current_map
vm_map_t
current_map(void)
{
	/* out-of-line export of the current_map_fast() macro (see #undef above) */
	return current_map_fast();
}
20113
20114 /*
20115 * vm_map_reference:
20116 *
20117 * Takes a reference on the specified map.
20118 */
void
vm_map_reference(
	vm_map_t map)
{
	/* NULL is tolerated as a convenience no-op */
	if (__probable(map != VM_MAP_NULL)) {
		/* validate the map structure before touching its refcount */
		vm_map_require(map);
		os_ref_retain_raw(&map->map_refcnt, &map_refgrp);
	}
}
20128
20129 /*
20130 * vm_map_deallocate:
20131 *
20132 * Removes a reference from the specified map,
20133 * destroying it if no references remain.
20134 * The map should not be locked.
20135 */
void
vm_map_deallocate(
	vm_map_t map)
{
	/* NULL is tolerated as a convenience no-op */
	if (__probable(map != VM_MAP_NULL)) {
		/* validate the map structure before touching its refcount */
		vm_map_require(map);
		/* dropping the last reference destroys the map */
		if (os_ref_release_raw(&map->map_refcnt, &map_refgrp) == 0) {
			vm_map_destroy(map);
		}
	}
}
20147
void
vm_map_inspect_deallocate(
	vm_map_inspect_t map)
{
	/* an inspect right is just a flavored vm_map reference */
	vm_map_deallocate((vm_map_t)map);
}
20154
void
vm_map_read_deallocate(
	vm_map_read_t map)
{
	/* a read right is just a flavored vm_map reference */
	vm_map_deallocate((vm_map_t)map);
}
20161
20162
20163 void
vm_map_disable_NX(vm_map_t map)20164 vm_map_disable_NX(vm_map_t map)
20165 {
20166 if (map == NULL) {
20167 return;
20168 }
20169 if (map->pmap == NULL) {
20170 return;
20171 }
20172
20173 pmap_disable_NX(map->pmap);
20174 }
20175
void
vm_map_disallow_data_exec(vm_map_t map)
{
	/* NULL map is a no-op */
	if (map == NULL) {
		return;
	}

	/* sticky per-map flag; nothing here ever clears it */
	map->map_disallow_data_exec = TRUE;
}
20185
20186 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
20187 * more descriptive.
20188 */
void
vm_map_set_32bit(vm_map_t map)
{
	/* shrink/establish the map ceiling for a 32-bit address space */
#if defined(__arm64__)
	/* arm64 derives the ceiling from the pmap layer (FALSE = not 64-bit) */
	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}
20198
20199
void
vm_map_set_64bit(vm_map_t map)
{
	/* establish the map ceiling for a 64-bit address space */
#if defined(__arm64__)
	/* arm64 derives the ceiling from the pmap layer (TRUE = 64-bit) */
	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
}
20209
20210 /*
20211 * Expand the maximum size of an existing map to the maximum supported.
20212 */
void
vm_map_set_jumbo(vm_map_t map)
{
#if defined (__arm64__) && !XNU_TARGET_OS_OSX
	/* grow as far as the platform allows; vm_map_set_max_addr clamps ~0 */
	vm_map_set_max_addr(map, ~0);
#else /* arm64 */
	/* no-op on other platforms */
	(void) map;
#endif
}
20222
20223 /*
20224 * This map has a JIT entitlement
20225 */
void
vm_map_set_jit_entitled(vm_map_t map)
{
#if defined (__arm64__)
	/* the JIT entitlement is tracked at the pmap level on arm64 */
	pmap_set_jit_entitled(map->pmap);
#else /* arm64 */
	/* no-op on other platforms */
	(void) map;
#endif
}
20235
20236 /*
20237 * This map has TPRO enabled
20238 */
void
vm_map_set_tpro(vm_map_t map)
{
#if defined (__arm64e__)
	/* TPRO state is tracked at the pmap level on arm64e */
	pmap_set_tpro(map->pmap);
#else /* arm64e */
	/* no-op on other platforms */
	(void) map;
#endif
}
20248
20249 /*
20250 * Expand the maximum size of an existing map.
20251 */
void
vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
{
#if defined(__arm64__)
	vm_map_offset_t max_supported_offset;
	vm_map_offset_t old_max_offset;

	vm_map_lock(map);

	old_max_offset = map->max_offset;
	/* platform ceiling for a "jumbo" address space */
	max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);

	new_max_offset = trunc_page(new_max_offset);

	/* The address space cannot be shrunk using this routine. */
	if (old_max_offset >= new_max_offset) {
		vm_map_unlock(map);
		return;
	}

	/* clamp the request to what the pmap supports */
	if (max_supported_offset < new_max_offset) {
		new_max_offset = max_supported_offset;
	}

	map->max_offset = new_max_offset;

	/* keep the hole list consistent with the enlarged address space */
	if (map->holelistenabled) {
		if (map->holes_list->prev->vme_end == old_max_offset) {
			/*
			 * There is already a hole at the end of the map; simply make it bigger.
			 */
			map->holes_list->prev->vme_end = map->max_offset;
		} else {
			/*
			 * There is no hole at the end, so we need to create a new hole
			 * for the new empty space we're creating.
			 */
			struct vm_map_links *new_hole;

			new_hole = zalloc_id(ZONE_ID_VM_MAP_HOLES, Z_WAITOK | Z_NOFAIL);
			new_hole->start = old_max_offset;
			new_hole->end = map->max_offset;
			/* link the new hole at the tail of the circular hole list */
			new_hole->prev = map->holes_list->prev;
			new_hole->next = (struct vm_map_entry *)map->holes_list;
			map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
			map->holes_list->prev = (struct vm_map_entry *)new_hole;
		}
	}

	vm_map_unlock(map);
#else
	/* only supported on arm64 */
	(void)map;
	(void)new_max_offset;
#endif
}
20307
vm_map_offset_t
vm_compute_max_offset(boolean_t is64)
{
	/* highest user VA for a new 32- or 64-bit address space */
#if defined(__arm64__)
	return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}
20317
void
vm_map_get_max_aslr_slide_section(
	vm_map_t map __unused,
	int64_t *max_sections,
	int64_t *section_size)
{
	/* report how ASLR slide space is partitioned on this platform */
#if defined(__arm64__)
	*max_sections = 3;
	*section_size = ARM_TT_TWIG_SIZE;
#else
	/* single section, size unused */
	*max_sections = 1;
	*section_size = 0;
#endif
}
20332
uint64_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
	/* maximum ASLR slide, expressed in map pages */
#if defined(__arm64__)
	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
	 * limited embedded address space; this is also meant to minimize pmap
	 * memory usage on 16KB page systems.
	 */
	return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
20346
uint64_t
vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
{
	/* maximum dyld/loader ASLR slide, expressed in map pages */
#if defined(__arm64__)
	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
	 * of independent entropy on 16KB page systems.
	 */
	return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
20359
boolean_t
vm_map_is_64bit(
	vm_map_t map)
{
	/* a ceiling above the 32-bit maximum implies a 64-bit address space */
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}
20366
boolean_t
vm_map_has_hard_pagezero(
	vm_map_t map,
	vm_map_offset_t pagezero_size)
{
	/*
	 * XXX FBDP
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	/* TRUE if the map's floor already covers the requested pagezero span */
	return map->min_offset >= pagezero_size;
}
20384
20385 /*
 * Raise a VM map's maximum offset.
20387 */
20388 kern_return_t
vm_map_raise_max_offset(vm_map_t map,vm_map_offset_t new_max_offset)20389 vm_map_raise_max_offset(
20390 vm_map_t map,
20391 vm_map_offset_t new_max_offset)
20392 {
20393 kern_return_t ret;
20394
20395 vm_map_lock(map);
20396 ret = KERN_INVALID_ADDRESS;
20397
20398 if (new_max_offset >= map->max_offset) {
20399 if (!vm_map_is_64bit(map)) {
20400 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20401 map->max_offset = new_max_offset;
20402 ret = KERN_SUCCESS;
20403 }
20404 } else {
20405 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20406 map->max_offset = new_max_offset;
20407 ret = KERN_SUCCESS;
20408 }
20409 }
20410 }
20411
20412 vm_map_unlock(map);
20413 return ret;
20414 }
20415
20416
20417 /*
20418 * Raise a VM map's minimum offset.
20419 * To strictly enforce "page zero" reservation.
20420 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t map,
	vm_map_offset_t new_min_offset)
{
	vm_map_entry_t first_entry;

	new_min_offset = vm_map_round_page(new_min_offset,
	    VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	if (new_min_offset >= map->max_offset) {
		/* can't go beyond the end of the address space */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset. It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	/* keep the first hole consistent with the raised floor */
	if (map->holelistenabled) {
		assert(map->holes_list);
		map->holes_list->start = new_min_offset;
		assert(new_min_offset < map->holes_list->end);
	}

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
20471
20472 /*
20473 * Set the limit on the maximum amount of address space and user wired memory allowed for this map.
20474 * This is basically a copy of the RLIMIT_AS and RLIMIT_MEMLOCK rlimit value maintained by the BSD
20475 * side of the kernel. The limits are checked in the mach VM side, so we keep a copy so we don't
20476 * have to reach over to the BSD data structures.
20477 */
20478
/* number of times a finite (non-RLIM_INFINITY) size limit was set */
uint64_t vm_map_set_size_limit_count = 0;
kern_return_t
vm_map_set_size_limit(vm_map_t map, uint64_t new_size_limit)
{
	kern_return_t kr;

	vm_map_lock(map);
	if (new_size_limit < map->size) {
		/* new limit should not be lower than its current size */
		DTRACE_VM2(vm_map_set_size_limit_fail,
		    vm_map_size_t, map->size,
		    uint64_t, new_size_limit);
		kr = KERN_FAILURE;
	} else if (new_size_limit == map->size_limit) {
		/* no change */
		kr = KERN_SUCCESS;
	} else {
		/* set new limit */
		DTRACE_VM2(vm_map_set_size_limit,
		    vm_map_size_t, map->size,
		    uint64_t, new_size_limit);
		if (new_size_limit != RLIM_INFINITY) {
			vm_map_set_size_limit_count++;
		}
		map->size_limit = new_size_limit;
		kr = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	return kr;
}
20509
/* number of times a finite (non-RLIM_INFINITY) data limit was set */
uint64_t vm_map_set_data_limit_count = 0;
kern_return_t
vm_map_set_data_limit(vm_map_t map, uint64_t new_data_limit)
{
	kern_return_t kr;

	vm_map_lock(map);
	if (new_data_limit < map->size) {
		/* new limit should not be lower than its current size */
		DTRACE_VM2(vm_map_set_data_limit_fail,
		    vm_map_size_t, map->size,
		    uint64_t, new_data_limit);
		kr = KERN_FAILURE;
	} else if (new_data_limit == map->data_limit) {
		/* no change */
		kr = KERN_SUCCESS;
	} else {
		/* set new limit */
		DTRACE_VM2(vm_map_set_data_limit,
		    vm_map_size_t, map->size,
		    uint64_t, new_data_limit);
		if (new_data_limit != RLIM_INFINITY) {
			vm_map_set_data_limit_count++;
		}
		map->data_limit = new_data_limit;
		kr = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	return kr;
}
20540
void
vm_map_set_user_wire_limit(vm_map_t map,
    vm_size_t limit)
{
	/* mirror of RLIMIT_MEMLOCK, stored map-side (see comment above) */
	vm_map_lock(map);
	map->user_wire_limit = limit;
	vm_map_unlock(map);
}
20549
20550
void
vm_map_switch_protect(vm_map_t map,
    boolean_t val)
{
	/* set the per-map switch-protect flag under the map lock */
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}
20559
extern int cs_process_enforcement_enable;
boolean_t
vm_map_cs_enforcement(
	vm_map_t map)
{
	/* the system-wide switch overrides the per-map setting */
	if (cs_process_enforcement_enable) {
		return TRUE;
	}
	return map->cs_enforcement;
}
20570
kern_return_t
vm_map_cs_wx_enable(
	vm_map_t map)
{
	/* delegated entirely to the pmap layer */
	return pmap_cs_allow_invalid(vm_map_pmap(map));
}
20577
void
vm_map_cs_debugged_set(
	vm_map_t map,
	boolean_t val)
{
	/* set the per-map cs_debugged flag under the map lock */
	vm_map_lock(map);
	map->cs_debugged = val;
	vm_map_unlock(map);
}
20587
void
vm_map_cs_enforcement_set(
	vm_map_t map,
	boolean_t val)
{
	vm_map_lock(map);
	map->cs_enforcement = val;
	/* keep the pmap's view of CS enforcement in sync with the map's */
	pmap_set_vm_map_cs_enforced(map->pmap, val);
	vm_map_unlock(map);
}
20598
20599 /*
20600 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20601 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20602 * bump both counters.
20603 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	/* phys_footprint is a composite ledger, so credit both (see above) */
	ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
20612
void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	/* inverse of vm_map_iokit_mapped_region: debit both ledgers */
	ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
20621
20622 /* Add (generate) code signature for memory range */
20623 #if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t
vm_map_sign(vm_map_t map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object. Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);
	vm_map_unlock_read(map);
	/* NOTE(review): "entry" is still dereferenced below after the map
	 * lock is dropped; presumably the object lock keeps the relevant
	 * fields stable here — confirm against vm_map locking rules. */

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object,
		    start - entry->vme_start + VME_OFFSET(entry));
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->vmp_busy ||
		    (m->vmp_unusual && (VMP_ERROR_GET(m) || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		/* XXX TODO4K: deal with 4k subpages individually? */
		m->vmp_cs_validated = VMP_CS_ALL_TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->vmp_wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
20716 #endif
20717
20718 kern_return_t
vm_map_partial_reap(vm_map_t map,unsigned int * reclaimed_resident,unsigned int * reclaimed_compressed)20719 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20720 {
20721 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
20722 vm_map_entry_t next_entry;
20723 kern_return_t kr = KERN_SUCCESS;
20724 VM_MAP_ZAP_DECLARE(zap_list);
20725
20726 vm_map_lock(map);
20727
20728 for (entry = vm_map_first_entry(map);
20729 entry != vm_map_to_entry(map);
20730 entry = next_entry) {
20731 next_entry = entry->vme_next;
20732
20733 if (!entry->is_sub_map &&
20734 VME_OBJECT(entry) &&
20735 (VME_OBJECT(entry)->internal == TRUE) &&
20736 (VME_OBJECT(entry)->ref_count == 1)) {
20737 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20738 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20739
20740 (void)vm_map_delete(map, entry->vme_start,
20741 entry->vme_end, VM_MAP_REMOVE_NO_YIELD,
20742 KMEM_GUARD_NONE, &zap_list);
20743 }
20744 }
20745
20746 vm_map_unlock(map);
20747
20748 vm_map_zap_dispose(&zap_list);
20749
20750 return kr;
20751 }
20752
20753
20754 #if DEVELOPMENT || DEBUG
20755
/*
 * vm_map_disconnect_page_mappings:
 * DEVELOPMENT/DEBUG only.  Remove all pmap mappings for the pageable
 * VM objects mapped in "map", so any future access re-faults.  If
 * "do_unnest" is TRUE, nested (shared) submap regions are un-nested
 * first so that only this task's pmap is affected by the removal.
 *
 * Returns the task's resident memory (phys_mem ledger balance, sampled
 * before the disconnect) expressed in map-page units.
 */
int
vm_map_disconnect_page_mappings(
	vm_map_t map,
	boolean_t do_unnest)
{
	vm_map_entry_t entry;
	ledger_amount_t byte_count = 0;

	if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
		vm_map_lock(map);

		for (entry = vm_map_first_entry(map);
		    entry != vm_map_to_entry(map);
		    entry = entry->vme_next) {
			if (entry->is_sub_map && entry->use_pmap) {
				/*
				 * Make sure the range between the start of this entry and
				 * the end of this entry is no longer nested, so that
				 * we will only remove mappings from the pmap in use by this
				 * this task
				 */
				vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
			}
		}
		vm_map_unlock(map);
#endif
	}
	vm_map_lock_read(map);

	/* sample resident memory before tearing down any mappings */
	ledger_get_balance(map->pmap->ledger, task_ledgers.phys_mem, &byte_count);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		/* skip entries with no object or physically contiguous objects */
		if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
		    (VME_OBJECT(entry)->phys_contiguous))) {
			continue;
		}
		/* any submap reached here must already be un-nested */
		if (entry->is_sub_map) {
			assert(!entry->use_pmap);
		}

		pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
	}
	vm_map_unlock_read(map);

	return (int) (byte_count / VM_MAP_PAGE_SIZE(map));
}
20805
/*
 * vm_map_inject_error:
 * DEVELOPMENT/DEBUG only.  Ask the compressor pager backing the page
 * containing "vaddr" to inject a decompression error, so a later fault
 * on that page fails (used to test error paths).
 *
 * Returns KERN_MEMORY_ERROR if no object backs "vaddr",
 * KERN_MEMORY_PRESENT if the object has no pager (page not
 * compressed), otherwise the pager's injection result.
 */
kern_return_t
vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_object_t object = NULL;
	vm_object_offset_t offset;
	vm_prot_t prot;
	boolean_t wired;
	vm_map_version_t version;
	vm_map_t real_map;
	int result = KERN_FAILURE;

	vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
	vm_map_lock(map);

	/* on success, returns with "object" locked exclusive */
	result = vm_map_lookup_and_lock_object(&map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
	    NULL, &real_map, NULL);
	if (object == NULL) {
		result = KERN_MEMORY_ERROR;
	} else if (object->pager) {
		result = vm_compressor_pager_inject_error(object->pager,
		    offset);
	} else {
		/* resident with no pager: nothing compressed to corrupt */
		result = KERN_MEMORY_PRESENT;
	}

	if (object != NULL) {
		vm_object_unlock(object);
	}

	/* NOTE(review): assumes the lookup sets "real_map" even on failure
	 * (it normally initializes it to "map") -- confirm in
	 * vm_map_lookup_and_lock_object() */
	if (real_map != map) {
		vm_map_unlock(real_map);
	}
	vm_map_unlock(map);

	return result;
}
20843
20844 #endif
20845
20846
20847 #if CONFIG_FREEZE
20848
20849
20850 extern struct freezer_context freezer_context_global;
20851 AbsoluteTime c_freezer_last_yield_ts = 0;
20852
20853 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20854 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20855
/*
 * vm_map_freeze:
 * Compress ("freeze") the dirty anonymous memory of "task" into the
 * compressor, up to "dirty_budget" pages.
 *
 * When the freezer is backed by swap (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE),
 * the map is walked twice: a first "evaluation" pass only sizes up the
 * private vs. shared dirty footprint and can veto the freeze with
 * FREEZER_ERROR_EXCESS_SHARED_MEMORY or
 * FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO; the second pass purges
 * eligible purgeable objects and actually compresses pages.  With
 * "eval_only" TRUE only the evaluation pass runs (swap config only).
 *
 * NOTE(review): *purgeable_count, *clean_count and *dirty_count are
 * zeroed here but never incremented in this function -- presumably kept
 * for interface compatibility; confirm with callers.
 */
kern_return_t
vm_map_freeze(
	task_t task,
	unsigned int *purgeable_count,
	unsigned int *wired_count,
	unsigned int *clean_count,
	unsigned int *dirty_count,
	unsigned int dirty_budget,
	unsigned int *shared_count,
	int *freezer_error_code,
	boolean_t eval_only)
{
	vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
	kern_return_t kr = KERN_SUCCESS;
	boolean_t evaluation_phase = TRUE;
	vm_object_t cur_shared_object = NULL;
	int cur_shared_obj_ref_cnt = 0;
	unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;

	*purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;

	/*
	 * We need the exclusive lock here so that we can
	 * block any page faults or lookups while we are
	 * in the middle of freezing this vm map.
	 */
	vm_map_t map = task->map;

	vm_map_lock(map);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	/* bail out early if there's no room to compress anything */
	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
		if (vm_compressor_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
		}

		if (vm_swap_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
		}

		kr = KERN_NO_SPACE;
		goto done;
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
		/*
		 * In-memory compressor backing the freezer. No disk.
		 * So no need to do the evaluation phase.
		 */
		evaluation_phase = FALSE;

		if (eval_only == TRUE) {
			/*
			 * We don't support 'eval_only' mode
			 * in this non-swap config.
			 */
			*freezer_error_code = FREEZER_ERROR_GENERIC;
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);
	}
again:

	/* one pass over the map: evaluation first, then the real freeze */
	for (entry2 = vm_map_first_entry(map);
	    entry2 != vm_map_to_entry(map);
	    entry2 = entry2->vme_next) {
		vm_object_t src_object;

		if (entry2->is_sub_map) {
			continue;
		}

		/* only anonymous (internal), non-contiguous objects are freezable */
		src_object = VME_OBJECT(entry2);
		if (!src_object ||
		    src_object->phys_contiguous ||
		    !src_object->internal) {
			continue;
		}

		/* If eligible, scan the entry, moving eligible pages over to our parent object */

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * We skip purgeable objects during evaluation phase only.
			 * If we decide to freeze this process, we'll explicitly
			 * purge these objects before we go around again with
			 * 'evaluation_phase' set to FALSE.
			 */

			if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
				/*
				 * We want to purge objects that may not belong to this task but are mapped
				 * in this task alone. Since we already purged this task's purgeable memory
				 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
				 * on this task's purgeable objects. Hence the check for only volatile objects.
				 */
				if (evaluation_phase == FALSE &&
				    (src_object->purgable == VM_PURGABLE_VOLATILE) &&
				    (src_object->ref_count == 1)) {
					vm_object_lock(src_object);
					vm_object_purge(src_object, 0);
					vm_object_unlock(src_object);
				}
				continue;
			}

			/*
			 * Pages belonging to this object could be swapped to disk.
			 * Make sure it's not a shared object because we could end
			 * up just bringing it back in again.
			 *
			 * We try to optimize somewhat by checking for objects that are mapped
			 * more than once within our own map. But we don't do full searches,
			 * we just look at the entries following our current entry.
			 */

			if (src_object->ref_count > 1) {
				if (src_object != cur_shared_object) {
					/* first sighting of this shared object: count it as shared */
					obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
					dirty_shared_count += obj_pages_snapshot;

					cur_shared_object = src_object;
					cur_shared_obj_ref_cnt = 1;
					continue;
				} else {
					/* another mapping of the same object within this map */
					cur_shared_obj_ref_cnt++;
					if (src_object->ref_count == cur_shared_obj_ref_cnt) {
						/*
						 * Fall through to below and treat this object as private.
						 * So deduct its pages from our shared total and add it to the
						 * private total.
						 */

						dirty_shared_count -= obj_pages_snapshot;
						dirty_private_count += obj_pages_snapshot;
					} else {
						continue;
					}
				}
			}


			if (src_object->ref_count == 1) {
				dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
			}

			if (evaluation_phase == TRUE) {
				/* evaluation pass only counts, it never compresses */
				continue;
			}
		}

		/* actually push this object's eligible pages into the compressor */
		uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
		*wired_count += src_object->wired_page_count;

		if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
			if (vm_compressor_low_on_space()) {
				*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
			}

			if (vm_swap_low_on_space()) {
				*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
			}

			kr = KERN_NO_SPACE;
			break;
		}
		if (paged_out_count >= dirty_budget) {
			/* budget exhausted: stop freezing */
			break;
		}
		dirty_budget -= paged_out_count;
	}

	/* report shared footprint in MB regardless of phase */
	*shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
	if (evaluation_phase) {
		unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;

		if (dirty_shared_count > shared_pages_threshold) {
			*freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
			kr = KERN_FAILURE;
			goto done;
		}

		/* too much shared memory relative to private: not worth freezing */
		if (dirty_shared_count &&
		    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
			*freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
			kr = KERN_FAILURE;
			goto done;
		}

		/* evaluation succeeded: reset counters and do the real pass */
		evaluation_phase = FALSE;
		dirty_shared_count = dirty_private_count = 0;

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);

		if (eval_only) {
			kr = KERN_SUCCESS;
			goto done;
		}

		/* purge this task's own purgeable memory before freezing */
		vm_purgeable_purge_task_owned(task);

		goto again;
	} else {
		kr = KERN_SUCCESS;
	}

done:
	vm_map_unlock(map);

	if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
		vm_object_compressed_freezer_done();
	}
	return kr;
}
21075
21076 #endif
21077
21078 /*
21079 * vm_map_entry_should_cow_for_true_share:
21080 *
21081 * Determines if the map entry should be clipped and setup for copy-on-write
21082 * to avoid applying "true_share" to a large VM object when only a subset is
21083 * targeted.
21084 *
21085 * For now, we target only the map entries created for the Objective C
21086 * Garbage Collector, which initially have the following properties:
21087 * - alias == VM_MEMORY_MALLOC
21088 * - wired_count == 0
21089 * - !needs_copy
21090 * and a VM object with:
21091 * - internal
21092 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
21093 * - !true_share
21094 * - vo_size == ANON_CHUNK_SIZE
21095 *
21096 * Only non-kernel map entries.
21097 */
21098 boolean_t
vm_map_entry_should_cow_for_true_share(vm_map_entry_t entry)21099 vm_map_entry_should_cow_for_true_share(
21100 vm_map_entry_t entry)
21101 {
21102 vm_object_t object;
21103
21104 if (entry->is_sub_map) {
21105 /* entry does not point at a VM object */
21106 return FALSE;
21107 }
21108
21109 if (entry->needs_copy) {
21110 /* already set for copy_on_write: done! */
21111 return FALSE;
21112 }
21113
21114 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
21115 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
21116 /* not a malloc heap or Obj-C Garbage Collector heap */
21117 return FALSE;
21118 }
21119
21120 if (entry->wired_count) {
21121 /* wired: can't change the map entry... */
21122 vm_counters.should_cow_but_wired++;
21123 return FALSE;
21124 }
21125
21126 object = VME_OBJECT(entry);
21127
21128 if (object == VM_OBJECT_NULL) {
21129 /* no object yet... */
21130 return FALSE;
21131 }
21132
21133 if (!object->internal) {
21134 /* not an internal object */
21135 return FALSE;
21136 }
21137
21138 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
21139 /* not the default copy strategy */
21140 return FALSE;
21141 }
21142
21143 if (object->true_share) {
21144 /* already true_share: too late to avoid it */
21145 return FALSE;
21146 }
21147
21148 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
21149 object->vo_size != ANON_CHUNK_SIZE) {
21150 /* ... not an object created for the ObjC Garbage Collector */
21151 return FALSE;
21152 }
21153
21154 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
21155 object->vo_size != 2048 * 4096) {
21156 /* ... not a "MALLOC_SMALL" heap */
21157 return FALSE;
21158 }
21159
21160 /*
21161 * All the criteria match: we have a large object being targeted for "true_share".
21162 * To limit the adverse side-effects linked with "true_share", tell the caller to
21163 * try and avoid setting up the entire object for "true_share" by clipping the
21164 * targeted range and setting it up for copy-on-write.
21165 */
21166 return TRUE;
21167 }
21168
/*
 * Round "offset" up to the page boundary defined by "mask"
 * (out-of-line wrapper around the VM_MAP_ROUND_PAGE() macro).
 */
vm_map_offset_t
vm_map_round_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_ROUND_PAGE(offset, mask);
}
21176
/*
 * Truncate "offset" down to the page boundary defined by "mask"
 * (out-of-line wrapper around the VM_MAP_TRUNC_PAGE() macro).
 */
vm_map_offset_t
vm_map_trunc_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_TRUNC_PAGE(offset, mask);
}
21184
/*
 * Return TRUE if "offset" is aligned to the page boundary defined
 * by "mask" (i.e. no bits of "mask" are set in "offset").
 */
boolean_t
vm_map_page_aligned(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return ((offset) & mask) == 0;
}
21192
/*
 * Return the page shift (log2 of the page size) in effect for "map"
 * (out-of-line wrapper around the VM_MAP_PAGE_SHIFT() macro).
 */
int
vm_map_page_shift(
	vm_map_t map)
{
	return VM_MAP_PAGE_SHIFT(map);
}
21199
/*
 * Return the page size in effect for "map"
 * (out-of-line wrapper around the VM_MAP_PAGE_SIZE() macro).
 */
int
vm_map_page_size(
	vm_map_t map)
{
	return VM_MAP_PAGE_SIZE(map);
}
21206
/*
 * Return the page mask (page size - 1) in effect for "map"
 * (out-of-line wrapper around the VM_MAP_PAGE_MASK() macro).
 */
vm_map_offset_t
vm_map_page_mask(
	vm_map_t map)
{
	return VM_MAP_PAGE_MASK(map);
}
21213
21214 kern_return_t
vm_map_set_page_shift(vm_map_t map,int pageshift)21215 vm_map_set_page_shift(
21216 vm_map_t map,
21217 int pageshift)
21218 {
21219 if (map->hdr.nentries != 0) {
21220 /* too late to change page size */
21221 return KERN_FAILURE;
21222 }
21223
21224 map->hdr.page_shift = (uint16_t)pageshift;
21225
21226 return KERN_SUCCESS;
21227 }
21228
/*
 * vm_map_query_volatile:
 * Tally the volatile (and "empty") purgeable memory mapped writable in
 * "map": virtual span, resident pages, compressed pages, and the
 * pmap-level resident/compressed counts for those ranges.
 * Caller must hold the map lock; it is still held on return.
 * Always returns KERN_SUCCESS.
 */
kern_return_t
vm_map_query_volatile(
	vm_map_t map,
	mach_vm_size_t *volatile_virtual_size_p,
	mach_vm_size_t *volatile_resident_size_p,
	mach_vm_size_t *volatile_compressed_size_p,
	mach_vm_size_t *volatile_pmap_size_p,
	mach_vm_size_t *volatile_compressed_pmap_size_p)
{
	mach_vm_size_t volatile_virtual_size;
	mach_vm_size_t volatile_resident_count;
	mach_vm_size_t volatile_compressed_count;
	mach_vm_size_t volatile_pmap_count;
	mach_vm_size_t volatile_compressed_pmap_count;
	mach_vm_size_t resident_count;
	vm_map_entry_t entry;
	vm_object_t object;

	/* map should be locked by caller */

	volatile_virtual_size = 0;
	volatile_resident_count = 0;
	volatile_compressed_count = 0;
	volatile_pmap_count = 0;
	volatile_compressed_pmap_count = 0;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;

		if (entry->is_sub_map) {
			continue;
		}
		/* only writable mappings count towards volatile memory */
		if (!(entry->protection & VM_PROT_WRITE)) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}
		if (object->purgable != VM_PURGABLE_VOLATILE &&
		    object->purgable != VM_PURGABLE_EMPTY) {
			continue;
		}
		if (VME_OFFSET(entry)) {
			/*
			 * If the map entry has been split and the object now
			 * appears several times in the VM map, we don't want
			 * to count the object's resident_page_count more than
			 * once. We count it only for the first one, starting
			 * at offset 0 and ignore the other VM map entries.
			 */
			continue;
		}
		/*
		 * NOTE(review): entries with a non-zero offset were skipped
		 * above, so VME_OFFSET(entry) is always 0 here and the
		 * adjustment below is effectively a no-op -- kept defensively.
		 */
		resident_count = object->resident_page_count;
		if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
			resident_count = 0;
		} else {
			resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
		}

		volatile_virtual_size += entry->vme_end - entry->vme_start;
		volatile_resident_count += resident_count;
		if (object->pager) {
			volatile_compressed_count +=
			    vm_compressor_pager_get_count(object->pager);
		}
		pmap_compressed_bytes = 0;
		pmap_resident_bytes =
		    pmap_query_resident(map->pmap,
		    entry->vme_start,
		    entry->vme_end,
		    &pmap_compressed_bytes);
		volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
		volatile_compressed_pmap_count += (pmap_compressed_bytes
		    / PAGE_SIZE);
	}

	/* map is still locked on return */

	*volatile_virtual_size_p = volatile_virtual_size;
	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
	*volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
	*volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;

	return KERN_SUCCESS;
}
21318
21319 void
vm_map_sizes(vm_map_t map,vm_map_size_t * psize,vm_map_size_t * pfree,vm_map_size_t * plargest_free)21320 vm_map_sizes(vm_map_t map,
21321 vm_map_size_t * psize,
21322 vm_map_size_t * pfree,
21323 vm_map_size_t * plargest_free)
21324 {
21325 vm_map_entry_t entry;
21326 vm_map_offset_t prev;
21327 vm_map_size_t free, total_free, largest_free;
21328 boolean_t end;
21329
21330 if (!map) {
21331 *psize = *pfree = *plargest_free = 0;
21332 return;
21333 }
21334 total_free = largest_free = 0;
21335
21336 vm_map_lock_read(map);
21337 if (psize) {
21338 *psize = map->max_offset - map->min_offset;
21339 }
21340
21341 prev = map->min_offset;
21342 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
21343 end = (entry == vm_map_to_entry(map));
21344
21345 if (end) {
21346 free = entry->vme_end - prev;
21347 } else {
21348 free = entry->vme_start - prev;
21349 }
21350
21351 total_free += free;
21352 if (free > largest_free) {
21353 largest_free = free;
21354 }
21355
21356 if (end) {
21357 break;
21358 }
21359 prev = entry->vme_end;
21360 }
21361 vm_map_unlock_read(map);
21362 if (pfree) {
21363 *pfree = total_free;
21364 }
21365 if (plargest_free) {
21366 *plargest_free = largest_free;
21367 }
21368 }
21369
21370 #if VM_SCAN_FOR_SHADOW_CHAIN
21371 int vm_map_shadow_max(vm_map_t map);
/*
 * vm_map_shadow_max:
 * Debug/statistics helper: walk every object mapped in "map" and
 * return the length of the deepest shadow chain found.
 * Returns 0 for a NULL map.
 */
int
vm_map_shadow_max(
	vm_map_t map)
{
	int shadows, shadows_max;
	vm_map_entry_t entry;
	vm_object_t object, next_object;

	if (map == NULL) {
		return 0;
	}

	shadows_max = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}
		/*
		 * Walk down the shadow chain with hand-over-hand shared
		 * locking: lock the next object before dropping the current
		 * one so the chain can't be torn down underneath us.
		 */
		vm_object_lock_shared(object);
		for (shadows = 0;
		    object->shadow != NULL;
		    shadows++, object = next_object) {
			next_object = object->shadow;
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
		}
		vm_object_unlock(object);
		if (shadows > shadows_max) {
			shadows_max = shadows;
		}
	}

	vm_map_unlock_read(map);

	return shadows_max;
}
21416 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21417
/*
 * Tell the pmap layer the lowest valid user address of "lmap" so it can
 * commit its page-zero (null-dereference protection) strategy.
 */
void
vm_commit_pagezero_status(vm_map_t lmap)
{
	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
}
21423
21424 #if XNU_TARGET_OS_OSX
/*
 * Set the lower bound from which future "high" allocations in this map
 * should be placed.
 * NOTE(review): no map locking here -- presumably only called before
 * the map is in active use; confirm against callers.
 */
void
vm_map_set_high_start(
	vm_map_t map,
	vm_map_offset_t high_start)
{
	map->vmmap_high_start = high_start;
}
21432 #endif /* XNU_TARGET_OS_OSX */
21433
21434
21435 /*
21436 * FORKED CORPSE FOOTPRINT
21437 *
21438 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21439 * empty since it never ran and never got to fault in any pages.
21440 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21441 * a forked corpse would therefore return very little information.
21442 *
21443 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21444 * to vm_map_fork() to collect footprint information from the original VM map
21445 * and its pmap, and store it in the forked corpse's VM map. That information
21446 * is stored in place of the VM map's "hole list" since we'll never need to
21447 * lookup for holes in the corpse's map.
21448 *
21449 * The corpse's footprint info looks like this:
21450 *
21451 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21452 * as follows:
21453 * +---------------------------------------+
21454 * header-> | cf_size |
21455 * +-------------------+-------------------+
21456 * | cf_last_region | cf_last_zeroes |
21457 * +-------------------+-------------------+
21458 * region1-> | cfr_vaddr |
21459 * +-------------------+-------------------+
21460 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21461 * +---------------------------------------+
21462 * | d4 | d5 | ... |
21463 * +---------------------------------------+
21464 * | ... |
21465 * +-------------------+-------------------+
21466 * | dy | dz | na | na | cfr_vaddr... | <-region2
21467 * +-------------------+-------------------+
21468 * | cfr_vaddr (ctd) | cfr_num_pages |
21469 * +---------------------------------------+
21470 * | d0 | d1 ... |
21471 * +---------------------------------------+
21472 * ...
21473 * +---------------------------------------+
21474 * last region-> | cfr_vaddr |
21475 * +---------------------------------------+
21476 * + cfr_num_pages | d0 | d1 | d2 | d3 |
21477 * +---------------------------------------+
21478 * ...
21479 * +---------------------------------------+
21480 * | dx | dy | dz | na | na | na | na | na |
21481 * +---------------------------------------+
21482 *
21483 * where:
21484 * cf_size: total size of the buffer (rounded to page size)
21485 * cf_last_region: offset in the buffer of the last "region" sub-header
21486 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21487 * of last region
21488 * cfr_vaddr: virtual address of the start of the covered "region"
21489 * cfr_num_pages: number of pages in the covered "region"
21490 * d*: disposition of the page at that virtual address
21491 * Regions in the buffer are word-aligned.
21492 *
21493 * We estimate the size of the buffer based on the number of memory regions
21494 * and the virtual size of the address space. While copying each memory region
21495 * during vm_map_fork(), we also collect the footprint info for that region
21496 * and store it in the buffer, packing it as much as possible (coalescing
21497 * contiguous memory regions to avoid having too many region headers and
21498 * avoiding long streaks of "zero" page dispositions by splitting footprint
 * "regions"), so the number of regions in the footprint buffer might not match
 * the number of memory regions in the address space.
21501 *
21502 * We also have to copy the original task's "nonvolatile" ledgers since that's
21503 * part of the footprint and will need to be reported to any tool asking for
21504 * the footprint information of the forked corpse.
21505 */
21506
21507 uint64_t vm_map_corpse_footprint_count = 0;
21508 uint64_t vm_map_corpse_footprint_size_avg = 0;
21509 uint64_t vm_map_corpse_footprint_size_max = 0;
21510 uint64_t vm_map_corpse_footprint_full = 0;
21511 uint64_t vm_map_corpse_footprint_no_buf = 0;
21512
/* header at the start of the corpse footprint buffer (see diagram above) */
struct vm_map_corpse_footprint_header {
	vm_size_t cf_size;      /* allocated buffer size */
	uint32_t cf_last_region; /* offset of last region in buffer */
	union {
		uint32_t cfu_last_zeroes; /* during creation:
		                           * number of "zero" dispositions at
		                           * end of last region */
		uint32_t cfu_hint_region; /* during lookup:
		                           * offset of last looked up region */
#define cf_last_zeroes cfu.cfu_last_zeroes
#define cf_hint_region cfu.cfu_hint_region
	} cfu;
};
/* one-byte page disposition (see vm_page_disposition_to_cf_disp()) */
typedef uint8_t cf_disp_t;
/* one contiguous "region" of page dispositions within the buffer */
struct vm_map_corpse_footprint_region {
	vm_map_offset_t cfr_vaddr; /* region start virtual address */
	uint32_t cfr_num_pages; /* number of pages in this "region" */
	cf_disp_t cfr_disposition[0]; /* disposition of each page */
} __attribute__((packed));
21532
21533 static cf_disp_t
vm_page_disposition_to_cf_disp(int disposition)21534 vm_page_disposition_to_cf_disp(
21535 int disposition)
21536 {
21537 assert(sizeof(cf_disp_t) == 1);
21538 /* relocate bits that don't fit in a "uint8_t" */
21539 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21540 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21541 }
21542 /* cast gets rid of extra bits */
21543 return (cf_disp_t) disposition;
21544 }
21545
21546 static int
vm_page_cf_disp_to_disposition(cf_disp_t cf_disp)21547 vm_page_cf_disp_to_disposition(
21548 cf_disp_t cf_disp)
21549 {
21550 int disposition;
21551
21552 assert(sizeof(cf_disp_t) == 1);
21553 disposition = (int) cf_disp;
21554 /* move relocated bits back in place */
21555 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21556 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21557 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21558 }
21559 return disposition;
21560 }
21561
21562 /*
21563 * vm_map_corpse_footprint_new_region:
21564 * closes the current footprint "region" and creates a new one
21565 *
21566 * Returns NULL if there's not enough space in the buffer for a new region.
21567 */
static struct vm_map_corpse_footprint_region *
vm_map_corpse_footprint_new_region(
	struct vm_map_corpse_footprint_header *footprint_header)
{
	uintptr_t footprint_edge;
	uint32_t new_region_offset;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;

	/* first byte past the end of the footprint buffer */
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);
	/* locate the current (last) region via its offset in the header */
	footprint_region = ((struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region));
	assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
	    footprint_edge);

	/* get rid of trailing zeroes in the last region */
	assert(footprint_region->cfr_num_pages >=
	    footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -=
	    footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* reuse this region if it's now empty */
	if (footprint_region->cfr_num_pages == 0) {
		return footprint_region;
	}

	/* compute offset of new region */
	new_region_offset = footprint_header->cf_last_region;
	new_region_offset += sizeof(*footprint_region);
	new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
	/* regions in the buffer are word-aligned */
	new_region_offset = roundup(new_region_offset, sizeof(int));

	/* check if we're going over the edge */
	if (((uintptr_t)footprint_header +
	    new_region_offset +
	    sizeof(*footprint_region)) >=
	    footprint_edge) {
		/* over the edge: no new region */
		return NULL;
	}

	/* adjust offset of last region in header */
	footprint_header->cf_last_region = new_region_offset;

	new_footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);
	new_footprint_region->cfr_vaddr = 0;
	new_footprint_region->cfr_num_pages = 0;
	/* caller needs to initialize new region */

	return new_footprint_region;
}
21624
21625 /*
21626 * vm_map_corpse_footprint_collect:
21627 * collect footprint information for "old_entry" in "old_map" and
21628 * stores it in "new_map"'s vmmap_footprint_info.
21629 */
kern_return_t
vm_map_corpse_footprint_collect(
	vm_map_t        old_map,
	vm_map_entry_t  old_entry,
	vm_map_t        new_map)
{
	vm_map_offset_t va;
	kern_return_t   kr;
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;
	cf_disp_t       *next_disp_p;
	uintptr_t       footprint_edge;
	uint32_t        num_pages_tmp;
	int             effective_page_size;

	/* use the smaller of the kernel page size and the map's page size */
	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));

	va = old_entry->vme_start;

	/* both maps must be held exclusively while collecting */
	vm_map_lock_assert_exclusive(old_map);
	vm_map_lock_assert_exclusive(new_map);

	assert(new_map->has_corpse_footprint);
	assert(!old_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    old_map->has_corpse_footprint) {
		/*
		 * This can only transfer footprint info from a
		 * map with a live pmap to a map with a corpse footprint.
		 */
		return KERN_NOT_SUPPORTED;
	}

	if (new_map->vmmap_corpse_footprint == NULL) {
		/* first entry collected for this corpse: allocate the buffer */
		vm_offset_t     buf;
		vm_size_t       buf_size;

		buf = 0;
		/*
		 * Worst-case size estimate: one region header (plus up to
		 * 3 bytes of alignment padding) per map entry, and one
		 * disposition per page of the source map.
		 */
		buf_size = (sizeof(*footprint_header) +
		    (old_map->hdr.nentries
		    *
		    (sizeof(*footprint_region) +
		    +3)) /* potential alignment for each region */
		    +
		    ((old_map->size / effective_page_size)
		    *
		    sizeof(cf_disp_t))); /* disposition for each page */
//		printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
		buf_size = round_page(buf_size);

		/* limit buffer to 1 page to validate overflow detection */
//		buf_size = PAGE_SIZE;

		/* limit size to a somewhat sane amount */
#if XNU_TARGET_OS_OSX
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
#else /* XNU_TARGET_OS_OSX */
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
#endif /* XNU_TARGET_OS_OSX */
		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
		}

		/*
		 * Allocate the pageable buffer (with a trailing guard page).
		 * It will be zero-filled on demand.
		 */
		kr = kmem_alloc(kernel_map, &buf, buf_size + PAGE_SIZE,
		    KMA_DATA | KMA_PAGEABLE | KMA_GUARD_LAST,
		    VM_KERN_MEMORY_DIAG);
		if (kr != KERN_SUCCESS) {
			vm_map_corpse_footprint_no_buf++;
			return kr;
		}

		/* initialize header and 1st region */
		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
		new_map->vmmap_corpse_footprint = footprint_header;

		footprint_header->cf_size = buf_size;
		footprint_header->cf_last_region =
		    sizeof(*footprint_header);
		footprint_header->cf_last_zeroes = 0;

		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
		footprint_region->cfr_vaddr = 0;
		footprint_region->cfr_num_pages = 0;
	} else {
		/* retrieve header and last region */
		footprint_header = (struct vm_map_corpse_footprint_header *)
		    new_map->vmmap_corpse_footprint;
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
	}
	/* first byte past the end of the buffer: nothing may be stored there */
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);

	if ((footprint_region->cfr_vaddr +
	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
	    effective_page_size))
	    != old_entry->vme_start) {
		uint64_t num_pages_delta, num_pages_delta_size;
		uint32_t region_offset_delta_size;

		/*
		 * Not the next contiguous virtual address:
		 * start a new region or store "zero" dispositions for
		 * the missing pages?
		 */
		/* size of gap in actual page dispositions */
		num_pages_delta = ((old_entry->vme_start -
		    footprint_region->cfr_vaddr) / effective_page_size)
		    - footprint_region->cfr_num_pages;
		num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
		/* size of gap as a new footprint region header */
		region_offset_delta_size =
		    (sizeof(*footprint_region) +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
		    sizeof(int)) -
		    ((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
//		printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
		if (region_offset_delta_size < num_pages_delta_size ||
		    os_add3_overflow(footprint_region->cfr_num_pages,
		    (uint32_t) num_pages_delta,
		    1,
		    &num_pages_tmp)) {
			/*
			 * Storing data for this gap would take more space
			 * than inserting a new footprint region header:
			 * let's start a new region and save space. If it's a
			 * tie, let's avoid using a new region, since that
			 * would require more region hops to find the right
			 * range during lookups.
			 *
			 * If the current region's cfr_num_pages would overflow
			 * if we added "zero" page dispositions for the gap,
			 * no choice but to start a new region.
			 */
//			printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(footprint_header);
			/* check that we're not going over the edge */
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			/* initialize new region as empty */
			footprint_region->cfr_vaddr = old_entry->vme_start;
			footprint_region->cfr_num_pages = 0;
		} else {
			/*
			 * Store "zero" page dispositions for the missing
			 * pages.
			 */
//			printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
			for (; num_pages_delta > 0; num_pages_delta--) {
				next_disp_p = (cf_disp_t *)
				    ((uintptr_t) footprint_region +
				    sizeof(*footprint_region));
				next_disp_p += footprint_region->cfr_num_pages;
				/* check that we're not going over the edge */
				if ((uintptr_t)next_disp_p >= footprint_edge) {
					goto over_the_edge;
				}
				/* store "zero" disposition for this gap page */
				footprint_region->cfr_num_pages++;
				*next_disp_p = (cf_disp_t) 0;
				footprint_header->cf_last_zeroes++;
			}
		}
	}

	/* collect one disposition per page of "old_entry" */
	for (va = old_entry->vme_start;
	    va < old_entry->vme_end;
	    va += effective_page_size) {
		int disposition;
		cf_disp_t cf_disp;

		vm_map_footprint_query_page_info(old_map,
		    old_entry,
		    va,
		    &disposition);
		cf_disp = vm_page_disposition_to_cf_disp(disposition);

//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);

		if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
			/*
			 * Ignore "zero" dispositions at start of
			 * region: just move start of region.
			 */
			footprint_region->cfr_vaddr += effective_page_size;
			continue;
		}

		/* would region's cfr_num_pages overflow? */
		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
		    &num_pages_tmp)) {
			/* overflow: create a new region */
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(
				footprint_header);
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			footprint_region->cfr_vaddr = va;
			footprint_region->cfr_num_pages = 0;
		}

		next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
		    sizeof(*footprint_region));
		next_disp_p += footprint_region->cfr_num_pages;
		/* check that we're not going over the edge */
		if ((uintptr_t)next_disp_p >= footprint_edge) {
			goto over_the_edge;
		}
		/* store this dispostion */
		*next_disp_p = cf_disp;
		footprint_region->cfr_num_pages++;

		if (cf_disp != 0) {
			/* non-zero disp: break the current zero streak */
			footprint_header->cf_last_zeroes = 0;
			/* done */
			continue;
		}

		/* zero disp: add to the current streak of zeroes */
		footprint_header->cf_last_zeroes++;
		if ((footprint_header->cf_last_zeroes +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
		    (sizeof(int) - 1),
		    sizeof(int))) <
		    (sizeof(*footprint_header))) {
			/*
			 * There are not enough trailing "zero" dispositions
			 * (+ the extra padding we would need for the previous
			 * region); creating a new region would not save space
			 * at this point, so let's keep this "zero" disposition
			 * in this region and reconsider later.
			 */
			continue;
		}
		/*
		 * Create a new region to avoid having too many consecutive
		 * "zero" dispositions.
		 */
		new_footprint_region =
		    vm_map_corpse_footprint_new_region(footprint_header);
		if (new_footprint_region == NULL) {
			goto over_the_edge;
		}
		footprint_region = new_footprint_region;
		/* initialize the new region as empty ... */
		footprint_region->cfr_num_pages = 0;
		/* ... and skip this "zero" disp */
		footprint_region->cfr_vaddr = va + effective_page_size;
	}

	return KERN_SUCCESS;

over_the_edge:
	/* footprint buffer exhausted: callers get a partial footprint */
//	printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
	vm_map_corpse_footprint_full++;
	return KERN_RESOURCE_SHORTAGE;
}
21904
21905 /*
21906 * vm_map_corpse_footprint_collect_done:
21907 * completes the footprint collection by getting rid of any remaining
21908 * trailing "zero" dispositions and trimming the unused part of the
21909 * kernel buffer
21910 */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t        new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t       buf_size, actual_size;
	kern_return_t   kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		/* no footprint buffer was ever collected: nothing to trim */
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* number of bytes actually used in the buffer */
	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));

//	printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	/* update footprint-size statistics (running average, count, max) */
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		/*
		 * Trim the unused tail of the buffer: release everything past
		 * the page right after "actual_size" and re-purpose that page
		 * as the new trailing guard page (VM_PROT_NONE).
		 */
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE), /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE, /* set_maximum */
		    VM_PROT_NONE);
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	/* record the trimmed size (not counting the guard page) */
	footprint_header->cf_size = actual_size;
}
21983
21984 /*
21985 * vm_map_corpse_footprint_query_page_info:
21986 * retrieves the disposition of the page at virtual address "vaddr"
21987 * in the forked corpse's VM map
21988 *
21989 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
21990 */
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t        map,
	vm_map_offset_t va,
	int             *disposition_p)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t        footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int             disp_idx;
	kern_return_t   kr;
	int             effective_page_size;
	cf_disp_t       cf_disp;

	if (!map->has_corpse_footprint) {
		/* not a corpse map: no footprint data to query */
		*disposition_p = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		/* no footprint data was collected for this corpse */
		*disposition_p = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

	/* use the smaller of the kernel page size and the map's page size */
	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

lookup_again:
	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    effective_page_size));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		goto lookup_again;
	}

	/* walk the regions forward until one covers "va" or we run out */
	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    effective_page_size));
	}
	if (va < region_start || va >= region_end) {
		/* page not found: report a "zero" disposition, success */
		*disposition_p = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
	cf_disp = footprint_region->cfr_disposition[disp_idx];
	*disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
	kr = KERN_SUCCESS;
done:
//	if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disposition_p,
	    kern_return_t, kr);

	return kr;
}
22097
22098 void
vm_map_corpse_footprint_destroy(vm_map_t map)22099 vm_map_corpse_footprint_destroy(
22100 vm_map_t map)
22101 {
22102 if (map->has_corpse_footprint &&
22103 map->vmmap_corpse_footprint != 0) {
22104 struct vm_map_corpse_footprint_header *footprint_header;
22105 vm_size_t buf_size;
22106 kern_return_t kr;
22107
22108 footprint_header = map->vmmap_corpse_footprint;
22109 buf_size = footprint_header->cf_size;
22110 kr = vm_deallocate(kernel_map,
22111 (vm_offset_t) map->vmmap_corpse_footprint,
22112 ((vm_size_t) buf_size
22113 + PAGE_SIZE)); /* trailing guard page */
22114 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
22115 map->vmmap_corpse_footprint = 0;
22116 map->has_corpse_footprint = FALSE;
22117 }
22118 }
22119
22120 /*
22121 * vm_map_copy_footprint_ledgers:
22122 * copies any ledger that's relevant to the memory footprint of "old_task"
22123 * into the forked corpse's task ("new_task")
22124 */
22125 void
vm_map_copy_footprint_ledgers(task_t old_task,task_t new_task)22126 vm_map_copy_footprint_ledgers(
22127 task_t old_task,
22128 task_t new_task)
22129 {
22130 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
22131 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
22132 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
22133 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
22134 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
22135 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
22136 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
22137 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
22138 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
22139 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
22140 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
22141 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
22142 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
22143 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
22144 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
22145 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
22146 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
22147 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
22148 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
22149 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
22150 }
22151
22152 /*
22153 * vm_map_copy_ledger:
22154 * copy a single ledger from "old_task" to "new_task"
22155 */
22156 void
vm_map_copy_ledger(task_t old_task,task_t new_task,int ledger_entry)22157 vm_map_copy_ledger(
22158 task_t old_task,
22159 task_t new_task,
22160 int ledger_entry)
22161 {
22162 ledger_amount_t old_balance, new_balance, delta;
22163
22164 assert(new_task->map->has_corpse_footprint);
22165 if (!new_task->map->has_corpse_footprint) {
22166 return;
22167 }
22168
22169 /* turn off sanity checks for the ledger we're about to mess with */
22170 ledger_disable_panic_on_negative(new_task->ledger,
22171 ledger_entry);
22172
22173 /* adjust "new_task" to match "old_task" */
22174 ledger_get_balance(old_task->ledger,
22175 ledger_entry,
22176 &old_balance);
22177 ledger_get_balance(new_task->ledger,
22178 ledger_entry,
22179 &new_balance);
22180 if (new_balance == old_balance) {
22181 /* new == old: done */
22182 } else if (new_balance > old_balance) {
22183 /* new > old ==> new -= new - old */
22184 delta = new_balance - old_balance;
22185 ledger_debit(new_task->ledger,
22186 ledger_entry,
22187 delta);
22188 } else {
22189 /* new < old ==> new += old - new */
22190 delta = old_balance - new_balance;
22191 ledger_credit(new_task->ledger,
22192 ledger_entry,
22193 delta);
22194 }
22195 }
22196
22197 /*
22198 * vm_map_get_pmap:
22199 * returns the pmap associated with the vm_map
22200 */
22201 pmap_t
vm_map_get_pmap(vm_map_t map)22202 vm_map_get_pmap(vm_map_t map)
22203 {
22204 return vm_map_pmap(map);
22205 }
22206
22207 #if CONFIG_MAP_RANGES
22208 /*
22209 * vm_map_range_map_init:
22210 * initializes the VM range ID map to enable index lookup
22211 * of user VM ranges based on VM tag from userspace.
22212 */
22213 static void
vm_map_range_map_init(void)22214 vm_map_range_map_init(void)
22215 {
22216 /* maintain status quo by default */
22217 for (int i = 0; i < VM_MEMORY_COUNT; i++) {
22218 vm_map_range_id_map[i] = UMEM_RANGE_ID_DEFAULT;
22219 }
22220
22221 /* move all MALLOC allocations to heap range */
22222 vm_map_range_id_map[VM_MEMORY_MALLOC] = UMEM_RANGE_ID_HEAP;
22223 vm_map_range_id_map[VM_MEMORY_MALLOC_HUGE] = UMEM_RANGE_ID_HEAP;
22224 vm_map_range_id_map[VM_MEMORY_MALLOC_LARGE] = UMEM_RANGE_ID_HEAP;
22225 vm_map_range_id_map[VM_MEMORY_MALLOC_LARGE_REUSABLE] = UMEM_RANGE_ID_HEAP;
22226 vm_map_range_id_map[VM_MEMORY_MALLOC_LARGE_REUSED] = UMEM_RANGE_ID_HEAP;
22227 vm_map_range_id_map[VM_MEMORY_MALLOC_MEDIUM] = UMEM_RANGE_ID_HEAP;
22228 vm_map_range_id_map[VM_MEMORY_MALLOC_NANO] = UMEM_RANGE_ID_HEAP;
22229 vm_map_range_id_map[VM_MEMORY_MALLOC_PGUARD] = UMEM_RANGE_ID_HEAP;
22230 vm_map_range_id_map[VM_MEMORY_MALLOC_PROB_GUARD] = UMEM_RANGE_ID_HEAP;
22231 vm_map_range_id_map[VM_MEMORY_MALLOC_SMALL] = UMEM_RANGE_ID_HEAP;
22232 vm_map_range_id_map[VM_MEMORY_MALLOC_TINY] = UMEM_RANGE_ID_HEAP;
22233 }
22234
22235 /*
22236 * vm_map_range_configure:
22237 * configures the user vm_map ranges by increasing the maximum VA range of
22238 * the map and carving out a range at the end of VA space (searching backwards
22239 * in the newly expanded map).
22240 */
kern_return_t
vm_map_range_configure(vm_map_t map)
{
	vm_map_size_t addr_space_size;
	vm_map_offset_t start, end, saved_max, random_addr;

	if (!vm_map_user_ranges) {
		/* user VM ranges globally disabled: nothing to configure */
		return KERN_SUCCESS;
	}

	/* Should not be applying ranges to kernel map or kernel map submaps */
	assert(map != kernel_map);
	assert(vm_map_pmap(map) != kernel_pmap);

	/* save the existing max offset */
	vm_map_lock_read(map);
	saved_max = map->max_offset;
	vm_map_unlock_read(map);

	/*
	 * Check that we're not already jumbo'd. If so we cannot guarantee that
	 * we can set up the ranges safely without interfering with the existing
	 * map.
	 */
	if (saved_max > vm_compute_max_offset(vm_map_is_64bit(map))) {
		return KERN_NO_SPACE;
	}

	/* expand the default VM space to the largest possible address */
	vm_map_set_jumbo(map);

	vm_map_lock(map);
	/* VA space gained by the jumbo expansion */
	addr_space_size = map->max_offset - saved_max;

	if (addr_space_size <= VM_MAP_USER_RANGE_MAX) {
		/* not enough new VA space to carve out a heap range */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	/* pick a page-aligned random offset within the remaining slack */
	addr_space_size -= VM_MAP_USER_RANGE_MAX;
	random_addr = (vm_map_offset_t)random();
	random_addr <<= VM_MAP_PAGE_SHIFT(map);
	random_addr %= addr_space_size;

	/*
	 * round off the start so we begin on a L2 TT boundary and ensure we have
	 * at least a ARM_TT_L2_SIZE sized hole between existing map range and
	 * new range(s).
	 */
	start = vm_map_round_page(saved_max + random_addr + 1, ARM_TT_L2_OFFMASK);
	end = MIN(map->max_offset, start + VM_MAP_USER_RANGE_MAX);
	assert(start > saved_max);
	assert(end <= map->max_offset);

	/* default range covers the "normal" heap range */
	map->user_range[UMEM_RANGE_ID_DEFAULT].min_address = map->min_offset;
	map->user_range[UMEM_RANGE_ID_DEFAULT].max_address = saved_max;

	/* heap range covers the new extended range */
	map->user_range[UMEM_RANGE_ID_HEAP].min_address = start;
	map->user_range[UMEM_RANGE_ID_HEAP].max_address = end;
	map->uses_user_ranges = true;
	vm_map_unlock(map);

	return KERN_SUCCESS;
}
22307
22308 /*
22309 * vm_map_range_fork:
22310 * clones the array of ranges from old_map to new_map in support
22311 * of a VM map fork.
22312 */
22313 void
vm_map_range_fork(vm_map_t new_map,vm_map_t old_map)22314 vm_map_range_fork(vm_map_t new_map, vm_map_t old_map)
22315 {
22316 int i = 0;
22317
22318 if (!old_map->uses_user_ranges) {
22319 /* nothing to do */
22320 return;
22321 }
22322
22323 for (i = 0; i < UMEM_RANGE_COUNT; i++) {
22324 new_map->user_range[i].min_address = old_map->user_range[i].min_address;
22325 new_map->user_range[i].max_address = old_map->user_range[i].max_address;
22326 }
22327
22328 new_map->uses_user_ranges = true;
22329 }
22330
22331 /*
22332 * vm_map_get_user_range_id:
22333 * looks up the vm_map_range_id_map lookup table to determine which range ID to
22334 * utilize for any given user memory tag. If no ranges are present return the
22335 * default range.
22336 */
22337 __attribute__((overloadable))
22338 vm_map_range_id_t
vm_map_get_user_range_id(vm_map_t map,uint16_t tag)22339 vm_map_get_user_range_id(vm_map_t map, uint16_t tag)
22340 {
22341 vm_map_range_id_t range_id = UMEM_RANGE_ID_DEFAULT;
22342
22343 if (map != NULL && map->uses_user_ranges && tag < VM_MEMORY_COUNT) {
22344 range_id = vm_map_range_id_map[tag];
22345 }
22346
22347 return range_id;
22348 }
22349
22350 /*
22351 * vm_map_get_user_range_id:
22352 * determines which range ID the given addr/size combination maps to. If
22353 * range ID cannot be determined return the default range.
22354 */
22355 __attribute__((overloadable))
22356 vm_map_range_id_t
vm_map_get_user_range_id(vm_map_t map,mach_vm_offset_t addr,mach_vm_size_t size)22357 vm_map_get_user_range_id(
22358 vm_map_t map,
22359 mach_vm_offset_t addr,
22360 mach_vm_size_t size)
22361 {
22362 vm_map_range_id_t range_id = UMEM_RANGE_ID_MAX;
22363
22364 if (map == NULL || !map->uses_user_ranges) {
22365 return UMEM_RANGE_ID_DEFAULT;
22366 }
22367
22368 for (; range_id > UMEM_RANGE_ID_DEFAULT; --range_id) {
22369 if (mach_vm_range_contains(&map->user_range[range_id], addr, size)) {
22370 break;
22371 }
22372 }
22373
22374 assert(range_id < UMEM_RANGE_COUNT);
22375 return range_id;
22376 }
22377
22378 /*
22379 * vm_map_get_user_range:
22380 * copy the VM user range for the given VM map and range ID.
22381 */
22382 kern_return_t
vm_map_get_user_range(vm_map_t map,vm_map_range_id_t range_id,mach_vm_range_t range)22383 vm_map_get_user_range(
22384 vm_map_t map,
22385 vm_map_range_id_t range_id,
22386 mach_vm_range_t range)
22387 {
22388 if (map == NULL ||
22389 !map->uses_user_ranges ||
22390 range_id > UMEM_RANGE_ID_MAX ||
22391 range == NULL) {
22392 return KERN_INVALID_ARGUMENT;
22393 }
22394
22395 *range = map->user_range[range_id];
22396 return KERN_SUCCESS;
22397 }
22398 #endif /* CONFIG_MAP_RANGES */
22399
22400 /*
22401 * vm_map_entry_has_device_pager:
22402 * Check if the vm map entry specified by the virtual address has a device pager.
22403 * If the vm map entry does not exist or if the map is NULL, this returns FALSE.
22404 */
boolean_t
vm_map_entry_has_device_pager(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_entry_t entry;
	vm_object_t object;
	boolean_t result;

	if (map == NULL) {
		return FALSE;
	}

	vm_map_lock(map);
	while (TRUE) {
		if (!vm_map_lookup_entry(map, vaddr, &entry)) {
			/* no map entry covers "vaddr" */
			result = FALSE;
			break;
		}
		if (entry->is_sub_map) {
			// Check the submap
			vm_map_t submap = VME_SUBMAP(entry);
			assert(submap != NULL);
			/*
			 * Lock hand-off: take the submap's lock before
			 * dropping the parent's, then descend into the
			 * submap on the next iteration.
			 */
			vm_map_lock(submap);
			vm_map_unlock(map);
			map = submap;
			continue;
		}
		object = VME_OBJECT(entry);
		if (object != NULL && object->pager != NULL && is_device_pager_ops(object->pager->mo_pager_ops)) {
			result = TRUE;
			break;
		}
		result = FALSE;
		break;
	}

	/* unlocks whichever map (original or submap) is currently held */
	vm_map_unlock(map);
	return result;
}
22443
22444
22445 #if MACH_ASSERT
22446
extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

/*
 * LEDGER_DRIFT: declares the drift counters tracked for one ledger --
 * how many pmaps were found over/under on that ledger, plus the total
 * and worst-case imbalance seen in each direction.
 */
#define LEDGER_DRIFT(__LEDGER)                  \
	int __LEDGER##_over;                    \
	ledger_amount_t __LEDGER##_over_total;  \
	ledger_amount_t __LEDGER##_over_max;    \
	int __LEDGER##_under;                   \
	ledger_amount_t __LEDGER##_under_total; \
	ledger_amount_t __LEDGER##_under_max

/* global accumulator updated by vm_map_pmap_check_ledgers() */
struct {
	uint64_t        num_pmaps_checked;

	LEDGER_DRIFT(phys_footprint);
	LEDGER_DRIFT(internal);
	LEDGER_DRIFT(internal_compressed);
	LEDGER_DRIFT(external);
	LEDGER_DRIFT(reusable);
	LEDGER_DRIFT(iokit_mapped);
	LEDGER_DRIFT(alternate_accounting);
	LEDGER_DRIFT(alternate_accounting_compressed);
	LEDGER_DRIFT(page_table);
	LEDGER_DRIFT(purgeable_volatile);
	LEDGER_DRIFT(purgeable_nonvolatile);
	LEDGER_DRIFT(purgeable_volatile_compressed);
	LEDGER_DRIFT(purgeable_nonvolatile_compressed);
	LEDGER_DRIFT(tagged_nofootprint);
	LEDGER_DRIFT(tagged_footprint);
	LEDGER_DRIFT(tagged_nofootprint_compressed);
	LEDGER_DRIFT(tagged_footprint_compressed);
	LEDGER_DRIFT(network_volatile);
	LEDGER_DRIFT(network_nonvolatile);
	LEDGER_DRIFT(network_volatile_compressed);
	LEDGER_DRIFT(network_nonvolatile_compressed);
	LEDGER_DRIFT(media_nofootprint);
	LEDGER_DRIFT(media_footprint);
	LEDGER_DRIFT(media_nofootprint_compressed);
	LEDGER_DRIFT(media_footprint_compressed);
	LEDGER_DRIFT(graphics_nofootprint);
	LEDGER_DRIFT(graphics_footprint);
	LEDGER_DRIFT(graphics_nofootprint_compressed);
	LEDGER_DRIFT(graphics_footprint_compressed);
	LEDGER_DRIFT(neural_nofootprint);
	LEDGER_DRIFT(neural_footprint);
	LEDGER_DRIFT(neural_nofootprint_compressed);
	LEDGER_DRIFT(neural_footprint_compressed);
} pmap_ledgers_drift;
22495
/*
 * vm_map_pmap_check_ledgers:
 *	Debug-only (MACH_ASSERT) audit of a task's pmap-related ledgers,
 *	run when a pmap is being torn down.  For each ledger entry that
 *	the pmap layer maintains, check that the balance has returned to
 *	zero; any non-zero balance is "drift" and is logged and folded
 *	into the global pmap_ledgers_drift statistics.  Depending on the
 *	per-entry panic_on_negative flag and the global
 *	pmap_ledgers_panic / pmap_ledgers_panic_leeway knobs (defined
 *	elsewhere in this file), an imbalance may also panic.
 *
 *	pmap:     the pmap being destroyed (used only for logging here).
 *	ledger:   the task ledger whose entries are checked.
 *	pid:      owning process id, for diagnostics.
 *	procname: owning process name, for diagnostics.
 */
void
vm_map_pmap_check_ledgers(
	pmap_t ledger_pmap,
	ledger_t ledger,
	int pid,
	char *procname)
{
	ledger_amount_t	bal;
	boolean_t	do_panic;

	do_panic = FALSE;

	pmap_ledgers_drift.num_pmaps_checked++;

/*
 * LEDGER_CHECK_BALANCE(__LEDGER):
 *	Fetch the balance of task ledger entry "__LEDGER" along with its
 *	panic_on_negative attribute.  If the balance is non-zero:
 *	  - arm do_panic when the entry requests panic-on-negative, or when
 *	    the global pmap_ledgers_panic knob is set with a positive leeway
 *	    and the drift exceeds +/- (leeway * PAGE_SIZE);
 *	  - always log the imbalance;
 *	  - accumulate over/under counts, totals and maxima into the
 *	    matching pmap_ledgers_drift.__LEDGER##_* fields (token-pasted,
 *	    so each argument must name both a task_ledgers entry and a
 *	    pmap_ledgers_drift field family).
 *	panic_on_negative defaults to TRUE so a failed attribute lookup
 *	errs on the side of panicking.
 */
#define LEDGER_CHECK_BALANCE(__LEDGER)					\
MACRO_BEGIN								\
	int panic_on_negative = TRUE;					\
	ledger_get_balance(ledger,					\
	    task_ledgers.__LEDGER,					\
	    &bal);							\
	ledger_get_panic_on_negative(ledger,				\
	    task_ledgers.__LEDGER,					\
	    &panic_on_negative);					\
	if (bal != 0) {							\
		if (panic_on_negative ||				\
		    (pmap_ledgers_panic &&				\
		    pmap_ledgers_panic_leeway > 0 &&			\
		    (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||	\
		    bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) {	\
			do_panic = TRUE;				\
		}							\
		printf("LEDGER BALANCE proc %d (%s) "			\
		    "\"%s\" = %lld\n",					\
		    pid, procname, #__LEDGER, bal);			\
		if (bal > 0) {						\
			pmap_ledgers_drift.__LEDGER##_over++;		\
			pmap_ledgers_drift.__LEDGER##_over_total += bal; \
			if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
				pmap_ledgers_drift.__LEDGER##_over_max = bal; \
			}						\
		} else if (bal < 0) {					\
			pmap_ledgers_drift.__LEDGER##_under++;		\
			pmap_ledgers_drift.__LEDGER##_under_total += bal; \
			if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
				pmap_ledgers_drift.__LEDGER##_under_max = bal; \
			}						\
		}							\
	}								\
MACRO_END

	/*
	 * Audit every pmap-maintained ledger entry.  This list must stay
	 * in sync with the LEDGER_DRIFT() fields of pmap_ledgers_drift
	 * above and with the entries the pmap layer actually updates.
	 */
	LEDGER_CHECK_BALANCE(phys_footprint);
	LEDGER_CHECK_BALANCE(internal);
	LEDGER_CHECK_BALANCE(internal_compressed);
	LEDGER_CHECK_BALANCE(external);
	LEDGER_CHECK_BALANCE(reusable);
	LEDGER_CHECK_BALANCE(iokit_mapped);
	LEDGER_CHECK_BALANCE(alternate_accounting);
	LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
	LEDGER_CHECK_BALANCE(page_table);
	LEDGER_CHECK_BALANCE(purgeable_volatile);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
	LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(tagged_nofootprint);
	LEDGER_CHECK_BALANCE(tagged_footprint);
	LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
	LEDGER_CHECK_BALANCE(network_volatile);
	LEDGER_CHECK_BALANCE(network_nonvolatile);
	LEDGER_CHECK_BALANCE(network_volatile_compressed);
	LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(media_nofootprint);
	LEDGER_CHECK_BALANCE(media_footprint);
	LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(media_footprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_nofootprint);
	LEDGER_CHECK_BALANCE(graphics_footprint);
	LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
	LEDGER_CHECK_BALANCE(neural_nofootprint);
	LEDGER_CHECK_BALANCE(neural_footprint);
	LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(neural_footprint_compressed);

	if (do_panic) {
		/*
		 * Only actually panic when the global knob is set;
		 * otherwise just report the imbalance and continue.
		 */
		if (pmap_ledgers_panic) {
			panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers",
			    ledger_pmap, pid, procname);
		} else {
			printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    ledger_pmap, pid, procname);
		}
	}
}
22590
22591 void
vm_map_pmap_set_process(vm_map_t map,int pid,char * procname)22592 vm_map_pmap_set_process(
22593 vm_map_t map,
22594 int pid,
22595 char *procname)
22596 {
22597 pmap_set_process(vm_map_pmap(map), pid, procname);
22598 }
22599
22600 #endif /* MACH_ASSERT */
22601