1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <mach_assert.h>
67
68 #include <vm/vm_options.h>
69
70 #include <libkern/OSAtomic.h>
71
72 #include <mach/kern_return.h>
73 #include <mach/port.h>
74 #include <mach/vm_attributes.h>
75 #include <mach/vm_param.h>
76 #include <mach/vm_behavior.h>
77 #include <mach/vm_statistics.h>
78 #include <mach/memory_object.h>
79 #include <mach/mach_vm.h>
80 #include <machine/cpu_capabilities.h>
81 #include <mach/sdt.h>
82
83 #include <kern/assert.h>
84 #include <kern/backtrace.h>
85 #include <kern/counter.h>
86 #include <kern/exc_guard.h>
87 #include <kern/kalloc.h>
88 #include <kern/zalloc_internal.h>
89
90 #include <vm/cpm.h>
91 #include <vm/vm_compressor.h>
92 #include <vm/vm_compressor_pager.h>
93 #include <vm/vm_init.h>
94 #include <vm/vm_fault.h>
95 #include <vm/vm_map.h>
96 #include <vm/vm_object.h>
97 #include <vm/vm_page.h>
98 #include <vm/vm_pageout.h>
99 #include <vm/pmap.h>
100 #include <vm/vm_kern.h>
101 #include <ipc/ipc_port.h>
102 #include <kern/sched_prim.h>
103 #include <kern/misc_protos.h>
104
105 #include <mach/vm_map_server.h>
106 #include <mach/mach_host_server.h>
107 #include <vm/vm_protos.h>
108 #include <vm/vm_purgeable_internal.h>
109
110 #include <vm/vm_protos.h>
111 #include <vm/vm_shared_region.h>
112 #include <vm/vm_map_store.h>
113
114 #include <san/kasan.h>
115
116 #include <sys/resource.h>
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
119 #include <sys/reboot.h>
120 #include <sys/kdebug_triage.h>
121
122 #if __LP64__
123 #define HAVE_VM_MAP_RESERVED_ENTRY_ZONE 0
124 #else
125 #define HAVE_VM_MAP_RESERVED_ENTRY_ZONE 1
126 #endif
127
128 #include <libkern/section_keywords.h>
129 #if DEVELOPMENT || DEBUG
130 extern int proc_selfcsflags(void);
131 int panic_on_unsigned_execute = 0;
132 int panic_on_mlock_failure = 0;
133 #endif /* DEVELOPMENT || DEBUG */
134
135 #if MACH_ASSERT
136 int debug4k_filter = 0;
137 char debug4k_proc_name[1024] = "";
138 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
139 int debug4k_panic_on_misaligned_sharing = 0;
140 const char *debug4k_category_name[] = {
141 "error", /* 0 */
142 "life", /* 1 */
143 "load", /* 2 */
144 "fault", /* 3 */
145 "copy", /* 4 */
146 "share", /* 5 */
147 "adjust", /* 6 */
148 "pmap", /* 7 */
149 "mementry", /* 8 */
150 "iokit", /* 9 */
151 "upl", /* 10 */
152 "exc", /* 11 */
153 "vfs" /* 12 */
154 };
155 #endif /* MACH_ASSERT */
156 int debug4k_no_cow_copyin = 0;
157
158
159 #if __arm64__
160 extern const int fourk_binary_compatibility_unsafe;
161 extern const int fourk_binary_compatibility_allow_wx;
162 #endif /* __arm64__ */
163 extern int proc_selfpid(void);
164 extern char *proc_name_address(void *p);
165
166 #if VM_MAP_DEBUG_APPLE_PROTECT
167 int vm_map_debug_apple_protect = 0;
168 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
169 #if VM_MAP_DEBUG_FOURK
170 int vm_map_debug_fourk = 0;
171 #endif /* VM_MAP_DEBUG_FOURK */
172
173 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
174 int vm_map_executable_immutable_verbose = 0;
175
176 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
177
178 extern u_int32_t random(void); /* from <libkern/libkern.h> */
179 /* Internal prototypes
180 */
181
182 static void vm_map_simplify_range(
183 vm_map_t map,
184 vm_map_offset_t start,
185 vm_map_offset_t end); /* forward */
186
187 static boolean_t vm_map_range_check(
188 vm_map_t map,
189 vm_map_offset_t start,
190 vm_map_offset_t end,
191 vm_map_entry_t *entry);
192
193 static vm_map_entry_t _vm_map_entry_create(
194 struct vm_map_header *map_header, boolean_t map_locked);
195
196 static void _vm_map_entry_dispose(
197 struct vm_map_header *map_header,
198 vm_map_entry_t entry);
199
200 static void vm_map_pmap_enter(
201 vm_map_t map,
202 vm_map_offset_t addr,
203 vm_map_offset_t end_addr,
204 vm_object_t object,
205 vm_object_offset_t offset,
206 vm_prot_t protection);
207
208 static void _vm_map_clip_end(
209 struct vm_map_header *map_header,
210 vm_map_entry_t entry,
211 vm_map_offset_t end);
212
213 static void _vm_map_clip_start(
214 struct vm_map_header *map_header,
215 vm_map_entry_t entry,
216 vm_map_offset_t start);
217
218 static void vm_map_entry_delete(
219 vm_map_t map,
220 vm_map_entry_t entry);
221
222 static kern_return_t vm_map_delete(
223 vm_map_t map,
224 vm_map_offset_t start,
225 vm_map_offset_t end,
226 int flags,
227 vm_map_t zap_map);
228
229 static void vm_map_copy_insert(
230 vm_map_t map,
231 vm_map_entry_t after_where,
232 vm_map_copy_t copy);
233
234 static kern_return_t vm_map_copy_overwrite_unaligned(
235 vm_map_t dst_map,
236 vm_map_entry_t entry,
237 vm_map_copy_t copy,
238 vm_map_address_t start,
239 boolean_t discard_on_success);
240
241 static kern_return_t vm_map_copy_overwrite_aligned(
242 vm_map_t dst_map,
243 vm_map_entry_t tmp_entry,
244 vm_map_copy_t copy,
245 vm_map_offset_t start,
246 pmap_t pmap);
247
248 static kern_return_t vm_map_copyin_kernel_buffer(
249 vm_map_t src_map,
250 vm_map_address_t src_addr,
251 vm_map_size_t len,
252 boolean_t src_destroy,
253 vm_map_copy_t *copy_result); /* OUT */
254
255 static kern_return_t vm_map_copyout_kernel_buffer(
256 vm_map_t map,
257 vm_map_address_t *addr, /* IN/OUT */
258 vm_map_copy_t copy,
259 vm_map_size_t copy_size,
260 boolean_t overwrite,
261 boolean_t consume_on_success);
262
263 static void vm_map_fork_share(
264 vm_map_t old_map,
265 vm_map_entry_t old_entry,
266 vm_map_t new_map);
267
268 static boolean_t vm_map_fork_copy(
269 vm_map_t old_map,
270 vm_map_entry_t *old_entry_p,
271 vm_map_t new_map,
272 int vm_map_copyin_flags);
273
274 static kern_return_t vm_map_wire_nested(
275 vm_map_t map,
276 vm_map_offset_t start,
277 vm_map_offset_t end,
278 vm_prot_t caller_prot,
279 vm_tag_t tag,
280 boolean_t user_wire,
281 pmap_t map_pmap,
282 vm_map_offset_t pmap_addr,
283 ppnum_t *physpage_p);
284
285 static kern_return_t vm_map_unwire_nested(
286 vm_map_t map,
287 vm_map_offset_t start,
288 vm_map_offset_t end,
289 boolean_t user_wire,
290 pmap_t map_pmap,
291 vm_map_offset_t pmap_addr);
292
293 static kern_return_t vm_map_overwrite_submap_recurse(
294 vm_map_t dst_map,
295 vm_map_offset_t dst_addr,
296 vm_map_size_t dst_size);
297
298 static kern_return_t vm_map_copy_overwrite_nested(
299 vm_map_t dst_map,
300 vm_map_offset_t dst_addr,
301 vm_map_copy_t copy,
302 boolean_t interruptible,
303 pmap_t pmap,
304 boolean_t discard_on_success);
305
306 static kern_return_t vm_map_remap_extract(
307 vm_map_t map,
308 vm_map_offset_t addr,
309 vm_map_size_t size,
310 boolean_t copy,
311 struct vm_map_header *map_header,
312 vm_prot_t *cur_protection,
313 vm_prot_t *max_protection,
314 vm_inherit_t inheritance,
315 vm_map_kernel_flags_t vmk_flags);
316
317 static kern_return_t vm_map_remap_range_allocate(
318 vm_map_t map,
319 vm_map_address_t *address,
320 vm_map_size_t size,
321 vm_map_offset_t mask,
322 int flags,
323 vm_map_kernel_flags_t vmk_flags,
324 vm_tag_t tag,
325 vm_map_entry_t *map_entry);
326
327 static void vm_map_region_look_for_page(
328 vm_map_t map,
329 vm_map_offset_t va,
330 vm_object_t object,
331 vm_object_offset_t offset,
332 int max_refcnt,
333 unsigned short depth,
334 vm_region_extended_info_t extended,
335 mach_msg_type_number_t count);
336
337 static int vm_map_region_count_obj_refs(
338 vm_map_entry_t entry,
339 vm_object_t object);
340
341
342 static kern_return_t vm_map_willneed(
343 vm_map_t map,
344 vm_map_offset_t start,
345 vm_map_offset_t end);
346
347 static kern_return_t vm_map_reuse_pages(
348 vm_map_t map,
349 vm_map_offset_t start,
350 vm_map_offset_t end);
351
352 static kern_return_t vm_map_reusable_pages(
353 vm_map_t map,
354 vm_map_offset_t start,
355 vm_map_offset_t end);
356
357 static kern_return_t vm_map_can_reuse(
358 vm_map_t map,
359 vm_map_offset_t start,
360 vm_map_offset_t end);
361
362 #if MACH_ASSERT
363 static kern_return_t vm_map_pageout(
364 vm_map_t map,
365 vm_map_offset_t start,
366 vm_map_offset_t end);
367 #endif /* MACH_ASSERT */
368
369 kern_return_t vm_map_corpse_footprint_collect(
370 vm_map_t old_map,
371 vm_map_entry_t old_entry,
372 vm_map_t new_map);
373 void vm_map_corpse_footprint_collect_done(
374 vm_map_t new_map);
375 void vm_map_corpse_footprint_destroy(
376 vm_map_t map);
377 kern_return_t vm_map_corpse_footprint_query_page_info(
378 vm_map_t map,
379 vm_map_offset_t va,
380 int *disposition_p);
381 void vm_map_footprint_query_page_info(
382 vm_map_t map,
383 vm_map_entry_t map_entry,
384 vm_map_offset_t curr_s_offset,
385 int *disposition_p);
386
387 pid_t find_largest_process_vm_map_entries(void);
388
389 extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
390 mach_exception_data_type_t subcode);
391
392 /*
393 * Macros to copy a vm_map_entry. We must be careful to correctly
394 * manage the wired page count. vm_map_entry_copy() creates a new
395 * map entry to the same memory - the wired count in the new entry
396 * must be set to zero. vm_map_entry_copy_full() creates a new
397 * entry that is identical to the old entry. This preserves the
398 * wire count; it's used for map splitting and zone changing in
399 * vm_map_copyout.
400 */
401
402 static inline void
403 vm_map_entry_copy_pmap_cs_assoc(
404 vm_map_t map __unused,
405 vm_map_entry_t new __unused,
406 vm_map_entry_t old __unused)
407 {
408 /* when pmap_cs is not enabled, assert as a sanity check */
409 assert(new->pmap_cs_associated == FALSE);
410 }
411
412 /*
413 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
414 * But for security reasons on some platforms, we don't want the
415 * new mapping to be "used for jit", so we reset the flag here.
416 */
417 static inline void
418 vm_map_entry_copy_code_signing(
419 vm_map_t map,
420 vm_map_entry_t new,
421 vm_map_entry_t old __unused)
422 {
423 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
424 assert(new->used_for_jit == old->used_for_jit);
425 } else {
426 new->used_for_jit = FALSE;
427 }
428 }
429
430 static inline void
431 vm_map_entry_copy_full(
432 vm_map_entry_t new,
433 vm_map_entry_t old)
434 {
435 #if MAP_ENTRY_CREATION_DEBUG
436 btref_put(new->vme_creation_bt);
437 btref_retain(old->vme_creation_bt);
438 #endif
439 #if MAP_ENTRY_INSERTION_DEBUG
440 btref_put(new->vme_insertion_bt);
441 btref_retain(old->vme_insertion_bt);
442 #endif
443 *new = *old;
444 }
445
446 static inline void
447 vm_map_entry_copy(
448 vm_map_t map,
449 vm_map_entry_t new,
450 vm_map_entry_t old)
451 {
452 vm_map_entry_copy_full(new, old);
453
454 new->is_shared = FALSE;
455 new->needs_wakeup = FALSE;
456 new->in_transition = FALSE;
457 new->wired_count = 0;
458 new->user_wired_count = 0;
459 new->permanent = FALSE;
460 vm_map_entry_copy_code_signing(map, new, old);
461 vm_map_entry_copy_pmap_cs_assoc(map, new, old);
462 if (new->iokit_acct) {
463 assertf(!new->use_pmap, "old %p new %p\n", old, new);
464 new->iokit_acct = FALSE;
465 new->use_pmap = TRUE;
466 }
467 new->vme_resilient_codesign = FALSE;
468 new->vme_resilient_media = FALSE;
469 new->vme_atomic = FALSE;
470 new->vme_no_copy_on_read = FALSE;
471 }
472
473 /*
474 * The underlying lck_rw_lock_shared_to_exclusive() returns FALSE/0 on failure.
475 * vm_map_lock_read_to_write() inverts that convention: it evaluates to zero on success and non-zero on failure.
476 */
477 __attribute__((always_inline))
478 int
479 vm_map_lock_read_to_write(vm_map_t map)
480 {
481 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
482 DTRACE_VM(vm_map_lock_upgrade);
483 return 0;
484 }
485 return 1;
486 }
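
/*
 * Illustrative (hypothetical) caller-side sketch, assuming the usual lck_rw
 * semantics where a failed shared-to-exclusive upgrade drops the shared lock:
 *
 *	vm_map_lock_read(map);
 *	...
 *	if (vm_map_lock_read_to_write(map)) {
 *		// upgrade failed and the read lock is gone: re-take and retry
 *		vm_map_lock_read(map);
 *		goto restart;
 *	}
 *	// upgrade succeeded: the map is now locked for write
 */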
487
488 __attribute__((always_inline))
489 boolean_t
490 vm_map_try_lock(vm_map_t map)
491 {
492 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
493 DTRACE_VM(vm_map_lock_w);
494 return TRUE;
495 }
496 return FALSE;
497 }
498
499 __attribute__((always_inline))
500 boolean_t
501 vm_map_try_lock_read(vm_map_t map)
502 {
503 if (lck_rw_try_lock_shared(&(map)->lock)) {
504 DTRACE_VM(vm_map_lock_r);
505 return TRUE;
506 }
507 return FALSE;
508 }
509
510 /*
511 * Routines to get the page size the caller should
512 * use while inspecting the target address space.
513 * Use the "_safely" variant if the caller is dealing with a user-provided
514 * array whose size depends on the page size, to avoid any overflow or
515 * underflow of a user-allocated buffer.
516 */
517 int
518 vm_self_region_page_shift_safely(
519 vm_map_t target_map)
520 {
521 int effective_page_shift = 0;
522
523 if (PAGE_SIZE == (4096)) {
524 /* x86_64 and 4k watches: always use 4k */
525 return PAGE_SHIFT;
526 }
527 /* did caller provide an explicit page size for this thread to use? */
528 effective_page_shift = thread_self_region_page_shift();
529 if (effective_page_shift) {
530 /* use the explicitly-provided page size */
531 return effective_page_shift;
532 }
533 /* no explicit page size: use the caller's page size... */
534 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
535 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
536 /* page size match: safe to use */
537 return effective_page_shift;
538 }
539 /* page size mismatch */
540 return -1;
541 }
542 int
543 vm_self_region_page_shift(
544 vm_map_t target_map)
545 {
546 int effective_page_shift;
547
548 effective_page_shift = vm_self_region_page_shift_safely(target_map);
549 if (effective_page_shift == -1) {
550 /* no safe value but OK to guess for caller */
551 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
552 VM_MAP_PAGE_SHIFT(target_map));
553 }
554 return effective_page_shift;
555 }
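
/*
 * Illustrative (hypothetical) caller-side sketch: when sizing a user-supplied
 * buffer whose length depends on the page size, use the "_safely" variant and
 * fail on a page-size mismatch instead of guessing:
 *
 *	int shift = vm_self_region_page_shift_safely(target_map);
 *	if (shift == -1) {
 *		return KERN_INVALID_ARGUMENT;	// page size mismatch
 *	}
 *	npages = (unsigned int)(size >> shift);
 */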
556
557
558 /*
559 * Decide if we want to allow processes to execute from their data or stack areas.
560 * override_nx() returns true if we do. Data/stack execution can be enabled independently
561 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
562 * or allow_stack_exec to enable data execution for that type of data area for that particular
563 * ABI (or both by or'ing the flags together). These are initialized in the architecture
564 * specific pmap files since the default behavior varies according to architecture. The
565 * main reason it varies is because of the need to provide binary compatibility with old
566 * applications that were written before these restrictions came into being. In the old
567 * days, an app could execute anything it could read, but this has slowly been tightened
568 * up over time. The default behavior is:
569 *
570 * 32-bit PPC apps may execute from both stack and data areas
571 * 32-bit Intel apps may execute from data areas but not stack
572 * 64-bit PPC/Intel apps may not execute from either data or stack
573 *
574 * An application on any architecture may override these defaults by explicitly
575 * adding PROT_EXEC permission to the page in question with the mprotect(2)
576 * system call. This code here just determines what happens when an app tries to
577 * execute from a page that lacks execute permission.
578 *
579 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
580 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
581 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
582 * execution from data areas for a particular binary even if the arch normally permits it. As
583 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
584 * to support some complicated use cases, notably browsers with out-of-process plugins that
585 * are not all NX-safe.
586 */
587
588 extern int allow_data_exec, allow_stack_exec;
589
590 int
591 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
592 {
593 int current_abi;
594
595 if (map->pmap == kernel_pmap) {
596 return FALSE;
597 }
598
599 /*
600 * Determine if the app is running in 32 or 64 bit mode.
601 */
602
603 if (vm_map_is_64bit(map)) {
604 current_abi = VM_ABI_64;
605 } else {
606 current_abi = VM_ABI_32;
607 }
608
609 /*
610 * Determine if we should allow the execution based on whether it's a
611 * stack or data area and the current architecture.
612 */
613
614 if (user_tag == VM_MEMORY_STACK) {
615 return allow_stack_exec & current_abi;
616 }
617
618 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
619 }
620
621
622 /*
623 * Virtual memory maps provide for the mapping, protection,
624 * and sharing of virtual memory objects. In addition,
625 * this module provides for an efficient virtual copy of
626 * memory from one map to another.
627 *
628 * Synchronization is required prior to most operations.
629 *
630 * Maps consist of an ordered doubly-linked list of simple
631 * entries; a single hint is used to speed up lookups.
632 *
633 * Sharing maps have been deleted from this version of Mach.
634 * All shared objects are now mapped directly into the respective
635 * maps. This requires a change in the copy on write strategy;
636 * the asymmetric (delayed) strategy is used for shared temporary
637 * objects instead of the symmetric (shadow) strategy. All maps
638 * are now "top level" maps (either task map, kernel map or submap
639 * of the kernel map).
640 *
641 * Since portions of maps are specified by start/end addresses,
642 * which may not align with existing map entries, all
643 * routines merely "clip" entries to these start/end values.
644 * [That is, an entry is split into two, bordering at a
645 * start or end value.] Note that these clippings may not
646 * always be necessary (as the two resulting entries are then
647 * not changed); however, the clipping is done for convenience.
648 * No attempt is currently made to "glue back together" two
649 * abutting entries.
650 *
651 * The symmetric (shadow) copy strategy implements virtual copy
652 * by copying VM object references from one map to
653 * another, and then marking both regions as copy-on-write.
654 * It is important to note that only one writeable reference
655 * to a VM object region exists in any map when this strategy
656 * is used -- this means that shadow object creation can be
657 * delayed until a write operation occurs. The asymmetric (delayed)
658 * strategy allows multiple maps to have writeable references to
659 * the same region of a vm object, and hence cannot delay creating
660 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
661 * Copying of permanent objects is completely different; see
662 * vm_object_copy_strategically() in vm_object.c.
663 */
664
665 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone; /* zone for vm_map structures */
666 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone; /* zone for vm_map_copy structures */
667
668 SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone; /* zone for vm_map_entry structures */
669 SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
670 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
671 SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone;
672 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
673
674 #define VM_MAP_ZONE_NAME "maps"
675 #define VM_MAP_ZFLAGS ( \
676 ZC_NOENCRYPT | \
677 ZC_NOGZALLOC | \
678 ZC_ALLOW_FOREIGN)
679
680 #define VM_MAP_ENTRY_ZONE_NAME "VM map entries"
681 #define VM_MAP_ENTRY_ZFLAGS ( \
682 ZC_NOENCRYPT | \
683 ZC_CACHING | \
684 ZC_NOGZALLOC | \
685 ZC_KASAN_NOQUARANTINE | \
686 ZC_VM_LP64 | \
687 ZC_ALLOW_FOREIGN)
688
689 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
690 #define VM_MAP_ENTRY_RESERVED_ZONE_NAME "Reserved VM map entries"
691 #define VM_MAP_ENTRY_RESERVED_ZFLAGS ( \
692 ZC_NOENCRYPT | \
693 ZC_NOCACHING | \
694 ZC_NOGZALLOC | \
695 ZC_KASAN_NOQUARANTINE | \
696 ZC_VM)
697 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
698
699 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
700 #define VM_MAP_HOLES_ZFLAGS ( \
701 ZC_NOENCRYPT | \
702 ZC_CACHING | \
703 ZC_NOGZALLOC | \
704 ZC_KASAN_NOQUARANTINE | \
705 ZC_VM_LP64 | \
706 ZC_ALLOW_FOREIGN)
707
708 /*
709 * Asserts that a vm_map_copy object is coming from the
710 * vm_map_copy_zone to ensure that it isn't a fake constructed
711 * anywhere else.
712 */
713 static inline void
714 vm_map_copy_require(struct vm_map_copy *copy)
715 {
716 zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
717 }
718
719 /*
720 * vm_map_require:
721 *
722 * Ensures that the argument is memory allocated from the genuine
723 * vm map zone. (See zone_id_require_allow_foreign).
724 */
725 void
726 vm_map_require(vm_map_t map)
727 {
728 zone_id_require_allow_foreign(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
729 }
730
731 static __startup_data vm_offset_t map_data;
732 static __startup_data vm_size_t map_data_size;
733 static __startup_data vm_offset_t kentry_data;
734 static __startup_data vm_size_t kentry_data_size;
735 static __startup_data vm_offset_t map_holes_data;
736 static __startup_data vm_size_t map_holes_data_size;
737
738 #if XNU_TARGET_OS_OSX
739 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
740 #else /* XNU_TARGET_OS_OSX */
741 #define NO_COALESCE_LIMIT 0
742 #endif /* XNU_TARGET_OS_OSX */
743
744 /* Skip acquiring locks if we're in the midst of a kernel core dump */
745 unsigned int not_in_kdp = 1;
746
747 unsigned int vm_map_set_cache_attr_count = 0;
748
749 kern_return_t
750 vm_map_set_cache_attr(
751 vm_map_t map,
752 vm_map_offset_t va)
753 {
754 vm_map_entry_t map_entry;
755 vm_object_t object;
756 kern_return_t kr = KERN_SUCCESS;
757
758 vm_map_lock_read(map);
759
760 if (!vm_map_lookup_entry(map, va, &map_entry) ||
761 map_entry->is_sub_map) {
762 /*
763 * that memory is not properly mapped
764 */
765 kr = KERN_INVALID_ARGUMENT;
766 goto done;
767 }
768 object = VME_OBJECT(map_entry);
769
770 if (object == VM_OBJECT_NULL) {
771 /*
772 * there should be a VM object here at this point
773 */
774 kr = KERN_INVALID_ARGUMENT;
775 goto done;
776 }
777 vm_object_lock(object);
778 object->set_cache_attr = TRUE;
779 vm_object_unlock(object);
780
781 vm_map_set_cache_attr_count++;
782 done:
783 vm_map_unlock_read(map);
784
785 return kr;
786 }
787
788
789 #if CONFIG_CODE_DECRYPTION
790 /*
791 * vm_map_apple_protected:
792 * This remaps the requested part of the object with an object backed by
793 * the decrypting pager.
794 * crypt_info contains entry points and session data for the crypt module.
795 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
796 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
797 */
798 kern_return_t
799 vm_map_apple_protected(
800 vm_map_t map,
801 vm_map_offset_t start,
802 vm_map_offset_t end,
803 vm_object_offset_t crypto_backing_offset,
804 struct pager_crypt_info *crypt_info,
805 uint32_t cryptid)
806 {
807 boolean_t map_locked;
808 kern_return_t kr;
809 vm_map_entry_t map_entry;
810 struct vm_map_entry tmp_entry;
811 memory_object_t unprotected_mem_obj;
812 vm_object_t protected_object;
813 vm_map_offset_t map_addr;
814 vm_map_offset_t start_aligned, end_aligned;
815 vm_object_offset_t crypto_start, crypto_end;
816 int vm_flags;
817 vm_map_kernel_flags_t vmk_flags;
818 boolean_t cache_pager;
819
820 vm_flags = 0;
821 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
822
823 map_locked = FALSE;
824 unprotected_mem_obj = MEMORY_OBJECT_NULL;
825
826 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
827 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
828 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
829 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
830
831 #if __arm64__
832 /*
833 * "start" and "end" might be 4K-aligned but not 16K-aligned,
834 * so we might have to loop and establish up to 3 mappings:
835 *
836 * + the first 16K-page, which might overlap with the previous
837 * 4K-aligned mapping,
838 * + the center,
839 * + the last 16K-page, which might overlap with the next
840 * 4K-aligned mapping.
841 * Each of these mapping might be backed by a vnode pager (if
842 * properly page-aligned) or a "fourk_pager", itself backed by a
843 * vnode pager (if 4K-aligned but not page-aligned).
844 */
845 #endif /* __arm64__ */
846
847 map_addr = start_aligned;
848 for (map_addr = start_aligned;
849 map_addr < end;
850 map_addr = tmp_entry.vme_end) {
851 vm_map_lock(map);
852 map_locked = TRUE;
853
854 /* lookup the protected VM object */
855 if (!vm_map_lookup_entry(map,
856 map_addr,
857 &map_entry) ||
858 map_entry->is_sub_map ||
859 VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
860 /* that memory is not properly mapped */
861 kr = KERN_INVALID_ARGUMENT;
862 goto done;
863 }
864
865 /* ensure mapped memory is mapped as executable except
866 * for model decryption flow */
867 if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
868 !(map_entry->protection & VM_PROT_EXECUTE)) {
869 kr = KERN_INVALID_ARGUMENT;
870 goto done;
871 }
872
873 /* get the protected object to be decrypted */
874 protected_object = VME_OBJECT(map_entry);
875 if (protected_object == VM_OBJECT_NULL) {
876 /* there should be a VM object here at this point */
877 kr = KERN_INVALID_ARGUMENT;
878 goto done;
879 }
880 /* ensure protected object stays alive while map is unlocked */
881 vm_object_reference(protected_object);
882
883 /* limit the map entry to the area we want to cover */
884 vm_map_clip_start(map, map_entry, start_aligned);
885 vm_map_clip_end(map, map_entry, end_aligned);
886
887 tmp_entry = *map_entry;
888 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
889 vm_map_unlock(map);
890 map_locked = FALSE;
891
892 /*
893 * This map entry might be only partially encrypted
894 * (if not fully "page-aligned").
895 */
896 crypto_start = 0;
897 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
898 if (tmp_entry.vme_start < start) {
899 if (tmp_entry.vme_start != start_aligned) {
900 kr = KERN_INVALID_ADDRESS;
901 }
902 crypto_start += (start - tmp_entry.vme_start);
903 }
904 if (tmp_entry.vme_end > end) {
905 if (tmp_entry.vme_end != end_aligned) {
906 kr = KERN_INVALID_ADDRESS;
907 }
908 crypto_end -= (tmp_entry.vme_end - end);
909 }
910
911 /*
912 * This "extra backing offset" is needed to get the decryption
913 * routine to use the right key. It adjusts for the possibly
914 * relative offset of an interposed "4K" pager...
915 */
916 if (crypto_backing_offset == (vm_object_offset_t) -1) {
917 crypto_backing_offset = VME_OFFSET(&tmp_entry);
918 }
919
920 cache_pager = TRUE;
921 #if XNU_TARGET_OS_OSX
922 if (vm_map_is_alien(map)) {
923 cache_pager = FALSE;
924 }
925 #endif /* XNU_TARGET_OS_OSX */
926
927 /*
928 * Lookup (and create if necessary) the protected memory object
929 * matching that VM object.
930 * If successful, this also grabs a reference on the memory object,
931 * to guarantee that it doesn't go away before we get a chance to map
932 * it.
933 */
934 unprotected_mem_obj = apple_protect_pager_setup(
935 protected_object,
936 VME_OFFSET(&tmp_entry),
937 crypto_backing_offset,
938 crypt_info,
939 crypto_start,
940 crypto_end,
941 cache_pager);
942
943 /* release extra ref on protected object */
944 vm_object_deallocate(protected_object);
945
946 if (unprotected_mem_obj == NULL) {
947 kr = KERN_FAILURE;
948 goto done;
949 }
950
951 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
952 /* can overwrite an immutable mapping */
953 vmk_flags.vmkf_overwrite_immutable = TRUE;
954 #if __arm64__
955 if (tmp_entry.used_for_jit &&
956 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
957 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
958 fourk_binary_compatibility_unsafe &&
959 fourk_binary_compatibility_allow_wx) {
960 printf("** FOURK_COMPAT [%d]: "
961 "allowing write+execute at 0x%llx\n",
962 proc_selfpid(), tmp_entry.vme_start);
963 vmk_flags.vmkf_map_jit = TRUE;
964 }
965 #endif /* __arm64__ */
966
967 /* map this memory object in place of the current one */
968 map_addr = tmp_entry.vme_start;
969 kr = vm_map_enter_mem_object(map,
970 &map_addr,
971 (tmp_entry.vme_end -
972 tmp_entry.vme_start),
973 (mach_vm_offset_t) 0,
974 vm_flags,
975 vmk_flags,
976 VM_KERN_MEMORY_NONE,
977 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
978 0,
979 TRUE,
980 tmp_entry.protection,
981 tmp_entry.max_protection,
982 tmp_entry.inheritance);
983 assertf(kr == KERN_SUCCESS,
984 "kr = 0x%x\n", kr);
985 assertf(map_addr == tmp_entry.vme_start,
986 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
987 (uint64_t)map_addr,
988 (uint64_t) tmp_entry.vme_start,
989 &tmp_entry);
990
991 #if VM_MAP_DEBUG_APPLE_PROTECT
992 if (vm_map_debug_apple_protect) {
993 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
994 " backing:[object:%p,offset:0x%llx,"
995 "crypto_backing_offset:0x%llx,"
996 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
997 map,
998 (uint64_t) map_addr,
999 (uint64_t) (map_addr + (tmp_entry.vme_end -
1000 tmp_entry.vme_start)),
1001 unprotected_mem_obj,
1002 protected_object,
1003 VME_OFFSET(&tmp_entry),
1004 crypto_backing_offset,
1005 crypto_start,
1006 crypto_end);
1007 }
1008 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1009
1010 /*
1011 * Release the reference obtained by
1012 * apple_protect_pager_setup().
1013 * The mapping (if it succeeded) is now holding a reference on
1014 * the memory object.
1015 */
1016 memory_object_deallocate(unprotected_mem_obj);
1017 unprotected_mem_obj = MEMORY_OBJECT_NULL;
1018
1019 /* continue with next map entry */
1020 crypto_backing_offset += (tmp_entry.vme_end -
1021 tmp_entry.vme_start);
1022 crypto_backing_offset -= crypto_start;
1023 }
1024 kr = KERN_SUCCESS;
1025
1026 done:
1027 if (map_locked) {
1028 vm_map_unlock(map);
1029 }
1030 return kr;
1031 }
1032 #endif /* CONFIG_CODE_DECRYPTION */
1033
1034
1035 LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
1036 LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1037 LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
1038
1039 #if XNU_TARGET_OS_OSX
1040 int malloc_no_cow = 0;
1041 #else /* XNU_TARGET_OS_OSX */
1042 int malloc_no_cow = 1;
1043 #endif /* XNU_TARGET_OS_OSX */
1044 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
1045 #if DEBUG
1046 int vm_check_map_sanity = 0;
1047 #endif
1048
1049 /*
1050 * vm_map_init:
1051 *
1052 * Initialize the vm_map module. Must be called before
1053 * any other vm_map routines.
1054 *
1055 * Map and entry structures are allocated from zones -- we must
1056 * initialize those zones.
1057 *
1058 * There are three zones of interest:
1059 *
1060 * vm_map_zone: used to allocate maps.
1061 * vm_map_entry_zone: used to allocate map entries.
1062 *
1063 * LP32:
1064 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1065 *
1066 * The kernel allocates map entries from a special zone that is initially
1067 * "crammed" with memory. It would be difficult (perhaps impossible) for
1068 * the kernel to allocate more memory to an entry zone when it became
1069 * empty since the very act of allocating memory implies the creation
1070 * of a new entry.
1071 */
1072 __startup_func
1073 void
1074 vm_map_init(void)
1075 {
1076
1077 #if MACH_ASSERT
1078 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1079 sizeof(debug4k_filter));
1080 #endif /* MACH_ASSERT */
1081
1082 vm_map_zone = zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1083 VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
1084
1085 /*
1086 * Don't quarantine because we always need elements available
1087 * Disallow GC on this zone... to aid the GC.
1088 */
1089 vm_map_entry_zone = zone_create_ext(VM_MAP_ENTRY_ZONE_NAME,
1090 sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1091 ZONE_ID_VM_MAP_ENTRY, ^(zone_t z) {
1092 z->z_elems_rsv = (uint16_t)(32 *
1093 (ml_early_cpu_max_number() + 1));
1094 });
1095 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
1096 vm_map_entry_reserved_zone = zone_create(VM_MAP_ENTRY_RESERVED_ZONE_NAME,
1097 sizeof(struct vm_map_entry), VM_MAP_ENTRY_RESERVED_ZFLAGS);
1098 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
1099
1100 vm_map_holes_zone = zone_create_ext(VM_MAP_HOLES_ZONE_NAME,
1101 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1102 ZONE_ID_VM_MAP_HOLES, ^(zone_t z) {
1103 z->z_elems_rsv = (uint16_t)(16 * 1024 / zone_elem_size(z));
1104 });
1105
1106 vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1107 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1108
1109 /*
1110 * Add the stolen memory to zones, adjust zone size and stolen counts.
1111 */
1112 zone_cram_foreign(vm_map_zone, map_data, map_data_size);
1113 zone_cram_foreign(vm_map_entry_zone, kentry_data, kentry_data_size);
1114 zone_cram_foreign(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1115 printf("VM boostrap: %d maps, %d entries and %d holes available\n",
1116 vm_map_zone->z_elems_free,
1117 vm_map_entry_zone->z_elems_free,
1118 vm_map_holes_zone->z_elems_free);
1119
1120 /*
1121 * Since these are covered by zones, remove them from stolen page accounting.
1122 */
1123 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1124
1125 #if VM_MAP_DEBUG_APPLE_PROTECT
1126 PE_parse_boot_argn("vm_map_debug_apple_protect",
1127 &vm_map_debug_apple_protect,
1128 sizeof(vm_map_debug_apple_protect));
1129 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1130 #if VM_MAP_DEBUG_APPLE_FOURK
1131 PE_parse_boot_argn("vm_map_debug_fourk",
1132 &vm_map_debug_fourk,
1133 sizeof(vm_map_debug_fourk));
1134 #endif /* VM_MAP_DEBUG_FOURK */
1135 PE_parse_boot_argn("vm_map_executable_immutable",
1136 &vm_map_executable_immutable,
1137 sizeof(vm_map_executable_immutable));
1138 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1139 &vm_map_executable_immutable_verbose,
1140 sizeof(vm_map_executable_immutable_verbose));
1141
1142 PE_parse_boot_argn("malloc_no_cow",
1143 &malloc_no_cow,
1144 sizeof(malloc_no_cow));
1145 if (malloc_no_cow) {
1146 vm_memory_malloc_no_cow_mask = 0ULL;
1147 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1148 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1149 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1150 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1151 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1152 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1153 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1154 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1155 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1156 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1157 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1158 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1159 &vm_memory_malloc_no_cow_mask,
1160 sizeof(vm_memory_malloc_no_cow_mask));
1161 }
1162
1163 #if DEBUG
1164 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1165 if (vm_check_map_sanity) {
1166 kprintf("VM sanity checking enabled\n");
1167 } else {
1168 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1169 }
1170 #endif /* DEBUG */
1171
1172 #if DEVELOPMENT || DEBUG
1173 PE_parse_boot_argn("panic_on_unsigned_execute",
1174 &panic_on_unsigned_execute,
1175 sizeof(panic_on_unsigned_execute));
1176 PE_parse_boot_argn("panic_on_mlock_failure",
1177 &panic_on_mlock_failure,
1178 sizeof(panic_on_mlock_failure));
1179 #endif /* DEVELOPMENT || DEBUG */
1180 }
1181
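/*
 * vm_map_steal_memory:
 *
 * Reserve a contiguous chunk of early-boot memory and split it into the
 * initial backing store for the map, map-entry and map-holes zones, so the
 * VM can bootstrap before zalloc is fully initialized.
 */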
1182 __startup_func
1183 static void
1184 vm_map_steal_memory(void)
1185 {
1186 uint16_t kentry_initial_pages;
1187 uint16_t zone_foreign_pages;
1188 bool overloaded = false;
1189
1190 /*
1191 * 1 page of maps and holes is enough for early boot
1192 *
1193 * Those early crams are only needed to bootstrap zones
1194 * until zone_init() has run (STARTUP_RANK_FIRST of ZALLOC).
1195 * After that point, zones know how to allocate vm map entries,
1196 * holes, and maps.
1197 */
1198 map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
1199 sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);
1200
1201 map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
1202 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS, 1);
1203
1204 /*
1205 * kentry_initial_pages corresponds to the number of kernel map entries
1206 * required during bootstrap for the duration of zone_init().
1207 */
1208 #if defined(__LP64__)
1209 kentry_initial_pages = (uint16_t)atop(10 * 4096);
1210 #else
1211 kentry_initial_pages = 6;
1212 #endif
1213
1214 #if CONFIG_GZALLOC
1215 /*
1216 * If using the guard allocator, reserve more memory for the kernel
1217 * reserved map entry pool.
1218 */
1219 if (gzalloc_enabled()) {
1220 kentry_initial_pages *= 100;
1221 overloaded = true;
1222 }
1223 #endif
1224 if (PE_parse_boot_argn("zone_foreign_pages", &zone_foreign_pages,
1225 sizeof(zone_foreign_pages))) {
1226 kentry_initial_pages = zone_foreign_pages;
1227 overloaded = true;
1228 }
1229
1230 kentry_data_size = zone_get_foreign_alloc_size(VM_MAP_ENTRY_ZONE_NAME,
1231 sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1232 kentry_initial_pages);
1233
1234 /*
1235 * Steal a contiguous range of memory so that a simple range check
1236 * can validate foreign addresses being freed/crammed to these
1237 * zones
1238 */
1239 vm_size_t total_size;
1240 if (os_add3_overflow(map_data_size, kentry_data_size,
1241 map_holes_data_size, &total_size)) {
1242 panic("vm_map_steal_memory: overflow in amount of memory requested");
1243 }
1244 map_data = zone_foreign_mem_init(total_size, overloaded);
1245 kentry_data = map_data + map_data_size;
1246 map_holes_data = kentry_data + kentry_data_size;
1247 }
1248 STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1249
1250 __startup_func
1251 static void
1252 vm_kernel_boostraped(void)
1253 {
1254 printf("VM bootstrap done: %d maps, %d entries and %d holes left\n",
1255 vm_map_zone->z_elems_free,
1256 vm_map_entry_zone->z_elems_free,
1257 vm_map_holes_zone->z_elems_free);
1258 }
1259 STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_kernel_boostraped);
1260
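/*
 * vm_map_disable_hole_optimization:
 *
 * Free the map's hole list and fall back to the "first_free" hint for
 * free-space lookups.
 */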
1261 void
1262 vm_map_disable_hole_optimization(vm_map_t map)
1263 {
1264 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1265
1266 if (map->holelistenabled) {
1267 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1268
1269 while (hole_entry != NULL) {
1270 next_hole_entry = hole_entry->vme_next;
1271
1272 hole_entry->vme_next = NULL;
1273 hole_entry->vme_prev = NULL;
1274 zfree(vm_map_holes_zone, hole_entry);
1275
1276 if (next_hole_entry == head_entry) {
1277 hole_entry = NULL;
1278 } else {
1279 hole_entry = next_hole_entry;
1280 }
1281 }
1282
1283 map->holes_list = NULL;
1284 map->holelistenabled = FALSE;
1285
1286 map->first_free = vm_map_first_entry(map);
1287 SAVE_HINT_HOLE_WRITE(map, NULL);
1288 }
1289 }
1290
1291 boolean_t
1292 vm_kernel_map_is_kernel(vm_map_t map)
1293 {
1294 return map->pmap == kernel_pmap;
1295 }
1296
1297 /*
1298 * vm_map_create:
1299 *
1300 * Creates and returns a new empty VM map with
1301 * the given physical map structure, and having
1302 * the given lower and upper address bounds.
1303 */
1304
1305 extern vm_map_t vm_map_create_external(
1306 pmap_t pmap,
1307 vm_map_offset_t min_off,
1308 vm_map_offset_t max_off,
1309 boolean_t pageable);
1310
1311 vm_map_t
1312 vm_map_create_external(
1313 pmap_t pmap,
1314 vm_map_offset_t min,
1315 vm_map_offset_t max,
1316 boolean_t pageable)
1317 {
1318 vm_map_create_options_t options = VM_MAP_CREATE_DEFAULT;
1319
1320 if (pageable) {
1321 options |= VM_MAP_CREATE_PAGEABLE;
1322 }
1323 return vm_map_create_options(pmap, min, max, options);
1324 }
1325
1326 vm_map_t
1327 vm_map_create_options(
1328 pmap_t pmap,
1329 vm_map_offset_t min,
1330 vm_map_offset_t max,
1331 vm_map_create_options_t options)
1332 {
1333 vm_map_t result;
1334
1335 result = zalloc_flags(vm_map_zone, Z_WAITOK | Z_NOFAIL | Z_ZERO);
1336
1337 vm_map_first_entry(result) = vm_map_to_entry(result);
1338 vm_map_last_entry(result) = vm_map_to_entry(result);
1339
1340 vm_map_store_init(&result->hdr);
1341 result->hdr.entries_pageable = (bool)(options & VM_MAP_CREATE_PAGEABLE);
1342 vm_map_set_page_shift(result, PAGE_SHIFT);
1343
1344 result->size_limit = RLIM_INFINITY; /* default unlimited */
1345 result->data_limit = RLIM_INFINITY; /* default unlimited */
1346 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1347 os_ref_init_count_raw(&result->map_refcnt, &map_refgrp, 1);
1348 result->pmap = pmap;
1349 result->min_offset = min;
1350 result->max_offset = max;
1351 result->first_free = vm_map_to_entry(result);
1352 result->hint = vm_map_to_entry(result);
1353
1354 if (options & VM_MAP_CREATE_NEVER_FAULTS) {
1355 assert(pmap == kernel_pmap);
1356 result->never_faults = true;
1357 }
1358
1359 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1360 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1361 result->has_corpse_footprint = true;
1362 } else if (startup_phase >= STARTUP_SUB_ZALLOC &&
1363 !(options & VM_MAP_CREATE_DISABLE_HOLELIST)) {
1364 struct vm_map_links *hole_entry = zalloc(vm_map_holes_zone);
1365
1366 hole_entry->start = min;
1367 #if defined(__arm__) || defined(__arm64__)
1368 hole_entry->end = result->max_offset;
1369 #else
1370 hole_entry->end = MAX(max, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
1371 #endif
1372 result->holes_list = result->hole_hint = hole_entry;
1373 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1374 result->holelistenabled = true;
1375 }
1376
1377 vm_map_lock_init(result);
1378
1379 return result;
1380 }
1381
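/*
 * vm_map_adjusted_size:
 *
 * Return the map's size minus the machine's reserved regions (for exotic
 * maps that carry them), so that accounting callers do not over-count.
 */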
1382 vm_map_size_t
1383 vm_map_adjusted_size(vm_map_t map)
1384 {
1385 struct vm_reserved_region *regions = NULL;
1386 size_t num_regions = 0;
1387 mach_vm_size_t reserved_size = 0, map_size = 0;
1388
1389 if (map == NULL || (map->size == 0)) {
1390 return 0;
1391 }
1392
1393 map_size = map->size;
1394
1395 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1396 /*
1397 * No special reserved regions or not an exotic map or the task
1398 * is terminating and these special regions might have already
1399 * been deallocated.
1400 */
1401 return map_size;
1402 }
1403
1404 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1405 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1406
1407 while (num_regions) {
1408 reserved_size += regions[--num_regions].vmrr_size;
1409 }
1410
1411 /*
1412 * There are a few places where the map is being switched out due to
1413 * 'termination' without that bit being set (e.g. exec and corpse purging).
1414 * In those cases, we could have the map's regions being deallocated on
1415 * a core while some accounting process is trying to get the map's size.
1416 * So this assert can't be enabled till all those places are uniform in
1417 * their use of the 'map->terminated' bit.
1418 *
1419 * assert(map_size >= reserved_size);
1420 */
1421
1422 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1423 }
1424
1425 /*
1426 * vm_map_entry_create: [ internal use only ]
1427 *
1428 * Allocates a VM map entry for insertion in the
1429 * given map (or map copy). No fields are filled.
1430 */
1431 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1432
1433 #define vm_map_copy_entry_create(copy, map_locked) \
1434 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1435
1436 static vm_map_entry_t
1437 _vm_map_entry_create(
1438 struct vm_map_header *map_header __unused,
1439 boolean_t map_locked __unused)
1440 {
1441 vm_map_entry_t entry = NULL;
1442 zone_t zone = vm_map_entry_zone;
1443
1444 assert(map_header->entries_pageable ? !map_locked : TRUE);
1445
1446 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
1447 zone_security_flags_t zsflags = zone_security_array[ZONE_ID_VM_MAP_ENTRY];
1448 if (map_header == &zone_submap(zsflags)->hdr) {
1449 /*
1450 * If we are trying to allocate an entry for the submap
1451 * of the vm_map_entry_zone, then this can cause recursive
1452 * locking of this map.
1453 *
1454 * Try to allocate _without blocking_ from this zone,
1455 * but if it is depleted, we need to go to the
1456 * vm_map_entry_reserved_zone which is in the zalloc
1457 * "VM" submap, which can grow without taking any map lock.
1458 *
1459 * Note: the vm_map_entry_zone has a rather high "reserve"
1460 * setup in order to minimize usage of the reserved one.
1461 */
1462 entry = zalloc_flags(vm_map_entry_zone, Z_NOWAIT | Z_ZERO);
1463 zone = vm_map_entry_reserved_zone;
1464 }
1465 #endif
1466 if (entry == NULL) {
1467 entry = zalloc_flags(zone, Z_WAITOK | Z_ZERO);
1468 }
1469
1470 entry->behavior = VM_BEHAVIOR_DEFAULT;
1471 entry->inheritance = VM_INHERIT_DEFAULT;
1472
1473 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1474 #if MAP_ENTRY_CREATION_DEBUG
1475 entry->vme_creation_maphdr = map_header;
1476 entry->vme_creation_bt = btref_get(__builtin_frame_address(0),
1477 BTREF_GET_NOWAIT);
1478 #endif
1479 return entry;
1480 }
1481
1482 /*
1483 * vm_map_entry_dispose: [ internal use only ]
1484 *
1485 * Inverse of vm_map_entry_create.
1486 *
1487 * write map lock held so no need to
1488 * do anything special to ensure correctness
1489 * of the stores
1490 */
1491 #define vm_map_entry_dispose(map, entry) \
1492 _vm_map_entry_dispose(&(map)->hdr, (entry))
1493
1494 #define vm_map_copy_entry_dispose(copy, entry) \
1495 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1496
1497 static void
1498 _vm_map_entry_dispose(
1499 struct vm_map_header *map_header __unused,
1500 vm_map_entry_t entry)
1501 {
1502 #if MAP_ENTRY_CREATION_DEBUG
1503 btref_put(entry->vme_creation_bt);
1504 #endif
1505 #if MAP_ENTRY_INSERTION_DEBUG
1506 btref_put(entry->vme_insertion_bt);
1507 #endif
1508 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
1509 switch (zone_id_for_native_element(entry, sizeof(*entry))) {
1510 case ZONE_ID_VM_MAP_ENTRY:
1511 case ZONE_ID_INVALID: /* foreign elements are regular entries always */
1512 break;
1513 default:
1514 zfree(vm_map_entry_reserved_zone, entry);
1515 return;
1516 }
1517 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
1518 zfree(vm_map_entry_zone, entry);
1519 }
1520
1521 #if MACH_ASSERT
1522 static boolean_t first_free_check = FALSE;
1523 boolean_t
1524 first_free_is_valid(
1525 vm_map_t map)
1526 {
1527 if (!first_free_check) {
1528 return TRUE;
1529 }
1530
1531 return first_free_is_valid_store( map );
1532 }
1533 #endif /* MACH_ASSERT */
1534
1535
1536 #define vm_map_copy_entry_link(copy, after_where, entry) \
1537 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1538
1539 #define vm_map_copy_entry_unlink(copy, entry) \
1540 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1541
1542 /*
1543 * vm_map_destroy:
1544 *
1545 * Actually destroy a map.
1546 */
1547 void
1548 vm_map_destroy(
1549 vm_map_t map,
1550 int flags)
1551 {
1552 vm_map_lock(map);
1553
1554 /* final cleanup: no need to unnest shared region */
1555 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1556 /* final cleanup: ok to remove immutable mappings */
1557 flags |= VM_MAP_REMOVE_IMMUTABLE;
1558 /* final cleanup: allow gaps in range */
1559 flags |= VM_MAP_REMOVE_GAPS_OK;
1560
1561 /* clean up regular map entries */
1562 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1563 flags, VM_MAP_NULL);
1564 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1565 #if !defined(__arm__)
1566 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1567 flags, VM_MAP_NULL);
1568 #endif /* !__arm__ */
1569
1570 vm_map_disable_hole_optimization(map);
1571 vm_map_corpse_footprint_destroy(map);
1572
1573 vm_map_unlock(map);
1574
1575 assert(map->hdr.nentries == 0);
1576
1577 if (map->pmap) {
1578 pmap_destroy(map->pmap);
1579 }
1580
1581 #if LOCKS_INDIRECT_ALLOW
1582 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1583 /*
1584 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1585 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1586 * structure or kalloc'ed via lck_mtx_init.
1587 * An example is s_lock_ext within struct _vm_map.
1588 *
1589 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1590 * can add another tag to detect embedded vs alloc'ed indirect external
1591 * mutexes but that'll be additional checks in the lock path and require
1592 * updating dependencies for the old vs new tag.
1593 *
1594 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1595 * just when lock debugging is ON, we choose to forego explicitly destroying
1596 * the vm_map mutex and rw lock. Because the vm_map_lck_grp is
1597 * permanent, this has no serious side-effect.
1598 */
1599 } else
1600 #endif /* LOCKS_INDIRECT_ALLOW */
1601 {
1602 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1603 }
1604
1605 zfree(vm_map_zone, map);
1606 }
1607
1608 /*
1609 * Returns pid of the task with the largest number of VM map entries.
1610 * Used in the zone-map-exhaustion jetsam path.
1611 */
1612 pid_t
1613 find_largest_process_vm_map_entries(void)
1614 {
1615 pid_t victim_pid = -1;
1616 int max_vm_map_entries = 0;
1617 task_t task = TASK_NULL;
1618 queue_head_t *task_list = &tasks;
1619
1620 lck_mtx_lock(&tasks_threads_lock);
1621 queue_iterate(task_list, task, task_t, tasks) {
1622 if (task == kernel_task || !task->active) {
1623 continue;
1624 }
1625
1626 vm_map_t task_map = task->map;
1627 if (task_map != VM_MAP_NULL) {
1628 int task_vm_map_entries = task_map->hdr.nentries;
1629 if (task_vm_map_entries > max_vm_map_entries) {
1630 max_vm_map_entries = task_vm_map_entries;
1631 victim_pid = pid_from_task(task);
1632 }
1633 }
1634 }
1635 lck_mtx_unlock(&tasks_threads_lock);
1636
1637 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1638 return victim_pid;
1639 }
1640
1641
1642 /*
1643 * vm_map_lookup_entry: [ internal use only ]
1644 *
1645 * Calls into the vm map store layer to find the map
1646 * entry containing (or immediately preceding) the
1647 * specified address in the given map; the entry is returned
1648 * in the "entry" parameter. The boolean
1649 * result indicates whether the address is
1650 * actually contained in the map.
1651 */
1652 boolean_t
1653 vm_map_lookup_entry(
1654 vm_map_t map,
1655 vm_map_offset_t address,
1656 vm_map_entry_t *entry) /* OUT */
1657 {
1658 #if CONFIG_KERNEL_TBI
1659 if (VM_KERNEL_ADDRESS(address)) {
1660 address = VM_KERNEL_STRIP_UPTR(address);
1661 }
1662 #endif /* CONFIG_KERNEL_TBI */
1663 #if CONFIG_PROB_GZALLOC
1664 if (map->pmap == kernel_pmap) {
1665 assertf(!pgz_owned(address),
1666 "it is the responsibility of callers to unguard PGZ addresses");
1667 }
1668 #endif /* CONFIG_PROB_GZALLOC */
1669 return vm_map_store_lookup_entry( map, address, entry );
1670 }
1671
1672 #if CONFIG_PROB_GZALLOC
1673 boolean_t
1674 vm_map_lookup_entry_allow_pgz(
1675 vm_map_t map,
1676 vm_map_offset_t address,
1677 vm_map_entry_t *entry) /* OUT */
1678 {
1679 #if CONFIG_KERNEL_TBI
1680 if (VM_KERNEL_ADDRESS(address)) {
1681 address = VM_KERNEL_STRIP_UPTR(address);
1682 }
1683 #endif /* CONFIG_KERNEL_TBI */
1684 return vm_map_store_lookup_entry( map, address, entry );
1685 }
1686 #endif /* CONFIG_PROB_GZALLOC */
1687
1688
1689 /*
1690 * Routine: vm_map_find_space
1691 * Purpose:
1692 * Allocate a range in the specified virtual address map,
1693 * returning the entry allocated for that range.
1694 * Used by kmem_alloc, etc.
1695 *
1696 * The map must NOT be locked. It will be returned locked
1697 * on KERN_SUCCESS, unlocked on failure.
1698 *
1699 * If an entry is allocated, the object/offset fields
1700 * are initialized to zero.
1701 */
1702 kern_return_t
1703 vm_map_find_space(
1704 vm_map_t map,
1705 vm_map_offset_t *address, /* OUT */
1706 vm_map_size_t size,
1707 vm_map_offset_t mask,
1708 vm_map_kernel_flags_t vmk_flags,
1709 vm_tag_t tag,
1710 vm_map_entry_t *o_entry) /* OUT */
1711 {
1712 vm_map_entry_t entry, new_entry, hole_entry;
1713 vm_map_offset_t start;
1714 vm_map_offset_t end;
1715
1716 if (size == 0) {
1717 *address = 0;
1718 return KERN_INVALID_ARGUMENT;
1719 }
1720
1721 new_entry = vm_map_entry_create(map, FALSE);
1722 vm_map_lock(map);
1723
1724 if (vmk_flags.vmkf_last_free) {
1725 assert(!map->disable_vmentry_reuse);
1726 /* TODO: Make backward lookup generic and support guard pages */
1727 assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
1728 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
1729
1730 /* Allocate space from end of map */
1731 vm_map_store_find_last_free(map, &entry);
1732
1733 if (!entry) {
1734 goto noSpace;
1735 }
1736
1737 if (entry == vm_map_to_entry(map)) {
1738 end = map->max_offset;
1739 } else {
1740 end = entry->vme_start;
1741 }
1742
1743 while (TRUE) {
1744 vm_map_entry_t prev;
1745
1746 start = end - size;
1747
1748 if ((start < map->min_offset) || end < start) {
1749 goto noSpace;
1750 }
1751
1752 prev = entry->vme_prev;
1753 entry = prev;
1754
1755 if (prev == vm_map_to_entry(map)) {
1756 break;
1757 }
1758
1759 if (prev->vme_end <= start) {
1760 break;
1761 }
1762
1763 /*
1764 * Didn't fit -- move to the next entry.
1765 */
1766
1767 end = entry->vme_start;
1768 }
1769 } else {
1770 if (vmk_flags.vmkf_guard_after) {
1771 /* account for the back guard page in the size */
1772 size += VM_MAP_PAGE_SIZE(map);
1773 }
1774
1775 /*
1776 * Look for the first possible address; if there's already
1777 * something at this address, we have to start after it.
1778 */
1779
1780 if (map->disable_vmentry_reuse == TRUE) {
1781 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1782 } else {
1783 if (map->holelistenabled) {
1784 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1785
1786 if (hole_entry == NULL) {
1787 /*
1788 * No more space in the map?
1789 */
1790 goto noSpace;
1791 }
1792
1793 entry = hole_entry;
1794 start = entry->vme_start;
1795 } else {
1796 assert(first_free_is_valid(map));
1797 if ((entry = map->first_free) == vm_map_to_entry(map)) {
1798 start = map->min_offset;
1799 } else {
1800 start = entry->vme_end;
1801 }
1802 }
1803 }
1804
1805 /*
1806 * In any case, the "entry" always precedes
1807 * the proposed new region throughout the loop:
1808 */
1809
1810 while (TRUE) {
1811 vm_map_entry_t next;
1812
1813 /*
1814 * Find the end of the proposed new region.
1815 * Be sure we didn't go beyond the end, or
1816 * wrap around the address.
1817 */
1818
1819 if (vmk_flags.vmkf_guard_before) {
1820 /* reserve space for the front guard page */
1821 start += VM_MAP_PAGE_SIZE(map);
1822 }
1823 end = ((start + mask) & ~mask);
1824
1825 if (end < start) {
1826 goto noSpace;
1827 }
1828 start = end;
1829 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1830 end += size;
1831 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1832
1833 if ((end > map->max_offset) || (end < start)) {
1834 goto noSpace;
1835 }
1836
1837 next = entry->vme_next;
1838
1839 if (map->holelistenabled) {
1840 if (entry->vme_end >= end) {
1841 break;
1842 }
1843 } else {
1844 /*
1845 * If there are no more entries, we must win.
1846 *
1847 * OR
1848 *
1849 * If there is another entry, it must be
1850 * after the end of the potential new region.
1851 */
1852
1853 if (next == vm_map_to_entry(map)) {
1854 break;
1855 }
1856
1857 if (next->vme_start >= end) {
1858 break;
1859 }
1860 }
1861
1862 /*
1863 * Didn't fit -- move to the next entry.
1864 */
1865
1866 entry = next;
1867
1868 if (map->holelistenabled) {
1869 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1870 /*
1871 * Wrapped around
1872 */
1873 goto noSpace;
1874 }
1875 start = entry->vme_start;
1876 } else {
1877 start = entry->vme_end;
1878 }
1879 }
1880
1881 if (vmk_flags.vmkf_guard_before) {
1882 /* go back for the front guard page */
1883 start -= VM_MAP_PAGE_SIZE(map);
1884 }
1885 }
1886
1887 if (map->holelistenabled) {
1888 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1889 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
1890 }
1891 }
1892
1893 /*
1894 * At this point,
1895 * "start" and "end" should define the endpoints of the
1896 * available new range, and
1897 * "entry" should refer to the region before the new
1898 * range, and
1899 *
1900 * the map should be locked.
1901 */
1902
1903 *address = start;
1904
1905 assert(start < end);
1906 new_entry->vme_start = start;
1907 new_entry->vme_end = end;
1908 assert(page_aligned(new_entry->vme_start));
1909 assert(page_aligned(new_entry->vme_end));
1910 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1911 VM_MAP_PAGE_MASK(map)));
1912 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1913 VM_MAP_PAGE_MASK(map)));
1914
1915 new_entry->is_shared = FALSE;
1916 new_entry->is_sub_map = FALSE;
1917 new_entry->use_pmap = TRUE;
1918 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1919 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1920
1921 new_entry->needs_copy = FALSE;
1922
1923 new_entry->inheritance = VM_INHERIT_DEFAULT;
1924 new_entry->protection = VM_PROT_DEFAULT;
1925 new_entry->max_protection = VM_PROT_ALL;
1926 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1927 new_entry->wired_count = 0;
1928 new_entry->user_wired_count = 0;
1929
1930 new_entry->in_transition = FALSE;
1931 new_entry->needs_wakeup = FALSE;
1932 new_entry->no_cache = FALSE;
1933 new_entry->permanent = FALSE;
1934 new_entry->superpage_size = FALSE;
1935 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1936 new_entry->map_aligned = TRUE;
1937 } else {
1938 new_entry->map_aligned = FALSE;
1939 }
1940
1941 new_entry->used_for_jit = FALSE;
1942 new_entry->pmap_cs_associated = FALSE;
1943 new_entry->zero_wired_pages = FALSE;
1944 new_entry->iokit_acct = FALSE;
1945 new_entry->vme_resilient_codesign = FALSE;
1946 new_entry->vme_resilient_media = FALSE;
1947 if (vmk_flags.vmkf_atomic_entry) {
1948 new_entry->vme_atomic = TRUE;
1949 } else {
1950 new_entry->vme_atomic = FALSE;
1951 }
1952
1953 VME_ALIAS_SET(new_entry, tag);
1954
1955 /*
1956 * Insert the new entry into the list
1957 */
1958
1959 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1960
1961 map->size += size;
1962
1963 /*
1964 * Update the lookup hint
1965 */
1966 SAVE_HINT_MAP_WRITE(map, new_entry);
1967
1968 *o_entry = new_entry;
1969 return KERN_SUCCESS;
1970
1971 noSpace:
1972
1973 vm_map_entry_dispose(map, new_entry);
1974 vm_map_unlock(map);
1975 return KERN_NO_SPACE;
1976 }
1977
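/*
 * Illustrative caller sketch (editorial addition, not compiled): how a
 * kmem-style caller might use vm_map_find_space().  On KERN_SUCCESS the
 * map comes back locked and the new entry's object/offset are zeroed, so
 * the caller installs an object of its own and then unlocks.  The function
 * name, object setup and tag below are assumptions chosen for illustration.
 */
#if 0
static kern_return_t
example_kernel_allocate(
	vm_map_t        map,
	vm_map_size_t   size,
	vm_map_offset_t *addrp)
{
	vm_map_entry_t entry;
	vm_object_t    object;
	kern_return_t  kr;

	kr = vm_map_find_space(map, addrp, size, (vm_map_offset_t)0,
	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK, &entry);
	if (kr != KERN_SUCCESS) {
		return kr;      /* map is left unlocked on failure */
	}

	/* map is locked here; give the fresh entry something to map */
	object = vm_object_allocate(size);
	VME_OBJECT_SET(entry, object);
	VME_OFFSET_SET(entry, (vm_object_offset_t) 0);

	vm_map_unlock(map);
	return KERN_SUCCESS;
}
#endif
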
1978 int vm_map_pmap_enter_print = FALSE;
1979 int vm_map_pmap_enter_enable = FALSE;
1980
1981 /*
1982 * Routine: vm_map_pmap_enter [internal only]
1983 *
1984 * Description:
1985 * Force pages from the specified object to be entered into
1986 * the pmap at the specified address if they are present.
1987 * As soon as a page is not found in the object, the scan ends.
1988 *
1989 * Returns:
1990 * Nothing.
1991 *
1992 * In/out conditions:
1993 * The source map should not be locked on entry.
1994 */
1995 __unused static void
1996 vm_map_pmap_enter(
1997 vm_map_t map,
1998 vm_map_offset_t addr,
1999 vm_map_offset_t end_addr,
2000 vm_object_t object,
2001 vm_object_offset_t offset,
2002 vm_prot_t protection)
2003 {
2004 int type_of_fault;
2005 kern_return_t kr;
2006 struct vm_object_fault_info fault_info = {};
2007
2008 if (map->pmap == 0) {
2009 return;
2010 }
2011
2012 assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
2013
2014 while (addr < end_addr) {
2015 vm_page_t m;
2016
2017
2018 /*
2019 * TODO:
2020 * From vm_map_enter(), we come into this function without the map
2021 * lock held or the object lock held.
2022 * We haven't taken a reference on the object either.
2023 * We should do a proper lookup on the map to make sure
2024 * that things are sane before we go locking objects that
2025 * could have been deallocated from under us.
2026 */
2027
2028 vm_object_lock(object);
2029
2030 m = vm_page_lookup(object, offset);
2031
2032 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2033 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2034 vm_object_unlock(object);
2035 return;
2036 }
2037
2038 if (vm_map_pmap_enter_print) {
2039 printf("vm_map_pmap_enter:");
2040 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2041 map, (unsigned long long)addr, object, (unsigned long long)offset);
2042 }
2043 type_of_fault = DBG_CACHE_HIT_FAULT;
2044 kr = vm_fault_enter(m, map->pmap,
2045 addr,
2046 PAGE_SIZE, 0,
2047 protection, protection,
2048 VM_PAGE_WIRED(m),
2049 FALSE, /* change_wiring */
2050 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2051 &fault_info,
2052 NULL, /* need_retry */
2053 &type_of_fault);
2054
2055 vm_object_unlock(object);
2056
2057 offset += PAGE_SIZE_64;
2058 addr += PAGE_SIZE;
2059 }
2060 }
2061
2062 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2063 kern_return_t
2064 vm_map_random_address_for_size(
2065 vm_map_t map,
2066 vm_map_offset_t *address,
2067 vm_map_size_t size)
2068 {
2069 kern_return_t kr = KERN_SUCCESS;
2070 int tries = 0;
2071 vm_map_offset_t random_addr = 0;
2072 vm_map_offset_t hole_end;
2073
2074 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2075 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2076 vm_map_size_t vm_hole_size = 0;
2077 vm_map_size_t addr_space_size;
2078
2079 addr_space_size = vm_map_max(map) - vm_map_min(map);
2080
2081 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
2082
2083 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2084 if (startup_phase < STARTUP_SUB_ZALLOC) {
2085 random_addr = (vm_map_offset_t)early_random();
2086 } else {
2087 random_addr = (vm_map_offset_t)random();
2088 }
2089 random_addr <<= VM_MAP_PAGE_SHIFT(map);
2090 random_addr = vm_map_trunc_page(
2091 vm_map_min(map) + (random_addr % addr_space_size),
2092 VM_MAP_PAGE_MASK(map));
2093
2094 #if CONFIG_PROB_GZALLOC
2095 if (map->pmap == kernel_pmap && pgz_owned(random_addr)) {
2096 continue;
2097 }
2098 #endif /* CONFIG_PROB_GZALLOC */
2099
2100 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2101 if (prev_entry == vm_map_to_entry(map)) {
2102 next_entry = vm_map_first_entry(map);
2103 } else {
2104 next_entry = prev_entry->vme_next;
2105 }
2106 if (next_entry == vm_map_to_entry(map)) {
2107 hole_end = vm_map_max(map);
2108 } else {
2109 hole_end = next_entry->vme_start;
2110 }
2111 vm_hole_size = hole_end - random_addr;
2112 if (vm_hole_size >= size) {
2113 *address = random_addr;
2114 break;
2115 }
2116 }
2117 tries++;
2118 }
2119
2120 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2121 kr = KERN_NO_SPACE;
2122 }
2123 return kr;
2124 }
2125
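/*
 * Worked sketch (editorial addition, not compiled): the derivation of the
 * candidate address used above, factored out for clarity.  The helper name
 * is hypothetical; the steps mirror the shift, modulo and truncation
 * performed by vm_map_random_address_for_size().
 */
#if 0
static vm_map_offset_t
example_candidate_address(
	vm_map_t map,
	uint64_t rnd)                   /* raw random value */
{
	vm_map_size_t   space = vm_map_max(map) - vm_map_min(map);
	vm_map_offset_t candidate;

	/* scale by the map's page size (e.g. 1 << 14 for a 16K map) */
	candidate = (vm_map_offset_t)rnd << VM_MAP_PAGE_SHIFT(map);
	/* fold it into [vm_map_min, vm_map_max) ... */
	candidate = vm_map_min(map) + (candidate % space);
	/* ... and round down to a map-page boundary */
	return vm_map_trunc_page(candidate, VM_MAP_PAGE_MASK(map));
}
#endif
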
2126 static boolean_t
2127 vm_memory_malloc_no_cow(
2128 int alias)
2129 {
2130 uint64_t alias_mask;
2131
2132 if (alias > 63) {
2133 return FALSE;
2134 }
2135
2136 alias_mask = 1ULL << alias;
2137 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2138 return TRUE;
2139 }
2140 return FALSE;
2141 }
2142
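/*
 * Illustrative sketch (editorial addition, not compiled): the mask tested
 * above is a 64-bit bitmap indexed by VM alias.  The aliases chosen here
 * are arbitrary examples of how such a mask could be composed; the real
 * mask is established during VM initialization.
 */
#if 0
static void
example_compose_no_cow_mask(void)
{
	uint64_t mask = 0;

	mask |= 1ULL << VM_MEMORY_MALLOC;
	mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;

	/* vm_memory_malloc_no_cow(VM_MEMORY_MALLOC) would then return TRUE */
	vm_memory_malloc_no_cow_mask = mask;
}
#endif
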
2143 uint64_t vm_map_enter_RLIMIT_AS_count = 0;
2144 uint64_t vm_map_enter_RLIMIT_DATA_count = 0;
2145 /*
2146 * Routine: vm_map_enter
2147 *
2148 * Description:
2149 * Allocate a range in the specified virtual address map.
2150 * The resulting range will refer to memory defined by
2151 * the given memory object and offset into that object.
2152 *
2153 * Arguments are as defined in the vm_map call.
2154 */
2155 static unsigned int vm_map_enter_restore_successes = 0;
2156 static unsigned int vm_map_enter_restore_failures = 0;
2157 kern_return_t
2158 vm_map_enter(
2159 vm_map_t map,
2160 vm_map_offset_t *address, /* IN/OUT */
2161 vm_map_size_t size,
2162 vm_map_offset_t mask,
2163 int flags,
2164 vm_map_kernel_flags_t vmk_flags,
2165 vm_tag_t alias,
2166 vm_object_t object,
2167 vm_object_offset_t offset,
2168 boolean_t needs_copy,
2169 vm_prot_t cur_protection,
2170 vm_prot_t max_protection,
2171 vm_inherit_t inheritance)
2172 {
2173 vm_map_entry_t entry, new_entry;
2174 vm_map_offset_t start, tmp_start, tmp_offset;
2175 vm_map_offset_t end, tmp_end;
2176 vm_map_offset_t tmp2_start, tmp2_end;
2177 vm_map_offset_t desired_empty_end;
2178 vm_map_offset_t step;
2179 kern_return_t result = KERN_SUCCESS;
2180 vm_map_t zap_old_map = VM_MAP_NULL;
2181 vm_map_t zap_new_map = VM_MAP_NULL;
2182 boolean_t map_locked = FALSE;
2183 boolean_t pmap_empty = TRUE;
2184 boolean_t new_mapping_established = FALSE;
2185 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2186 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2187 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2188 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2189 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2190 boolean_t is_submap = vmk_flags.vmkf_submap;
2191 boolean_t permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
2192 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2193 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2194 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2195 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
2196 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2197 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2198 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2199 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2200 vm_tag_t user_alias;
2201 vm_map_offset_t effective_min_offset, effective_max_offset;
2202 kern_return_t kr;
2203 boolean_t clear_map_aligned = FALSE;
2204 vm_map_entry_t hole_entry;
2205 vm_map_size_t chunk_size = 0;
2206 vm_object_t caller_object;
2207
2208 caller_object = object;
2209
2210 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2211
2212 if (flags & VM_FLAGS_4GB_CHUNK) {
2213 #if defined(__LP64__)
2214 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2215 #else /* __LP64__ */
2216 chunk_size = ANON_CHUNK_SIZE;
2217 #endif /* __LP64__ */
2218 } else {
2219 chunk_size = ANON_CHUNK_SIZE;
2220 }
2221
2222 if (superpage_size) {
2223 switch (superpage_size) {
2224 /*
2225 * Note that the current implementation only supports
2226 * a single size for superpages, SUPERPAGE_SIZE, per
2227 * architecture. As soon as more sizes are to be
2228 * supported, SUPERPAGE_SIZE has to be replaced
2229 * with a lookup of the size depending on superpage_size.
2230 */
2231 #ifdef __x86_64__
2232 case SUPERPAGE_SIZE_ANY:
2233 /* handle it like 2 MB and round up to page size */
2234 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2235 OS_FALLTHROUGH;
2236 case SUPERPAGE_SIZE_2MB:
2237 break;
2238 #endif
2239 default:
2240 return KERN_INVALID_ARGUMENT;
2241 }
2242 mask = SUPERPAGE_SIZE - 1;
2243 if (size & (SUPERPAGE_SIZE - 1)) {
2244 return KERN_INVALID_ARGUMENT;
2245 }
2246 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2247 }
2248
2249
2250 if ((cur_protection & VM_PROT_WRITE) &&
2251 (cur_protection & VM_PROT_EXECUTE) &&
2252 #if XNU_TARGET_OS_OSX
2253 map->pmap != kernel_pmap &&
2254 (cs_process_global_enforcement() ||
2255 (vmk_flags.vmkf_cs_enforcement_override
2256 ? vmk_flags.vmkf_cs_enforcement
2257 : (vm_map_cs_enforcement(map)
2258 #if __arm64__
2259 || !VM_MAP_IS_EXOTIC(map)
2260 #endif /* __arm64__ */
2261 ))) &&
2262 #endif /* XNU_TARGET_OS_OSX */
2263 (VM_MAP_POLICY_WX_FAIL(map) ||
2264 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2265 !entry_for_jit) {
2266 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2267
2268 DTRACE_VM3(cs_wx,
2269 uint64_t, 0,
2270 uint64_t, 0,
2271 vm_prot_t, cur_protection);
2272 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2273 proc_selfpid(),
2274 (current_task()->bsd_info
2275 ? proc_name_address(current_task()->bsd_info)
2276 : "?"),
2277 __FUNCTION__,
2278 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2279 cur_protection &= ~VM_PROT_EXECUTE;
2280 if (vm_protect_wx_fail) {
2281 return KERN_PROTECTION_FAILURE;
2282 }
2283 }
2284
2285 /*
2286 * If the task has requested executable lockdown,
2287 * deny any new executable mapping.
2288 */
2289 if (map->map_disallow_new_exec == TRUE) {
2290 if (cur_protection & VM_PROT_EXECUTE) {
2291 return KERN_PROTECTION_FAILURE;
2292 }
2293 }
2294
2295 if (resilient_codesign) {
2296 assert(!is_submap);
2297 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
2298 if ((cur_protection | max_protection) & reject_prot) {
2299 return KERN_PROTECTION_FAILURE;
2300 }
2301 }
2302
2303 if (resilient_media) {
2304 assert(!is_submap);
2305 // assert(!needs_copy);
2306 if (object != VM_OBJECT_NULL &&
2307 !object->internal) {
2308 /*
2309 * This mapping is directly backed by an external
2310 * memory manager (e.g. a vnode pager for a file):
2311 * we would not have any safe place to inject
2312 * a zero-filled page if an actual page is not
2313 * available, without possibly impacting the actual
2314 * contents of the mapped object (e.g. the file),
2315 * so we can't provide any media resiliency here.
2316 */
2317 return KERN_INVALID_ARGUMENT;
2318 }
2319 }
2320
2321 if (is_submap) {
2322 if (purgable) {
2323 /* submaps can not be purgeable */
2324 return KERN_INVALID_ARGUMENT;
2325 }
2326 if (object == VM_OBJECT_NULL) {
2327 /* submaps can not be created lazily */
2328 return KERN_INVALID_ARGUMENT;
2329 }
2330 }
2331 if (vmk_flags.vmkf_already) {
2332 /*
2333 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2334 * is already present. For it to be meaningful, the requested
2335 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2336 * we shouldn't try to remove what was mapped there first
2337 * (!VM_FLAGS_OVERWRITE).
2338 */
2339 if ((flags & VM_FLAGS_ANYWHERE) ||
2340 (flags & VM_FLAGS_OVERWRITE)) {
2341 return KERN_INVALID_ARGUMENT;
2342 }
2343 }
2344
2345 effective_min_offset = map->min_offset;
2346
2347 if (vmk_flags.vmkf_beyond_max) {
2348 /*
2349 * Allow an insertion beyond the map's max offset.
2350 */
2351 #if !defined(__arm__)
2352 if (vm_map_is_64bit(map)) {
2353 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2354 } else
2355 #endif /* __arm__ */
2356 effective_max_offset = 0x00000000FFFFF000ULL;
2357 } else {
2358 #if XNU_TARGET_OS_OSX
2359 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2360 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2361 } else {
2362 effective_max_offset = map->max_offset;
2363 }
2364 #else /* XNU_TARGET_OS_OSX */
2365 effective_max_offset = map->max_offset;
2366 #endif /* XNU_TARGET_OS_OSX */
2367 }
2368
2369 if (size == 0 ||
2370 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2371 *address = 0;
2372 return KERN_INVALID_ARGUMENT;
2373 }
2374
2375 if (map->pmap == kernel_pmap) {
2376 user_alias = VM_KERN_MEMORY_NONE;
2377 } else {
2378 user_alias = alias;
2379 }
2380
2381 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2382 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2383 }
2384
2385 #define RETURN(value) { result = value; goto BailOut; }
2386
2387 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2388 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2389 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2390 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2391 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2392 }
2393
2394 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2395 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2396 /*
2397 * In most cases, the caller rounds the size up to the
2398 * map's page size.
2399 * If we get a size that is explicitly not map-aligned here,
2400 * we'll have to respect the caller's wish and mark the
2401 * mapping as "not map-aligned" to avoid tripping the
2402 * map alignment checks later.
2403 */
2404 clear_map_aligned = TRUE;
2405 }
2406 if (!anywhere &&
2407 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2408 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2409 /*
2410 * We've been asked to map at a fixed address and that
2411 * address is not aligned to the map's specific alignment.
2412 * The caller should know what it's doing (i.e. most likely
2413 * mapping some fragmented copy map, transferring memory from
2414 * a VM map with a different alignment), so clear map_aligned
2415 * for this new VM map entry and proceed.
2416 */
2417 clear_map_aligned = TRUE;
2418 }
2419
2420 /*
2421 * Only zero-fill objects are allowed to be purgable.
2422 * LP64todo - limit purgable objects to 32-bits for now
2423 */
2424 if (purgable &&
2425 (offset != 0 ||
2426 (object != VM_OBJECT_NULL &&
2427 (object->vo_size != size ||
2428 object->purgable == VM_PURGABLE_DENY))
2429 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2430 return KERN_INVALID_ARGUMENT;
2431 }
2432
2433 if (!anywhere && overwrite) {
2434 /*
2435 * Create a temporary VM map to hold the old mappings in the
2436 * affected area while we create the new one.
2437 * This avoids releasing the VM map lock in
2438 * vm_map_entry_delete() and allows atomicity
2439 * when we want to replace some mappings with a new one.
2440 * It also allows us to restore the old VM mappings if the
2441 * new mapping fails.
2442 */
2443 zap_old_map = vm_map_create_options(PMAP_NULL,
2444 *address,
2445 *address + size,
2446 VM_MAP_CREATE_ZAP_OPTIONS(map));
2447 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2448 }
2449
2450 StartAgain:;
2451
2452 start = *address;
2453
2454 if (anywhere) {
2455 vm_map_lock(map);
2456 map_locked = TRUE;
2457
2458 if (entry_for_jit) {
2459 if (map->jit_entry_exists &&
2460 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2461 result = KERN_INVALID_ARGUMENT;
2462 goto BailOut;
2463 }
2464 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2465 random_address = TRUE;
2466 }
2467 }
2468
2469 if (random_address) {
2470 /*
2471 * Get a random start address.
2472 */
2473 result = vm_map_random_address_for_size(map, address, size);
2474 if (result != KERN_SUCCESS) {
2475 goto BailOut;
2476 }
2477 start = *address;
2478 }
2479 #if XNU_TARGET_OS_OSX
2480 else if ((start == 0 || start == vm_map_min(map)) &&
2481 !map->disable_vmentry_reuse &&
2482 map->vmmap_high_start != 0) {
2483 start = map->vmmap_high_start;
2484 }
2485 #endif /* XNU_TARGET_OS_OSX */
2486
2487
2488 /*
2489 * Calculate the first possible address.
2490 */
2491
2492 if (start < effective_min_offset) {
2493 start = effective_min_offset;
2494 }
2495 if (start > effective_max_offset) {
2496 RETURN(KERN_NO_SPACE);
2497 }
2498
2499 /*
2500 * Look for the first possible address;
2501 * if there's already something at this
2502 * address, we have to start after it.
2503 */
2504
2505 if (map->disable_vmentry_reuse == TRUE) {
2506 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2507 } else {
2508 if (map->holelistenabled) {
2509 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2510
2511 if (hole_entry == NULL) {
2512 /*
2513 * No more space in the map?
2514 */
2515 result = KERN_NO_SPACE;
2516 goto BailOut;
2517 } else {
2518 boolean_t found_hole = FALSE;
2519
2520 do {
2521 if (hole_entry->vme_start >= start) {
2522 start = hole_entry->vme_start;
2523 found_hole = TRUE;
2524 break;
2525 }
2526
2527 if (hole_entry->vme_end > start) {
2528 found_hole = TRUE;
2529 break;
2530 }
2531 hole_entry = hole_entry->vme_next;
2532 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2533
2534 if (found_hole == FALSE) {
2535 result = KERN_NO_SPACE;
2536 goto BailOut;
2537 }
2538
2539 entry = hole_entry;
2540
2541 if (start == 0) {
2542 start += PAGE_SIZE_64;
2543 }
2544 }
2545 } else {
2546 assert(first_free_is_valid(map));
2547
2548 entry = map->first_free;
2549
2550 if (entry == vm_map_to_entry(map)) {
2551 entry = NULL;
2552 } else {
2553 if (entry->vme_next == vm_map_to_entry(map)) {
2554 /*
2555 * Hole at the end of the map.
2556 */
2557 entry = NULL;
2558 } else {
2559 if (start < (entry->vme_next)->vme_start) {
2560 start = entry->vme_end;
2561 start = vm_map_round_page(start,
2562 VM_MAP_PAGE_MASK(map));
2563 } else {
2564 /*
2565 * Need to do a lookup.
2566 */
2567 entry = NULL;
2568 }
2569 }
2570 }
2571
2572 if (entry == NULL) {
2573 vm_map_entry_t tmp_entry;
2574 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2575 assert(!entry_for_jit);
2576 start = tmp_entry->vme_end;
2577 start = vm_map_round_page(start,
2578 VM_MAP_PAGE_MASK(map));
2579 }
2580 entry = tmp_entry;
2581 }
2582 }
2583 }
2584
2585 /*
2586 * In any case, the "entry" always precedes
2587 * the proposed new region throughout the
2588 * loop:
2589 */
2590
2591 while (TRUE) {
2592 vm_map_entry_t next;
2593
2594 /*
2595 * Find the end of the proposed new region.
2596 * Be sure we didn't go beyond the end, or
2597 * wrap around the address.
2598 */
2599
2600 end = ((start + mask) & ~mask);
2601 end = vm_map_round_page(end,
2602 VM_MAP_PAGE_MASK(map));
2603 if (end < start) {
2604 RETURN(KERN_NO_SPACE);
2605 }
2606 start = end;
2607 assert(VM_MAP_PAGE_ALIGNED(start,
2608 VM_MAP_PAGE_MASK(map)));
2609 end += size;
2610
2611 /* We want an entire page of empty space, but don't increase the allocation size. */
2612 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2613
2614 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2615 if (map->wait_for_space) {
2616 assert(!keep_map_locked);
2617 if (size <= (effective_max_offset -
2618 effective_min_offset)) {
2619 assert_wait((event_t)map,
2620 THREAD_ABORTSAFE);
2621 vm_map_unlock(map);
2622 map_locked = FALSE;
2623 thread_block(THREAD_CONTINUE_NULL);
2624 goto StartAgain;
2625 }
2626 }
2627 RETURN(KERN_NO_SPACE);
2628 }
2629
2630 next = entry->vme_next;
2631
2632 if (map->holelistenabled) {
2633 if (entry->vme_end >= desired_empty_end) {
2634 break;
2635 }
2636 } else {
2637 /*
2638 * If there are no more entries, we must win.
2639 *
2640 * OR
2641 *
2642 * If there is another entry, it must be
2643 * after the end of the potential new region.
2644 */
2645
2646 if (next == vm_map_to_entry(map)) {
2647 break;
2648 }
2649
2650 if (next->vme_start >= desired_empty_end) {
2651 break;
2652 }
2653 }
2654
2655 /*
2656 * Didn't fit -- move to the next entry.
2657 */
2658
2659 entry = next;
2660
2661 if (map->holelistenabled) {
2662 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2663 /*
2664 * Wrapped around
2665 */
2666 result = KERN_NO_SPACE;
2667 goto BailOut;
2668 }
2669 start = entry->vme_start;
2670 } else {
2671 start = entry->vme_end;
2672 }
2673
2674 start = vm_map_round_page(start,
2675 VM_MAP_PAGE_MASK(map));
2676 }
2677
2678 if (map->holelistenabled) {
2679 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2680 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
2681 }
2682 }
2683
2684 *address = start;
2685 assert(VM_MAP_PAGE_ALIGNED(*address,
2686 VM_MAP_PAGE_MASK(map)));
2687 } else {
2688 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2689 !overwrite &&
2690 user_alias == VM_MEMORY_REALLOC) {
2691 /*
2692 * Force realloc() to switch to a new allocation,
2693 * to prevent 4k-fragmented virtual ranges.
2694 */
2695 // DEBUG4K_ERROR("no realloc in place");
2696 return KERN_NO_SPACE;
2697 }
2698
2699 /*
2700 * Verify that:
2701 * the address doesn't itself violate
2702 * the mask requirement.
2703 */
2704
2705 vm_map_lock(map);
2706 map_locked = TRUE;
2707 if ((start & mask) != 0) {
2708 RETURN(KERN_NO_SPACE);
2709 }
2710
2711 /*
2712 * ... the address is within bounds
2713 */
2714
2715 end = start + size;
2716
2717 if ((start < effective_min_offset) ||
2718 (end > effective_max_offset) ||
2719 (start >= end)) {
2720 RETURN(KERN_INVALID_ADDRESS);
2721 }
2722
2723 if (overwrite && zap_old_map != VM_MAP_NULL) {
2724 int remove_flags;
2725 /*
2726 * Fixed mapping and "overwrite" flag: attempt to
2727 * remove all existing mappings in the specified
2728 * address range, saving them in our "zap_old_map".
2729 */
2730 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2731 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2732 if (vmk_flags.vmkf_overwrite_immutable) {
2733 /* we can overwrite immutable mappings */
2734 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2735 }
2736 (void) vm_map_delete(map, start, end,
2737 remove_flags,
2738 zap_old_map);
2739 }
2740
2741 /*
2742 * ... the starting address isn't allocated
2743 */
2744
2745 if (vm_map_lookup_entry(map, start, &entry)) {
2746 if (!(vmk_flags.vmkf_already)) {
2747 RETURN(KERN_NO_SPACE);
2748 }
2749 /*
2750 * Check if what's already there is what we want.
2751 */
2752 tmp_start = start;
2753 tmp_offset = offset;
2754 if (entry->vme_start < start) {
2755 tmp_start -= start - entry->vme_start;
2756 tmp_offset -= start - entry->vme_start;
2757 }
2758 for (; entry->vme_start < end;
2759 entry = entry->vme_next) {
2760 /*
2761 * Check if the mapping's attributes
2762 * match the existing map entry.
2763 */
2764 if (entry == vm_map_to_entry(map) ||
2765 entry->vme_start != tmp_start ||
2766 entry->is_sub_map != is_submap ||
2767 VME_OFFSET(entry) != tmp_offset ||
2768 entry->needs_copy != needs_copy ||
2769 entry->protection != cur_protection ||
2770 entry->max_protection != max_protection ||
2771 entry->inheritance != inheritance ||
2772 entry->iokit_acct != iokit_acct ||
2773 VME_ALIAS(entry) != alias) {
2774 /* not the same mapping ! */
2775 RETURN(KERN_NO_SPACE);
2776 }
2777 /*
2778 * Check if the same object is being mapped.
2779 */
2780 if (is_submap) {
2781 if (VME_SUBMAP(entry) !=
2782 (vm_map_t) object) {
2783 /* not the same submap */
2784 RETURN(KERN_NO_SPACE);
2785 }
2786 } else {
2787 if (VME_OBJECT(entry) != object) {
2788 /* not the same VM object... */
2789 vm_object_t obj2;
2790
2791 obj2 = VME_OBJECT(entry);
2792 if ((obj2 == VM_OBJECT_NULL ||
2793 obj2->internal) &&
2794 (object == VM_OBJECT_NULL ||
2795 object->internal)) {
2796 /*
2797 * ... but both are
2798 * anonymous memory,
2799 * so equivalent.
2800 */
2801 } else {
2802 RETURN(KERN_NO_SPACE);
2803 }
2804 }
2805 }
2806
2807 tmp_offset += entry->vme_end - entry->vme_start;
2808 tmp_start += entry->vme_end - entry->vme_start;
2809 if (entry->vme_end >= end) {
2810 /* reached the end of our mapping */
2811 break;
2812 }
2813 }
2814 /* it all matches: let's use what's already there ! */
2815 RETURN(KERN_MEMORY_PRESENT);
2816 }
2817
2818 /*
2819 * ... the next region doesn't overlap the
2820 * end point.
2821 */
2822
2823 if ((entry->vme_next != vm_map_to_entry(map)) &&
2824 (entry->vme_next->vme_start < end)) {
2825 RETURN(KERN_NO_SPACE);
2826 }
2827 }
2828
2829 /*
2830 * At this point,
2831 * "start" and "end" should define the endpoints of the
2832 * available new range, and
2833 * "entry" should refer to the region before the new
2834 * range, and
2835 *
2836 * the map should be locked.
2837 */
2838
2839 /*
2840 * See whether we can avoid creating a new entry (and object) by
2841 * extending one of our neighbors. [So far, we only attempt to
2842 * extend from below.] Note that we can never extend/join
2843 * purgable objects because they need to remain distinct
2844 * entities in order to implement their "volatile object"
2845 * semantics.
2846 */
2847
2848 if (purgable ||
2849 entry_for_jit ||
2850 vm_memory_malloc_no_cow(user_alias)) {
2851 if (object == VM_OBJECT_NULL) {
2852 object = vm_object_allocate(size);
2853 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2854 object->true_share = FALSE;
2855 if (purgable) {
2856 task_t owner;
2857 object->purgable = VM_PURGABLE_NONVOLATILE;
2858 if (map->pmap == kernel_pmap) {
2859 /*
2860 * Purgeable mappings made in a kernel
2861 * map are "owned" by the kernel itself
2862 * rather than the current user task
2863 * because they're likely to be used by
2864 * more than this user task (see
2865 * execargs_purgeable_allocate(), for
2866 * example).
2867 */
2868 owner = kernel_task;
2869 } else {
2870 owner = current_task();
2871 }
2872 assert(object->vo_owner == NULL);
2873 assert(object->resident_page_count == 0);
2874 assert(object->wired_page_count == 0);
2875 vm_object_lock(object);
2876 vm_purgeable_nonvolatile_enqueue(object, owner);
2877 vm_object_unlock(object);
2878 }
2879 offset = (vm_object_offset_t)0;
2880 }
2881 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
2882 /* no coalescing if address space uses sub-pages */
2883 } else if ((is_submap == FALSE) &&
2884 (object == VM_OBJECT_NULL) &&
2885 (entry != vm_map_to_entry(map)) &&
2886 (entry->vme_end == start) &&
2887 (!entry->is_shared) &&
2888 (!entry->is_sub_map) &&
2889 (!entry->in_transition) &&
2890 (!entry->needs_wakeup) &&
2891 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2892 (entry->protection == cur_protection) &&
2893 (entry->max_protection == max_protection) &&
2894 (entry->inheritance == inheritance) &&
2895 ((user_alias == VM_MEMORY_REALLOC) ||
2896 (VME_ALIAS(entry) == alias)) &&
2897 (entry->no_cache == no_cache) &&
2898 (entry->permanent == permanent) &&
2899 /* no coalescing for immutable executable mappings */
2900 !((entry->protection & VM_PROT_EXECUTE) &&
2901 entry->permanent) &&
2902 (!entry->superpage_size && !superpage_size) &&
2903 /*
2904 * No coalescing if not map-aligned, to avoid propagating
2905 * that condition any further than needed:
2906 */
2907 (!entry->map_aligned || !clear_map_aligned) &&
2908 (!entry->zero_wired_pages) &&
2909 (!entry->used_for_jit && !entry_for_jit) &&
2910 (!entry->pmap_cs_associated) &&
2911 (entry->iokit_acct == iokit_acct) &&
2912 (!entry->vme_resilient_codesign) &&
2913 (!entry->vme_resilient_media) &&
2914 (!entry->vme_atomic) &&
2915 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2916
2917 ((entry->vme_end - entry->vme_start) + size <=
2918 (user_alias == VM_MEMORY_REALLOC ?
2919 ANON_CHUNK_SIZE :
2920 NO_COALESCE_LIMIT)) &&
2921
2922 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2923 if (vm_object_coalesce(VME_OBJECT(entry),
2924 VM_OBJECT_NULL,
2925 VME_OFFSET(entry),
2926 (vm_object_offset_t) 0,
2927 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2928 (vm_map_size_t)(end - entry->vme_end))) {
2929 /*
2930 * Coalesced the two objects - can extend
2931 * the previous map entry to include the
2932 * new range.
2933 */
2934 map->size += (end - entry->vme_end);
2935 assert(entry->vme_start < end);
2936 assert(VM_MAP_PAGE_ALIGNED(end,
2937 VM_MAP_PAGE_MASK(map)));
2938 if (__improbable(vm_debug_events)) {
2939 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2940 }
2941 entry->vme_end = end;
2942 if (map->holelistenabled) {
2943 vm_map_store_update_first_free(map, entry, TRUE);
2944 } else {
2945 vm_map_store_update_first_free(map, map->first_free, TRUE);
2946 }
2947 new_mapping_established = TRUE;
2948 RETURN(KERN_SUCCESS);
2949 }
2950 }
2951
2952 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2953 new_entry = NULL;
2954
2955 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2956 tmp2_end = tmp2_start + step;
2957 /*
2958 * Create a new entry
2959 *
2960 * XXX FBDP
2961 * The reserved "page zero" in each process's address space can
2962 * be arbitrarily large. Splitting it into separate objects and
2963 * therefore different VM map entries serves no purpose and just
2964 * slows down operations on the VM map, so let's not split the
2965 * allocation into chunks if the max protection is NONE. That
2966 * memory should never be accessible, so it will never get to the
2967 * default pager.
2968 */
2969 tmp_start = tmp2_start;
2970 if (object == VM_OBJECT_NULL &&
2971 size > chunk_size &&
2972 max_protection != VM_PROT_NONE &&
2973 superpage_size == 0) {
2974 tmp_end = tmp_start + chunk_size;
2975 } else {
2976 tmp_end = tmp2_end;
2977 }
2978 do {
2979 if (!is_submap &&
2980 object != VM_OBJECT_NULL &&
2981 object->internal &&
2982 offset + (tmp_end - tmp_start) > object->vo_size) {
2983 // printf("FBDP object %p size 0x%llx overmapping offset 0x%llx size 0x%llx\n", object, object->vo_size, offset, (uint64_t)(tmp_end - tmp_start));
2984 DTRACE_VM5(vm_map_enter_overmap,
2985 vm_map_t, map,
2986 vm_map_address_t, tmp_start,
2987 vm_map_address_t, tmp_end,
2988 vm_object_offset_t, offset,
2989 vm_object_size_t, object->vo_size);
2990 }
2991 new_entry = vm_map_entry_insert(map,
2992 entry, tmp_start, tmp_end,
2993 object, offset, vmk_flags,
2994 needs_copy, FALSE, FALSE,
2995 cur_protection, max_protection,
2996 VM_BEHAVIOR_DEFAULT,
2997 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
2998 VM_INHERIT_NONE : inheritance),
2999 0,
3000 no_cache,
3001 permanent,
3002 no_copy_on_read,
3003 superpage_size,
3004 clear_map_aligned,
3005 is_submap,
3006 entry_for_jit,
3007 alias,
3008 translated_allow_execute);
3009
3010 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3011
3012 if (resilient_codesign) {
3013 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
3014 if (!((cur_protection | max_protection) & reject_prot)) {
3015 new_entry->vme_resilient_codesign = TRUE;
3016 }
3017 }
3018
3019 if (resilient_media &&
3020 (object == VM_OBJECT_NULL ||
3021 object->internal)) {
3022 new_entry->vme_resilient_media = TRUE;
3023 }
3024
3025 assert(!new_entry->iokit_acct);
3026 if (!is_submap &&
3027 object != VM_OBJECT_NULL &&
3028 (object->purgable != VM_PURGABLE_DENY ||
3029 object->vo_ledger_tag)) {
3030 assert(new_entry->use_pmap);
3031 assert(!new_entry->iokit_acct);
3032 /*
3033 * Turn off pmap accounting since
3034 * purgeable (or tagged) objects have their
3035 * own ledgers.
3036 */
3037 new_entry->use_pmap = FALSE;
3038 } else if (!is_submap &&
3039 iokit_acct &&
3040 object != VM_OBJECT_NULL &&
3041 object->internal) {
3042 /* alternate accounting */
3043 assert(!new_entry->iokit_acct);
3044 assert(new_entry->use_pmap);
3045 new_entry->iokit_acct = TRUE;
3046 new_entry->use_pmap = FALSE;
3047 DTRACE_VM4(
3048 vm_map_iokit_mapped_region,
3049 vm_map_t, map,
3050 vm_map_offset_t, new_entry->vme_start,
3051 vm_map_offset_t, new_entry->vme_end,
3052 int, VME_ALIAS(new_entry));
3053 vm_map_iokit_mapped_region(
3054 map,
3055 (new_entry->vme_end -
3056 new_entry->vme_start));
3057 } else if (!is_submap) {
3058 assert(!new_entry->iokit_acct);
3059 assert(new_entry->use_pmap);
3060 }
3061
3062 if (is_submap) {
3063 vm_map_t submap;
3064 boolean_t submap_is_64bit;
3065 boolean_t use_pmap;
3066
3067 assert(new_entry->is_sub_map);
3068 assert(!new_entry->use_pmap);
3069 assert(!new_entry->iokit_acct);
3070 submap = (vm_map_t) object;
3071 submap_is_64bit = vm_map_is_64bit(submap);
3072 use_pmap = vmk_flags.vmkf_nested_pmap;
3073 #ifndef NO_NESTED_PMAP
3074 if (use_pmap && submap->pmap == NULL) {
3075 ledger_t ledger = map->pmap->ledger;
3076 /* we need a sub pmap to nest... */
3077 submap->pmap = pmap_create_options(ledger, 0,
3078 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3079 if (submap->pmap == NULL) {
3080 /* let's proceed without nesting... */
3081 }
3082 #if defined(__arm__) || defined(__arm64__)
3083 else {
3084 pmap_set_nested(submap->pmap);
3085 }
3086 #endif
3087 }
3088 if (use_pmap && submap->pmap != NULL) {
3089 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3090 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3091 kr = KERN_FAILURE;
3092 } else {
3093 kr = pmap_nest(map->pmap,
3094 submap->pmap,
3095 tmp_start,
3096 tmp_end - tmp_start);
3097 }
3098 if (kr != KERN_SUCCESS) {
3099 printf("vm_map_enter: "
3100 "pmap_nest(0x%llx,0x%llx) "
3101 "error 0x%x\n",
3102 (long long)tmp_start,
3103 (long long)tmp_end,
3104 kr);
3105 } else {
3106 /* we're now nested ! */
3107 new_entry->use_pmap = TRUE;
3108 pmap_empty = FALSE;
3109 }
3110 }
3111 #endif /* NO_NESTED_PMAP */
3112 }
3113 entry = new_entry;
3114
3115 if (superpage_size) {
3116 vm_page_t pages, m;
3117 vm_object_t sp_object;
3118 vm_object_offset_t sp_offset;
3119
3120 VME_OFFSET_SET(entry, 0);
3121
3122 /* allocate one superpage */
3123 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3124 if (kr != KERN_SUCCESS) {
3125 /* deallocate whole range... */
3126 new_mapping_established = TRUE;
3127 /* ... but only up to "tmp_end" */
3128 size -= end - tmp_end;
3129 RETURN(kr);
3130 }
3131
3132 /* create one vm_object per superpage */
3133 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3134 sp_object->phys_contiguous = TRUE;
3135 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3136 VME_OBJECT_SET(entry, sp_object);
3137 assert(entry->use_pmap);
3138
3139 /* enter the base pages into the object */
3140 vm_object_lock(sp_object);
3141 for (sp_offset = 0;
3142 sp_offset < SUPERPAGE_SIZE;
3143 sp_offset += PAGE_SIZE) {
3144 m = pages;
3145 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3146 pages = NEXT_PAGE(m);
3147 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3148 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3149 }
3150 vm_object_unlock(sp_object);
3151 }
3152 } while (tmp_end != tmp2_end &&
3153 (tmp_start = tmp_end) &&
3154 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3155 tmp_end + chunk_size : tmp2_end));
3156 }
3157
3158 new_mapping_established = TRUE;
3159
3160 BailOut:
3161 assert(map_locked == TRUE);
3162
3163 /*
3164 * Address space limit enforcement (RLIMIT_AS and RLIMIT_DATA):
3165 * If we have identified and possibly established the new mapping(s),
3166 * make sure we did not go beyond the address space limit.
3167 */
3168 if (result == KERN_SUCCESS) {
3169 if (map->size_limit != RLIM_INFINITY &&
3170 map->size > map->size_limit) {
3171 /*
3172 * Establishing the requested mappings would exceed
3173 * the process's RLIMIT_AS limit: fail with
3174 * KERN_NO_SPACE.
3175 */
3176 result = KERN_NO_SPACE;
3177 printf("%d[%s] %s: map size 0x%llx over RLIMIT_AS 0x%llx\n",
3178 proc_selfpid(),
3179 (current_task()->bsd_info
3180 ? proc_name_address(current_task()->bsd_info)
3181 : "?"),
3182 __FUNCTION__,
3183 (uint64_t) map->size,
3184 (uint64_t) map->size_limit);
3185 DTRACE_VM2(vm_map_enter_RLIMIT_AS,
3186 vm_map_size_t, map->size,
3187 uint64_t, map->size_limit);
3188 vm_map_enter_RLIMIT_AS_count++;
3189 } else if (map->data_limit != RLIM_INFINITY &&
3190 map->size > map->data_limit) {
3191 /*
3192 * Establishing the requested mappings would exceed
3193 * the process's RLIMIT_DATA limit: fail with
3194 * KERN_NO_SPACE.
3195 */
3196 result = KERN_NO_SPACE;
3197 printf("%d[%s] %s: map size 0x%llx over RLIMIT_DATA 0x%llx\n",
3198 proc_selfpid(),
3199 (current_task()->bsd_info
3200 ? proc_name_address(current_task()->bsd_info)
3201 : "?"),
3202 __FUNCTION__,
3203 (uint64_t) map->size,
3204 (uint64_t) map->data_limit);
3205 DTRACE_VM2(vm_map_enter_RLIMIT_DATA,
3206 vm_map_size_t, map->size,
3207 uint64_t, map->data_limit);
3208 vm_map_enter_RLIMIT_DATA_count++;
3209 }
3210 }
3211
3212 if (result == KERN_SUCCESS) {
3213 vm_prot_t pager_prot;
3214 memory_object_t pager;
3215
3216 #if DEBUG
3217 if (pmap_empty &&
3218 !(vmk_flags.vmkf_no_pmap_check)) {
3219 assert(pmap_is_empty(map->pmap,
3220 *address,
3221 *address + size));
3222 }
3223 #endif /* DEBUG */
3224
3225 /*
3226 * For "named" VM objects, let the pager know that the
3227 * memory object is being mapped. Some pagers need to keep
3228 * track of this, to know when they can reclaim the memory
3229 * object, for example.
3230 * VM calls memory_object_map() for each mapping (specifying
3231 * the protection of each mapping) and calls
3232 * memory_object_last_unmap() when all the mappings are gone.
3233 */
3234 pager_prot = max_protection;
3235 if (needs_copy) {
3236 /*
3237 * Copy-On-Write mapping: won't modify
3238 * the memory object.
3239 */
3240 pager_prot &= ~VM_PROT_WRITE;
3241 }
3242 if (!is_submap &&
3243 object != VM_OBJECT_NULL &&
3244 object->named &&
3245 object->pager != MEMORY_OBJECT_NULL) {
3246 vm_object_lock(object);
3247 pager = object->pager;
3248 if (object->named &&
3249 pager != MEMORY_OBJECT_NULL) {
3250 assert(object->pager_ready);
3251 vm_object_mapping_wait(object, THREAD_UNINT);
3252 vm_object_mapping_begin(object);
3253 vm_object_unlock(object);
3254
3255 kr = memory_object_map(pager, pager_prot);
3256 assert(kr == KERN_SUCCESS);
3257
3258 vm_object_lock(object);
3259 vm_object_mapping_end(object);
3260 }
3261 vm_object_unlock(object);
3262 }
3263 }
3264
3265 assert(map_locked == TRUE);
3266
3267 if (!keep_map_locked) {
3268 vm_map_unlock(map);
3269 map_locked = FALSE;
3270 }
3271
3272 /*
3273 * We can't hold the map lock if we enter this block.
3274 */
3275
3276 if (result == KERN_SUCCESS) {
3277 /* Wire down the new entry if the user
3278 * requested all new map entries be wired.
3279 */
3280 if ((map->wiring_required) || (superpage_size)) {
3281 assert(!keep_map_locked);
3282 pmap_empty = FALSE; /* pmap won't be empty */
3283 kr = vm_map_wire_kernel(map, start, end,
3284 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3285 TRUE);
3286 result = kr;
3287 }
3288
3289 }
3290
3291 if (result != KERN_SUCCESS) {
3292 if (new_mapping_established) {
3293 /*
3294 * The caller had an extra reference on the VM object
3295 * it gave us.
3296 * We've transferred that reference to the mapping we
3297 * just established but we're about to undo that mapping
3298 * and release that reference.
3299 * The caller expects its reference to be consumed on
3300 * success only, so we have to get the extra reference
3301 * back for the caller.
3302 */
3303 vm_object_reference(caller_object);
3304
3305 /*
3306 * We have to get rid of the new mappings since we
3307 * won't make them available to the user.
3308 * Try to do that atomically, to minimize the risk
3309 * that someone else creates new mappings in that range.
3310 */
3311 zap_new_map = vm_map_create_options(PMAP_NULL,
3312 *address,
3313 *address + size,
3314 VM_MAP_CREATE_ZAP_OPTIONS(map));
3315 vm_map_set_page_shift(zap_new_map,
3316 VM_MAP_PAGE_SHIFT(map));
3317
3318 if (!map_locked) {
3319 vm_map_lock(map);
3320 map_locked = TRUE;
3321 }
3322 (void) vm_map_delete(map, *address, *address + size,
3323 (VM_MAP_REMOVE_SAVE_ENTRIES |
3324 VM_MAP_REMOVE_NO_MAP_ALIGN),
3325 zap_new_map);
3326 }
3327 if (zap_old_map != VM_MAP_NULL &&
3328 zap_old_map->hdr.nentries != 0) {
3329 vm_map_entry_t entry1, entry2;
3330
3331 /*
3332 * The new mapping failed. Attempt to restore
3333 * the old mappings, saved in the "zap_old_map".
3334 */
3335 if (!map_locked) {
3336 vm_map_lock(map);
3337 map_locked = TRUE;
3338 }
3339
3340 /* first check if the coast is still clear */
3341 start = vm_map_first_entry(zap_old_map)->vme_start;
3342 end = vm_map_last_entry(zap_old_map)->vme_end;
3343 if (vm_map_lookup_entry(map, start, &entry1) ||
3344 vm_map_lookup_entry(map, end, &entry2) ||
3345 entry1 != entry2) {
3346 /*
3347 * Part of that range has already been
3348 * re-mapped: we can't restore the old
3349 * mappings...
3350 */
3351 vm_map_enter_restore_failures++;
3352 } else {
3353 /*
3354 * Transfer the saved map entries from
3355 * "zap_old_map" to the original "map",
3356 * inserting them all after "entry1".
3357 */
3358 for (entry2 = vm_map_first_entry(zap_old_map);
3359 entry2 != vm_map_to_entry(zap_old_map);
3360 entry2 = vm_map_first_entry(zap_old_map)) {
3361 vm_map_size_t entry_size;
3362
3363 entry_size = (entry2->vme_end -
3364 entry2->vme_start);
3365 vm_map_store_entry_unlink(zap_old_map,
3366 entry2);
3367 zap_old_map->size -= entry_size;
3368 vm_map_store_entry_link(map, entry1, entry2,
3369 VM_MAP_KERNEL_FLAGS_NONE);
3370 map->size += entry_size;
3371 entry1 = entry2;
3372 }
3373 if (map->wiring_required) {
3374 /*
3375 * XXX TODO: we should rewire the
3376 * old pages here...
3377 */
3378 }
3379 vm_map_enter_restore_successes++;
3380 }
3381 }
3382 }
3383
3384 /*
3385 * The caller is responsible for releasing the lock if it requested to
3386 * keep the map locked.
3387 */
3388 if (map_locked && !keep_map_locked) {
3389 vm_map_unlock(map);
3390 }
3391
3392 /*
3393 * Get rid of the "zap_maps" and all the map entries that
3394 * they may still contain.
3395 */
3396 if (zap_old_map != VM_MAP_NULL) {
3397 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3398 zap_old_map = VM_MAP_NULL;
3399 }
3400 if (zap_new_map != VM_MAP_NULL) {
3401 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3402 zap_new_map = VM_MAP_NULL;
3403 }
3404
3405 return result;
3406
3407 #undef RETURN
3408 }
3409
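/*
 * Illustrative caller sketch (editorial addition, not compiled): a minimal
 * anonymous, pageable, "anywhere" allocation expressed directly in terms
 * of vm_map_enter().  Real callers normally arrive here through
 * higher-level entry points (e.g. mach_vm_allocate()); the function name,
 * tag and flag choices below are assumptions for illustration only.
 */
#if 0
static kern_return_t
example_allocate_anywhere(
	vm_map_t        map,
	vm_map_size_t   size,
	vm_map_offset_t *addrp)
{
	*addrp = 0;
	return vm_map_enter(map,
	    addrp,                              /* IN/OUT: chosen address */
	    vm_map_round_page(size, VM_MAP_PAGE_MASK(map)),
	    (vm_map_offset_t)0,                 /* mask: no alignment constraint */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_MEMORY_MALLOC,                   /* alias */
	    VM_OBJECT_NULL,                     /* anonymous, allocated lazily */
	    (vm_object_offset_t)0,
	    FALSE,                              /* needs_copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
}
#endif
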
3410 #if __arm64__
3411 extern const struct memory_object_pager_ops fourk_pager_ops;
3412 kern_return_t
3413 vm_map_enter_fourk(
3414 vm_map_t map,
3415 vm_map_offset_t *address, /* IN/OUT */
3416 vm_map_size_t size,
3417 vm_map_offset_t mask,
3418 int flags,
3419 vm_map_kernel_flags_t vmk_flags,
3420 vm_tag_t alias,
3421 vm_object_t object,
3422 vm_object_offset_t offset,
3423 boolean_t needs_copy,
3424 vm_prot_t cur_protection,
3425 vm_prot_t max_protection,
3426 vm_inherit_t inheritance)
3427 {
3428 vm_map_entry_t entry, new_entry;
3429 vm_map_offset_t start, fourk_start;
3430 vm_map_offset_t end, fourk_end;
3431 vm_map_size_t fourk_size;
3432 kern_return_t result = KERN_SUCCESS;
3433 vm_map_t zap_old_map = VM_MAP_NULL;
3434 vm_map_t zap_new_map = VM_MAP_NULL;
3435 boolean_t map_locked = FALSE;
3436 boolean_t pmap_empty = TRUE;
3437 boolean_t new_mapping_established = FALSE;
3438 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3439 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3440 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3441 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3442 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3443 boolean_t is_submap = vmk_flags.vmkf_submap;
3444 boolean_t permanent = vmk_flags.vmkf_permanent;
3445 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3446 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3447 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3448 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
3449 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3450 vm_map_offset_t effective_min_offset, effective_max_offset;
3451 kern_return_t kr;
3452 boolean_t clear_map_aligned = FALSE;
3453 memory_object_t fourk_mem_obj;
3454 vm_object_t fourk_object;
3455 vm_map_offset_t fourk_pager_offset;
3456 int fourk_pager_index_start, fourk_pager_index_num;
3457 int cur_idx;
3458 boolean_t fourk_copy;
3459 vm_object_t copy_object;
3460 vm_object_offset_t copy_offset;
3461
3462 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3463 panic("%s:%d", __FUNCTION__, __LINE__);
3464 }
3465 fourk_mem_obj = MEMORY_OBJECT_NULL;
3466 fourk_object = VM_OBJECT_NULL;
3467
3468 if (superpage_size) {
3469 return KERN_NOT_SUPPORTED;
3470 }
3471
3472 if ((cur_protection & VM_PROT_WRITE) &&
3473 (cur_protection & VM_PROT_EXECUTE) &&
3474 #if XNU_TARGET_OS_OSX
3475 map->pmap != kernel_pmap &&
3476 (vm_map_cs_enforcement(map)
3477 #if __arm64__
3478 || !VM_MAP_IS_EXOTIC(map)
3479 #endif /* __arm64__ */
3480 ) &&
3481 #endif /* XNU_TARGET_OS_OSX */
3482 !entry_for_jit) {
3483 DTRACE_VM3(cs_wx,
3484 uint64_t, 0,
3485 uint64_t, 0,
3486 vm_prot_t, cur_protection);
3487 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3488 "turning off execute\n",
3489 proc_selfpid(),
3490 (current_task()->bsd_info
3491 ? proc_name_address(current_task()->bsd_info)
3492 : "?"),
3493 __FUNCTION__);
3494 cur_protection &= ~VM_PROT_EXECUTE;
3495 }
3496
3497 /*
3498 * If the task has requested executable lockdown,
3499 * deny any new executable mapping.
3500 */
3501 if (map->map_disallow_new_exec == TRUE) {
3502 if (cur_protection & VM_PROT_EXECUTE) {
3503 return KERN_PROTECTION_FAILURE;
3504 }
3505 }
3506
3507 if (is_submap) {
3508 return KERN_NOT_SUPPORTED;
3509 }
3510 if (vmk_flags.vmkf_already) {
3511 return KERN_NOT_SUPPORTED;
3512 }
3513 if (purgable || entry_for_jit) {
3514 return KERN_NOT_SUPPORTED;
3515 }
3516
3517 effective_min_offset = map->min_offset;
3518
3519 if (vmk_flags.vmkf_beyond_max) {
3520 return KERN_NOT_SUPPORTED;
3521 } else {
3522 effective_max_offset = map->max_offset;
3523 }
3524
3525 if (size == 0 ||
3526 (offset & FOURK_PAGE_MASK) != 0) {
3527 *address = 0;
3528 return KERN_INVALID_ARGUMENT;
3529 }
3530
3531 #define RETURN(value) { result = value; goto BailOut; }
3532
3533 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3534 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3535
3536 if (!anywhere && overwrite) {
3537 return KERN_NOT_SUPPORTED;
3538 }
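/*
 * NOTE: given the early return just above, the identical check below can
 * never be taken; the zap_old_map setup it guards is effectively
 * unreachable in this routine.
 */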
3539 if (!anywhere && overwrite) {
3540 /*
3541 * Create a temporary VM map to hold the old mappings in the
3542 * affected area while we create the new one.
3543 * This avoids releasing the VM map lock in
3544 * vm_map_entry_delete() and allows atomicity
3545 * when we want to replace some mappings with a new one.
3546 * It also allows us to restore the old VM mappings if the
3547 * new mapping fails.
3548 */
3549 zap_old_map = vm_map_create_options(PMAP_NULL,
3550 *address,
3551 *address + size,
3552 VM_MAP_CREATE_ZAP_OPTIONS(map));
3553 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3554 }
3555
3556 fourk_start = *address;
3557 fourk_size = size;
3558 fourk_end = fourk_start + fourk_size;
3559
3560 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3561 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3562 size = end - start;
3563
3564 if (anywhere) {
3565 return KERN_NOT_SUPPORTED;
3566 } else {
3567 /*
3568 * Verify that:
3569 * the address doesn't itself violate
3570 * the mask requirement.
3571 */
3572
3573 vm_map_lock(map);
3574 map_locked = TRUE;
3575 if ((start & mask) != 0) {
3576 RETURN(KERN_NO_SPACE);
3577 }
3578
3579 /*
3580 * ... the address is within bounds
3581 */
3582
3583 end = start + size;
3584
3585 if ((start < effective_min_offset) ||
3586 (end > effective_max_offset) ||
3587 (start >= end)) {
3588 RETURN(KERN_INVALID_ADDRESS);
3589 }
3590
3591 if (overwrite && zap_old_map != VM_MAP_NULL) {
3592 /*
3593 * Fixed mapping and "overwrite" flag: attempt to
3594 * remove all existing mappings in the specified
3595 * address range, saving them in our "zap_old_map".
3596 */
3597 (void) vm_map_delete(map, start, end,
3598 (VM_MAP_REMOVE_SAVE_ENTRIES |
3599 VM_MAP_REMOVE_NO_MAP_ALIGN),
3600 zap_old_map);
3601 }
3602
3603 /*
3604 * ... the starting address isn't allocated
3605 */
3606 if (vm_map_lookup_entry(map, start, &entry)) {
3607 vm_object_t cur_object, shadow_object;
3608
3609 /*
3610 * We might already have some 4K mappings
3611 * in a 16K page here.
3612 */
3613
3614 if (entry->vme_end - entry->vme_start
3615 != SIXTEENK_PAGE_SIZE) {
3616 RETURN(KERN_NO_SPACE);
3617 }
3618 if (entry->is_sub_map) {
3619 RETURN(KERN_NO_SPACE);
3620 }
3621 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3622 RETURN(KERN_NO_SPACE);
3623 }
3624
3625 /* go all the way down the shadow chain */
3626 cur_object = VME_OBJECT(entry);
3627 vm_object_lock(cur_object);
3628 while (cur_object->shadow != VM_OBJECT_NULL) {
3629 shadow_object = cur_object->shadow;
3630 vm_object_lock(shadow_object);
3631 vm_object_unlock(cur_object);
3632 cur_object = shadow_object;
3633 shadow_object = VM_OBJECT_NULL;
3634 }
3635 if (cur_object->internal ||
3636 cur_object->pager == NULL) {
3637 vm_object_unlock(cur_object);
3638 RETURN(KERN_NO_SPACE);
3639 }
3640 if (cur_object->pager->mo_pager_ops
3641 != &fourk_pager_ops) {
3642 vm_object_unlock(cur_object);
3643 RETURN(KERN_NO_SPACE);
3644 }
3645 fourk_object = cur_object;
3646 fourk_mem_obj = fourk_object->pager;
3647
3648 /* keep the "4K" object alive */
3649 vm_object_reference_locked(fourk_object);
3650 memory_object_reference(fourk_mem_obj);
3651 vm_object_unlock(fourk_object);
3652
3653 /* merge permissions */
3654 entry->protection |= cur_protection;
3655 entry->max_protection |= max_protection;
3656
3657 if ((entry->protection & VM_PROT_WRITE) &&
3658 (entry->protection & VM_PROT_ALLEXEC) &&
3659 fourk_binary_compatibility_unsafe &&
3660 fourk_binary_compatibility_allow_wx) {
3661 /* write+execute: need to be "jit" */
3662 entry->used_for_jit = TRUE;
3663 }
3664 goto map_in_fourk_pager;
3665 }
3666
3667 /*
3668 * ... the next region doesn't overlap the
3669 * end point.
3670 */
3671
3672 if ((entry->vme_next != vm_map_to_entry(map)) &&
3673 (entry->vme_next->vme_start < end)) {
3674 RETURN(KERN_NO_SPACE);
3675 }
3676 }
3677
3678 /*
3679 * At this point,
3680 * "start" and "end" should define the endpoints of the
3681 * available new range, and
3682 * "entry" should refer to the region before the new
3683 * range, and
3684 *
3685 * the map should be locked.
3686 */
3687
3688 /* create a new "4K" pager */
3689 fourk_mem_obj = fourk_pager_create();
3690 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3691 assert(fourk_object);
3692
3693 /* keep the "4K" object alive */
3694 vm_object_reference(fourk_object);
3695
3696 /* create a "copy" object, to map the "4K" object copy-on-write */
3697 fourk_copy = TRUE;
3698 result = vm_object_copy_strategically(fourk_object,
3699 0,
3700 end - start,
3701 &copy_object,
3702 &copy_offset,
3703 &fourk_copy);
3704 assert(result == KERN_SUCCESS);
3705 assert(copy_object != VM_OBJECT_NULL);
3706 assert(copy_offset == 0);
3707
3708 /* map the "4K" pager's copy object */
3709 new_entry =
3710 vm_map_entry_insert(map, entry,
3711 vm_map_trunc_page(start,
3712 VM_MAP_PAGE_MASK(map)),
3713 vm_map_round_page(end,
3714 VM_MAP_PAGE_MASK(map)),
3715 copy_object,
3716 0, /* offset */
3717 vmk_flags,
3718 FALSE, /* needs_copy */
3719 FALSE,
3720 FALSE,
3721 cur_protection, max_protection,
3722 VM_BEHAVIOR_DEFAULT,
3723 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3724 VM_INHERIT_NONE : inheritance),
3725 0,
3726 no_cache,
3727 permanent,
3728 no_copy_on_read,
3729 superpage_size,
3730 clear_map_aligned,
3731 is_submap,
3732 FALSE, /* jit */
3733 alias,
3734 translated_allow_execute);
3735 entry = new_entry;
3736
3737 #if VM_MAP_DEBUG_FOURK
3738 if (vm_map_debug_fourk) {
3739 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3740 map,
3741 (uint64_t) entry->vme_start,
3742 (uint64_t) entry->vme_end,
3743 fourk_mem_obj);
3744 }
3745 #endif /* VM_MAP_DEBUG_FOURK */
3746
3747 new_mapping_established = TRUE;
3748
3749 map_in_fourk_pager:
3750 /* "map" the original "object" where it belongs in the "4K" pager */
3751 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3752 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3753 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3754 fourk_pager_index_num = 4;
3755 } else {
3756 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3757 }
3758 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3759 fourk_pager_index_num = 4 - fourk_pager_index_start;
3760 }
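/*
 * Illustrative example (not from the original source): with 16K map
 * pages, a "fourk_start" that lands 0x3000 bytes into its 16K page
 * gives fourk_pager_offset == 0x3000 and fourk_pager_index_start == 3,
 * so after the clamp above only the last 4K slot of that 16K page is
 * populated by the loop below.
 */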
3761 for (cur_idx = 0;
3762 cur_idx < fourk_pager_index_num;
3763 cur_idx++) {
3764 vm_object_t old_object;
3765 vm_object_offset_t old_offset;
3766
3767 kr = fourk_pager_populate(fourk_mem_obj,
3768 TRUE, /* overwrite */
3769 fourk_pager_index_start + cur_idx,
3770 object,
3771 (object
3772 ? (offset +
3773 (cur_idx * FOURK_PAGE_SIZE))
3774 : 0),
3775 &old_object,
3776 &old_offset);
3777 #if VM_MAP_DEBUG_FOURK
3778 if (vm_map_debug_fourk) {
3779 if (old_object == (vm_object_t) -1 &&
3780 old_offset == (vm_object_offset_t) -1) {
3781 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3782 "pager [%p:0x%llx] "
3783 "populate[%d] "
3784 "[object:%p,offset:0x%llx]\n",
3785 map,
3786 (uint64_t) entry->vme_start,
3787 (uint64_t) entry->vme_end,
3788 fourk_mem_obj,
3789 VME_OFFSET(entry),
3790 fourk_pager_index_start + cur_idx,
3791 object,
3792 (object
3793 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3794 : 0));
3795 } else {
3796 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3797 "pager [%p:0x%llx] "
3798 "populate[%d] [object:%p,offset:0x%llx] "
3799 "old [%p:0x%llx]\n",
3800 map,
3801 (uint64_t) entry->vme_start,
3802 (uint64_t) entry->vme_end,
3803 fourk_mem_obj,
3804 VME_OFFSET(entry),
3805 fourk_pager_index_start + cur_idx,
3806 object,
3807 (object
3808 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3809 : 0),
3810 old_object,
3811 old_offset);
3812 }
3813 }
3814 #endif /* VM_MAP_DEBUG_FOURK */
3815
3816 assert(kr == KERN_SUCCESS);
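/*
 * fourk_pager_populate() may have displaced a previous backing object
 * for this 4K slot: take a reference on the newly installed object and
 * drop the one held on the old object, skipping the "empty" /
 * "unchanged" sentinel values checked below.
 */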
3817 if (object != old_object &&
3818 object != VM_OBJECT_NULL &&
3819 object != (vm_object_t) -1) {
3820 vm_object_reference(object);
3821 }
3822 if (object != old_object &&
3823 old_object != VM_OBJECT_NULL &&
3824 old_object != (vm_object_t) -1) {
3825 vm_object_deallocate(old_object);
3826 }
3827 }
3828
3829 BailOut:
3830 assert(map_locked == TRUE);
3831
3832 if (result == KERN_SUCCESS) {
3833 vm_prot_t pager_prot;
3834 memory_object_t pager;
3835
3836 #if DEBUG
3837 if (pmap_empty &&
3838 !(vmk_flags.vmkf_no_pmap_check)) {
3839 assert(pmap_is_empty(map->pmap,
3840 *address,
3841 *address + size));
3842 }
3843 #endif /* DEBUG */
3844
3845 /*
3846 * For "named" VM objects, let the pager know that the
3847 * memory object is being mapped. Some pagers need to keep
3848 * track of this, to know when they can reclaim the memory
3849 * object, for example.
3850 * VM calls memory_object_map() for each mapping (specifying
3851 * the protection of each mapping) and calls
3852 * memory_object_last_unmap() when all the mappings are gone.
3853 */
3854 pager_prot = max_protection;
3855 if (needs_copy) {
3856 /*
3857 * Copy-On-Write mapping: won't modify
3858 * the memory object.
3859 */
3860 pager_prot &= ~VM_PROT_WRITE;
3861 }
3862 if (!is_submap &&
3863 object != VM_OBJECT_NULL &&
3864 object->named &&
3865 object->pager != MEMORY_OBJECT_NULL) {
3866 vm_object_lock(object);
3867 pager = object->pager;
3868 if (object->named &&
3869 pager != MEMORY_OBJECT_NULL) {
3870 assert(object->pager_ready);
3871 vm_object_mapping_wait(object, THREAD_UNINT);
3872 vm_object_mapping_begin(object);
3873 vm_object_unlock(object);
3874
3875 kr = memory_object_map(pager, pager_prot);
3876 assert(kr == KERN_SUCCESS);
3877
3878 vm_object_lock(object);
3879 vm_object_mapping_end(object);
3880 }
3881 vm_object_unlock(object);
3882 }
3883 if (!is_submap &&
3884 fourk_object != VM_OBJECT_NULL &&
3885 fourk_object->named &&
3886 fourk_object->pager != MEMORY_OBJECT_NULL) {
3887 vm_object_lock(fourk_object);
3888 pager = fourk_object->pager;
3889 if (fourk_object->named &&
3890 pager != MEMORY_OBJECT_NULL) {
3891 assert(fourk_object->pager_ready);
3892 vm_object_mapping_wait(fourk_object,
3893 THREAD_UNINT);
3894 vm_object_mapping_begin(fourk_object);
3895 vm_object_unlock(fourk_object);
3896
3897 kr = memory_object_map(pager, VM_PROT_READ);
3898 assert(kr == KERN_SUCCESS);
3899
3900 vm_object_lock(fourk_object);
3901 vm_object_mapping_end(fourk_object);
3902 }
3903 vm_object_unlock(fourk_object);
3904 }
3905 }
3906
3907 if (fourk_object != VM_OBJECT_NULL) {
3908 vm_object_deallocate(fourk_object);
3909 fourk_object = VM_OBJECT_NULL;
3910 memory_object_deallocate(fourk_mem_obj);
3911 fourk_mem_obj = MEMORY_OBJECT_NULL;
3912 }
3913
3914 assert(map_locked == TRUE);
3915
3916 if (!keep_map_locked) {
3917 vm_map_unlock(map);
3918 map_locked = FALSE;
3919 }
3920
3921 /*
3922 * We can't hold the map lock if we enter this block.
3923 */
3924
3925 if (result == KERN_SUCCESS) {
3926 /* Wire down the new entry if the user
3927 * requested all new map entries be wired.
3928 */
3929 if ((map->wiring_required) || (superpage_size)) {
3930 assert(!keep_map_locked);
3931 pmap_empty = FALSE; /* pmap won't be empty */
3932 kr = vm_map_wire_kernel(map, start, end,
3933 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3934 TRUE);
3935 result = kr;
3936 }
3937
3938 }
3939
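/*
 * If the wiring above failed, "result" is no longer KERN_SUCCESS and
 * the block below tears the new mapping back down (and, for an
 * "overwrite" request, attempts to restore the zapped old mappings).
 */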
3940 if (result != KERN_SUCCESS) {
3941 if (new_mapping_established) {
3942 /*
3943 * We have to get rid of the new mappings since we
3944 * won't make them available to the user.
3945 * Try to do that atomically, to minimize the risk
3946 * that someone else creates new mappings in that range.
3947 */
3948 zap_new_map = vm_map_create_options(PMAP_NULL,
3949 *address,
3950 *address + size,
3951 VM_MAP_CREATE_ZAP_OPTIONS(map));
3952 vm_map_set_page_shift(zap_new_map,
3953 VM_MAP_PAGE_SHIFT(map));
3954
3955 if (!map_locked) {
3956 vm_map_lock(map);
3957 map_locked = TRUE;
3958 }
3959 (void) vm_map_delete(map, *address, *address + size,
3960 (VM_MAP_REMOVE_SAVE_ENTRIES |
3961 VM_MAP_REMOVE_NO_MAP_ALIGN),
3962 zap_new_map);
3963 }
3964 if (zap_old_map != VM_MAP_NULL &&
3965 zap_old_map->hdr.nentries != 0) {
3966 vm_map_entry_t entry1, entry2;
3967
3968 /*
3969 * The new mapping failed. Attempt to restore
3970 * the old mappings, saved in the "zap_old_map".
3971 */
3972 if (!map_locked) {
3973 vm_map_lock(map);
3974 map_locked = TRUE;
3975 }
3976
3977 /* first check if the coast is still clear */
3978 start = vm_map_first_entry(zap_old_map)->vme_start;
3979 end = vm_map_last_entry(zap_old_map)->vme_end;
3980 if (vm_map_lookup_entry(map, start, &entry1) ||
3981 vm_map_lookup_entry(map, end, &entry2) ||
3982 entry1 != entry2) {
3983 /*
3984 * Part of that range has already been
3985 * re-mapped: we can't restore the old
3986 * mappings...
3987 */
3988 vm_map_enter_restore_failures++;
3989 } else {
3990 /*
3991 * Transfer the saved map entries from
3992 * "zap_old_map" to the original "map",
3993 * inserting them all after "entry1".
3994 */
3995 for (entry2 = vm_map_first_entry(zap_old_map);
3996 entry2 != vm_map_to_entry(zap_old_map);
3997 entry2 = vm_map_first_entry(zap_old_map)) {
3998 vm_map_size_t entry_size;
3999
4000 entry_size = (entry2->vme_end -
4001 entry2->vme_start);
4002 vm_map_store_entry_unlink(zap_old_map,
4003 entry2);
4004 zap_old_map->size -= entry_size;
4005 vm_map_store_entry_link(map, entry1, entry2,
4006 VM_MAP_KERNEL_FLAGS_NONE);
4007 map->size += entry_size;
4008 entry1 = entry2;
4009 }
4010 if (map->wiring_required) {
4011 /*
4012 * XXX TODO: we should rewire the
4013 * old pages here...
4014 */
4015 }
4016 vm_map_enter_restore_successes++;
4017 }
4018 }
4019 }
4020
4021 /*
4022 * The caller is responsible for releasing the lock if it requested to
4023 * keep the map locked.
4024 */
4025 if (map_locked && !keep_map_locked) {
4026 vm_map_unlock(map);
4027 }
4028
4029 /*
4030 * Get rid of the "zap_maps" and all the map entries that
4031 * they may still contain.
4032 */
4033 if (zap_old_map != VM_MAP_NULL) {
4034 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4035 zap_old_map = VM_MAP_NULL;
4036 }
4037 if (zap_new_map != VM_MAP_NULL) {
4038 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4039 zap_new_map = VM_MAP_NULL;
4040 }
4041
4042 return result;
4043
4044 #undef RETURN
4045 }
4046 #endif /* __arm64__ */
4047
4048 /*
4049 * Counters for the prefault optimization.
4050 */
4051 int64_t vm_prefault_nb_pages = 0;
4052 int64_t vm_prefault_nb_bailout = 0;
4053
4054 static kern_return_t
4055 vm_map_enter_mem_object_helper(
4056 vm_map_t target_map,
4057 vm_map_offset_t *address,
4058 vm_map_size_t initial_size,
4059 vm_map_offset_t mask,
4060 int flags,
4061 vm_map_kernel_flags_t vmk_flags,
4062 vm_tag_t tag,
4063 ipc_port_t port,
4064 vm_object_offset_t offset,
4065 boolean_t copy,
4066 vm_prot_t cur_protection,
4067 vm_prot_t max_protection,
4068 vm_inherit_t inheritance,
4069 upl_page_list_ptr_t page_list,
4070 unsigned int page_list_count)
4071 {
4072 vm_map_address_t map_addr;
4073 vm_map_size_t map_size;
4074 vm_object_t object;
4075 vm_object_size_t size;
4076 kern_return_t result;
4077 boolean_t mask_cur_protection, mask_max_protection;
4078 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4079 vm_map_offset_t offset_in_mapping = 0;
4080 #if __arm64__
4081 boolean_t fourk = vmk_flags.vmkf_fourk;
4082 #endif /* __arm64__ */
4083
4084 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4085 /* XXX TODO4K prefaulting depends on page size... */
4086 try_prefault = FALSE;
4087 }
4088
4089 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4090
4091 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4092 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4093 cur_protection &= ~VM_PROT_IS_MASK;
4094 max_protection &= ~VM_PROT_IS_MASK;
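/*
 * VM_PROT_IS_MASK asks that the caller's protections be intersected
 * with the named entry's own protection; see the mask_cur_protection /
 * mask_max_protection handling further down.
 */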
4095
4096 /*
4097 * Check arguments for validity
4098 */
4099 if ((target_map == VM_MAP_NULL) ||
4100 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4101 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4102 (inheritance > VM_INHERIT_LAST_VALID) ||
4103 (try_prefault && (copy || !page_list)) ||
4104 initial_size == 0) {
4105 return KERN_INVALID_ARGUMENT;
4106 }
4107
4108 #if __arm64__
4109 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4110 /* no "fourk" if map is using a sub-page page size */
4111 fourk = FALSE;
4112 }
4113 if (fourk) {
4114 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4115 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4116 } else
4117 #endif /* __arm64__ */
4118 {
4119 map_addr = vm_map_trunc_page(*address,
4120 VM_MAP_PAGE_MASK(target_map));
4121 map_size = vm_map_round_page(initial_size,
4122 VM_MAP_PAGE_MASK(target_map));
4123 }
4124 size = vm_object_round_page(initial_size);
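/*
 * Illustrative example (not from the original source), non-fourk case:
 * on a 16K-page target map, *address == 0x1001000 and
 * initial_size == 0x2000 yield map_addr == 0x1000000 and
 * map_size == 0x4000 after the trunc/round above.
 */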
4125
4126 /*
4127 * Find the vm object (if any) corresponding to this port.
4128 */
4129 if (!IP_VALID(port)) {
4130 object = VM_OBJECT_NULL;
4131 offset = 0;
4132 copy = FALSE;
4133 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4134 vm_named_entry_t named_entry;
4135 vm_object_offset_t data_offset;
4136
4137 named_entry = mach_memory_entry_from_port(port);
4138
4139 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4140 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4141 data_offset = named_entry->data_offset;
4142 offset += named_entry->data_offset;
4143 } else {
4144 data_offset = 0;
4145 }
4146
4147 /* a few checks to make sure user is obeying rules */
4148 if (size == 0) {
4149 if (offset >= named_entry->size) {
4150 return KERN_INVALID_RIGHT;
4151 }
4152 size = named_entry->size - offset;
4153 }
4154 if (mask_max_protection) {
4155 max_protection &= named_entry->protection;
4156 }
4157 if (mask_cur_protection) {
4158 cur_protection &= named_entry->protection;
4159 }
4160 if ((named_entry->protection & max_protection) !=
4161 max_protection) {
4162 return KERN_INVALID_RIGHT;
4163 }
4164 if ((named_entry->protection & cur_protection) !=
4165 cur_protection) {
4166 return KERN_INVALID_RIGHT;
4167 }
4168 if (offset + size < offset) {
4169 /* overflow */
4170 return KERN_INVALID_ARGUMENT;
4171 }
4172 if (named_entry->size < (offset + initial_size)) {
4173 return KERN_INVALID_ARGUMENT;
4174 }
4175
4176 if (named_entry->is_copy) {
4177 /* for a vm_map_copy, we can only map it whole */
4178 if ((size != named_entry->size) &&
4179 (vm_map_round_page(size,
4180 VM_MAP_PAGE_MASK(target_map)) ==
4181 named_entry->size)) {
4182 /* XXX FBDP use the rounded size... */
4183 size = vm_map_round_page(
4184 size,
4185 VM_MAP_PAGE_MASK(target_map));
4186 }
4187 }
4188
4189 /* The caller's "offset" is relative to the start of the named */
4190 /* entry; add the named entry's own offset within the object. */
4191 offset = offset + named_entry->offset;
4192
4193 if (!VM_MAP_PAGE_ALIGNED(size,
4194 VM_MAP_PAGE_MASK(target_map))) {
4195 /*
4196 * Let's not map more than requested;
4197 * vm_map_enter() will handle this "not map-aligned"
4198 * case.
4199 */
4200 map_size = size;
4201 }
4202
4203 if (named_entry->is_sub_map) {
4204 vm_map_t submap;
4205
4206 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4207 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4208 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4209 }
4210
4211 submap = named_entry->backing.map;
4212 vm_map_reference(submap);
4213
4214 vmk_flags.vmkf_submap = TRUE;
4215
4216 result = vm_map_enter(target_map,
4217 &map_addr,
4218 map_size,
4219 mask,
4220 flags,
4221 vmk_flags,
4222 tag,
4223 (vm_object_t)(uintptr_t) submap,
4224 offset,
4225 copy,
4226 cur_protection,
4227 max_protection,
4228 inheritance);
4229 if (result != KERN_SUCCESS) {
4230 vm_map_deallocate(submap);
4231 } else {
4232 /*
4233 * No need to lock "submap" just to check its
4234 * "mapped" flag: that flag is never reset
4235 * once it's been set and if we race, we'll
4236 * just end up setting it twice, which is OK.
4237 */
4238 if (submap->mapped_in_other_pmaps == FALSE &&
4239 vm_map_pmap(submap) != PMAP_NULL &&
4240 vm_map_pmap(submap) !=
4241 vm_map_pmap(target_map)) {
4242 /*
4243 * This submap is being mapped in a map
4244 * that uses a different pmap.
4245 * Set its "mapped_in_other_pmaps" flag
4246 * to indicate that we now need to
4247 * remove mappings from all pmaps rather
4248 * than just the submap's pmap.
4249 */
4250 vm_map_lock(submap);
4251 submap->mapped_in_other_pmaps = TRUE;
4252 vm_map_unlock(submap);
4253 }
4254 *address = map_addr;
4255 }
4256 return result;
4257 } else if (named_entry->is_copy) {
4258 kern_return_t kr;
4259 vm_map_copy_t copy_map;
4260 vm_map_entry_t copy_entry;
4261 vm_map_offset_t copy_addr;
4262 vm_map_copy_t target_copy_map;
4263 vm_map_offset_t overmap_start, overmap_end;
4264 vm_map_offset_t trimmed_start;
4265 vm_map_size_t target_size;
4266
4267 if (flags & ~(VM_FLAGS_FIXED |
4268 VM_FLAGS_ANYWHERE |
4269 VM_FLAGS_OVERWRITE |
4270 VM_FLAGS_RETURN_4K_DATA_ADDR |
4271 VM_FLAGS_RETURN_DATA_ADDR |
4272 VM_FLAGS_ALIAS_MASK)) {
4273 return KERN_INVALID_ARGUMENT;
4274 }
4275
4276 copy_map = named_entry->backing.copy;
4277 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4278 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4279 /* unsupported type; should not happen */
4280 printf("vm_map_enter_mem_object: "
4281 "memory_entry->backing.copy "
4282 "unsupported type 0x%x\n",
4283 copy_map->type);
4284 return KERN_INVALID_ARGUMENT;
4285 }
4286
4287 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4288 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4289 }
4290
4291 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4292 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4293 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4294 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4295 offset_in_mapping &= ~((signed)(0xFFF));
4296 }
4297 }
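/*
 * Illustrative example (not from the original source): on a 16K-page
 * target map, an "offset" of 0x5321 gives offset_in_mapping == 0x1321,
 * or 0x1000 once VM_FLAGS_RETURN_4K_DATA_ADDR rounds it down to a 4K
 * boundary.
 */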
4298
4299 target_copy_map = VM_MAP_COPY_NULL;
4300 target_size = copy_map->size;
4301 overmap_start = 0;
4302 overmap_end = 0;
4303 trimmed_start = 0;
4304 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4305 DEBUG4K_ADJUST("adjusting...\n");
4306 kr = vm_map_copy_adjust_to_target(
4307 copy_map,
4308 offset /* includes data_offset */,
4309 initial_size,
4310 target_map,
4311 copy,
4312 &target_copy_map,
4313 &overmap_start,
4314 &overmap_end,
4315 &trimmed_start);
4316 if (kr != KERN_SUCCESS) {
4317 return kr;
4318 }
4319 target_size = target_copy_map->size;
4320 if (trimmed_start >= data_offset) {
4321 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4322 } else {
4323 data_offset -= trimmed_start;
4324 }
4325 } else {
4326 target_copy_map = copy_map;
4327 }
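/*
 * When the copy map's page size differs from the target map's,
 * vm_map_copy_adjust_to_target() hands back an adjusted copy;
 * "overmap_start"/"overmap_end" account for any extra coverage at
 * either end and are used in the sanity check and the returned address
 * below, while "trimmed_start" records what was trimmed off the front.
 */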
4328
4329 /* reserve a contiguous range */
4330 kr = vm_map_enter(target_map,
4331 &map_addr,
4332 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4333 mask,
4334 flags & (VM_FLAGS_ANYWHERE |
4335 VM_FLAGS_OVERWRITE |
4336 VM_FLAGS_RETURN_4K_DATA_ADDR |
4337 VM_FLAGS_RETURN_DATA_ADDR),
4338 vmk_flags,
4339 tag,
4340 VM_OBJECT_NULL,
4341 0,
4342 FALSE, /* copy */
4343 cur_protection,
4344 max_protection,
4345 inheritance);
4346 if (kr != KERN_SUCCESS) {
4347 DEBUG4K_ERROR("kr 0x%x\n", kr);
4348 if (target_copy_map != copy_map) {
4349 vm_map_copy_discard(target_copy_map);
4350 target_copy_map = VM_MAP_COPY_NULL;
4351 }
4352 return kr;
4353 }
4354
4355 copy_addr = map_addr;
4356
4357 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4358 copy_entry != vm_map_copy_to_entry(target_copy_map);
4359 copy_entry = copy_entry->vme_next) {
4360 int remap_flags;
4361 vm_map_kernel_flags_t vmk_remap_flags;
4362 vm_map_t copy_submap;
4363 vm_object_t copy_object;
4364 vm_map_size_t copy_size;
4365 vm_object_offset_t copy_offset;
4366 int copy_vm_alias;
4367
4368 remap_flags = 0;
4369 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4370
4371 copy_object = VME_OBJECT(copy_entry);
4372 copy_offset = VME_OFFSET(copy_entry);
4373 copy_size = (copy_entry->vme_end -
4374 copy_entry->vme_start);
4375 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4376 if (copy_vm_alias == 0) {
4377 /*
4378 * Caller does not want a specific
4379 * alias for this new mapping: use
4380 * the alias of the original mapping.
4381 */
4382 copy_vm_alias = VME_ALIAS(copy_entry);
4383 }
4384
4385 /* sanity check */
4386 if ((copy_addr + copy_size) >
4387 (map_addr +
4388 overmap_start + overmap_end +
4389 named_entry->size /* XXX full size */)) {
4390 /* over-mapping too much !? */
4391 kr = KERN_INVALID_ARGUMENT;
4392 DEBUG4K_ERROR("kr 0x%x\n", kr);
4393 /* abort */
4394 break;
4395 }
4396
4397 /* take a reference on the object */
4398 if (copy_entry->is_sub_map) {
4399 vmk_remap_flags.vmkf_submap = TRUE;
4400 copy_submap = VME_SUBMAP(copy_entry);
4401 vm_map_lock(copy_submap);
4402 vm_map_reference(copy_submap);
4403 vm_map_unlock(copy_submap);
4404 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4405 } else if (!copy &&
4406 copy_object != VM_OBJECT_NULL &&
4407 (copy_entry->needs_copy ||
4408 copy_object->shadowed ||
4409 (!copy_object->true_share &&
4410 !copy_entry->is_shared &&
4411 copy_object->vo_size > copy_size))) {
4412 /*
4413 * We need to resolve our side of this
4414 * "symmetric" copy-on-write now; we
4415 * need a new object to map and share,
4416 * instead of the current one which
4417 * might still be shared with the
4418 * original mapping.
4419 *
4420 * Note: A "vm_map_copy_t" does not
4421 * have a lock but we're protected by
4422 * the named entry's lock here.
4423 */
4424 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4425 VME_OBJECT_SHADOW(copy_entry, copy_size);
4426 if (!copy_entry->needs_copy &&
4427 copy_entry->protection & VM_PROT_WRITE) {
4428 vm_prot_t prot;
4429
4430 prot = copy_entry->protection & ~VM_PROT_WRITE;
4431 vm_object_pmap_protect(copy_object,
4432 copy_offset,
4433 copy_size,
4434 PMAP_NULL,
4435 PAGE_SIZE,
4436 0,
4437 prot);
4438 }
4439
4440 copy_entry->needs_copy = FALSE;
4441 copy_entry->is_shared = TRUE;
4442 copy_object = VME_OBJECT(copy_entry);
4443 copy_offset = VME_OFFSET(copy_entry);
4444 vm_object_lock(copy_object);
4445 vm_object_reference_locked(copy_object);
4446 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4447 /* we're about to make a shared mapping of this object */
4448 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4449 copy_object->true_share = TRUE;
4450 }
4451 vm_object_unlock(copy_object);
4452 } else {
4453 /*
4454 * We already have the right object
4455 * to map.
4456 */
4457 copy_object = VME_OBJECT(copy_entry);
4458 vm_object_reference(copy_object);
4459 }
4460
4461 /* over-map the object into destination */
4462 remap_flags |= flags;
4463 remap_flags |= VM_FLAGS_FIXED;
4464 remap_flags |= VM_FLAGS_OVERWRITE;
4465 remap_flags &= ~VM_FLAGS_ANYWHERE;
4466 if (!copy && !copy_entry->is_sub_map) {
4467 /*
4468 * copy-on-write should have been
4469 * resolved at this point, or we would
4470 * end up sharing instead of copying.
4471 */
4472 assert(!copy_entry->needs_copy);
4473 }
4474 #if XNU_TARGET_OS_OSX
4475 if (copy_entry->used_for_jit) {
4476 vmk_remap_flags.vmkf_map_jit = TRUE;
4477 }
4478 #endif /* XNU_TARGET_OS_OSX */
4479
4480 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4481 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4482 kr = vm_map_enter(target_map,
4483 &copy_addr,
4484 copy_size,
4485 (vm_map_offset_t) 0,
4486 remap_flags,
4487 vmk_remap_flags,
4488 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4489 copy_object,
4490 copy_offset,
4491 ((copy_object == NULL)
4492 ? FALSE
4493 : (copy || copy_entry->needs_copy)),
4494 cur_protection,
4495 max_protection,
4496 inheritance);
4497 if (kr != KERN_SUCCESS) {
4498 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4499 if (copy_entry->is_sub_map) {
4500 vm_map_deallocate(copy_submap);
4501 } else {
4502 vm_object_deallocate(copy_object);
4503 }
4504 /* abort */
4505 break;
4506 }
4507
4508 /* next mapping */
4509 copy_addr += copy_size;
4510 }
4511
4512 if (kr == KERN_SUCCESS) {
4513 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4514 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4515 *address = map_addr + offset_in_mapping;
4516 } else {
4517 *address = map_addr;
4518 }
4519 if (overmap_start) {
4520 *address += overmap_start;
4521 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4522 }
4523 }
4524 if (target_copy_map != copy_map) {
4525 vm_map_copy_discard(target_copy_map);
4526 target_copy_map = VM_MAP_COPY_NULL;
4527 }
4528
4529 if (kr != KERN_SUCCESS) {
4530 if (!(flags & VM_FLAGS_OVERWRITE)) {
4531 /* deallocate the contiguous range */
4532 (void) vm_deallocate(target_map,
4533 map_addr,
4534 map_size);
4535 }
4536 }
4537
4538 return kr;
4539 }
4540
4541 if (named_entry->is_object) {
4542 unsigned int access;
4543 vm_prot_t protections;
4544 unsigned int wimg_mode;
4545
4546 /* we are mapping a VM object */
4547
4548 protections = named_entry->protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
4549 access = GET_MAP_MEM(named_entry->protection);
4550
4551 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4552 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4553 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4554 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4555 offset_in_mapping &= ~((signed)(0xFFF));
4556 }
4557 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4558 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4559 }
4560
4561 object = vm_named_entry_to_vm_object(named_entry);
4562 assert(object != VM_OBJECT_NULL);
4563 vm_object_lock(object);
4564
4565 vm_object_reference_locked(object);
4566
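/*
 * Apply the named entry's cache/access mode to the object: translate
 * it into WIMG bits and update the object if its current WIMG mode
 * differs.
 */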
4567 wimg_mode = object->wimg_bits;
4568 vm_prot_to_wimg(access, &wimg_mode);
4569 if (object->wimg_bits != wimg_mode) {
4570 vm_object_change_wimg_mode(object, wimg_mode);
4571 }
4572
4573 vm_object_unlock(object);
4574 } else {
4575 panic("invalid VM named entry %p", named_entry);
4576 }
4577 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4578 /*
4579 * JMM - This is temporary until we unify named entries
4580 * and raw memory objects.
4581 *
4582 * Detected fake ip_kotype for a memory object. In
4583 * this case, the port isn't really a port at all, but
4584 * instead is just a raw memory object.
4585 */
4586 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4587 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4588 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4589 }
4590
4591 object = memory_object_to_vm_object((memory_object_t)port);
4592 if (object == VM_OBJECT_NULL) {
4593 return KERN_INVALID_OBJECT;
4594 }
4595 vm_object_reference(object);
4596
4597 /* wait for object (if any) to be ready */
4598 if (object != VM_OBJECT_NULL) {
4599 if (object == kernel_object) {
4600 printf("Warning: Attempt to map kernel object"
4601 " by a non-private kernel entity\n");
4602 return KERN_INVALID_OBJECT;
4603 }
4604 if (!object->pager_ready) {
4605 vm_object_lock(object);
4606
4607 while (!object->pager_ready) {
4608 vm_object_wait(object,
4609 VM_OBJECT_EVENT_PAGER_READY,
4610 THREAD_UNINT);
4611 vm_object_lock(object);
4612 }
4613 vm_object_unlock(object);
4614 }
4615 }
4616 } else {
4617 return KERN_INVALID_OBJECT;
4618 }
4619
4620 if (object != VM_OBJECT_NULL &&
4621 object->named &&
4622 object->pager != MEMORY_OBJECT_NULL &&
4623 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4624 memory_object_t pager;
4625 vm_prot_t pager_prot;
4626 kern_return_t kr;
4627
4628 /*
4629 * For "named" VM objects, let the pager know that the
4630 * memory object is being mapped. Some pagers need to keep
4631 * track of this, to know when they can reclaim the memory
4632 * object, for example.
4633 * VM calls memory_object_map() for each mapping (specifying
4634 * the protection of each mapping) and calls
4635 * memory_object_last_unmap() when all the mappings are gone.
4636 */
4637 pager_prot = max_protection;
4638 if (copy) {
4639 /*
4640 * Copy-On-Write mapping: won't modify the
4641 * memory object.
4642 */
4643 pager_prot &= ~VM_PROT_WRITE;
4644 }
4645 vm_object_lock(object);
4646 pager = object->pager;
4647 if (object->named &&
4648 pager != MEMORY_OBJECT_NULL &&
4649 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4650 assert(object->pager_ready);
4651 vm_object_mapping_wait(object, THREAD_UNINT);
4652 vm_object_mapping_begin(object);
4653 vm_object_unlock(object);
4654
4655 kr = memory_object_map(pager, pager_prot);
4656 assert(kr == KERN_SUCCESS);
4657
4658 vm_object_lock(object);
4659 vm_object_mapping_end(object);
4660 }
4661 vm_object_unlock(object);
4662 }
4663
4664 /*
4665 * Perform the copy if requested
4666 */
4667
4668 if (copy) {
4669 vm_object_t new_object;
4670 vm_object_offset_t new_offset;
4671
4672 result = vm_object_copy_strategically(object, offset,
4673 map_size,
4674 &new_object, &new_offset,
4675 &copy);
4676
4677
4678 if (result == KERN_MEMORY_RESTART_COPY) {
4679 boolean_t success;
4680 boolean_t src_needs_copy;
4681
4682 /*
4683 * XXX
4684 * We currently ignore src_needs_copy.
4685 * This really is the issue of how to make
4686 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4687 * non-kernel users to use. Solution forthcoming.
4688 * In the meantime, since we don't allow non-kernel
4689 * memory managers to specify symmetric copy,
4690 * we won't run into problems here.
4691 */
4692 new_object = object;
4693 new_offset = offset;
4694 success = vm_object_copy_quickly(new_object,
4695 new_offset,
4696 map_size,
4697 &src_needs_copy,
4698 &copy);
4699 assert(success);
4700 result = KERN_SUCCESS;
4701 }
4702 /*
4703 * Throw away the reference to the
4704 * original object, as it won't be mapped.
4705 */
4706
4707 vm_object_deallocate(object);
4708
4709 if (result != KERN_SUCCESS) {
4710 return result;
4711 }
4712
4713 object = new_object;
4714 offset = new_offset;
4715 }
4716
4717 /*
4718 * If non-kernel users want to try to prefault pages, the mapping and prefault
4719 * need to be atomic.
4720 */
4721 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4722 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
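/*
 * With vmkf_keep_map_locked set, the mapping call below is expected to
 * return with the target map still locked, so the prefault loop
 * further down runs atomically with the mapping; the lock is dropped
 * at the end of that loop.
 */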
4723
4724 #if __arm64__
4725 if (fourk) {
4726 /* map this object in a "4K" pager */
4727 result = vm_map_enter_fourk(target_map,
4728 &map_addr,
4729 map_size,
4730 (vm_map_offset_t) mask,
4731 flags,
4732 vmk_flags,
4733 tag,
4734 object,
4735 offset,
4736 copy,
4737 cur_protection,
4738 max_protection,
4739 inheritance);
4740 } else
4741 #endif /* __arm64__ */
4742 {
4743 result = vm_map_enter(target_map,
4744 &map_addr, map_size,
4745 (vm_map_offset_t)mask,
4746 flags,
4747 vmk_flags,
4748 tag,
4749 object, offset,
4750 copy,
4751 cur_protection, max_protection,
4752 inheritance);
4753 }
4754 if (result != KERN_SUCCESS) {
4755 vm_object_deallocate(object);
4756 }
4757
4758 /*
4759 * Try to prefault, and do not forget to release the vm map lock.
4760 */
4761 if (result == KERN_SUCCESS && try_prefault) {
4762 mach_vm_address_t va = map_addr;
4763 kern_return_t kr = KERN_SUCCESS;
4764 unsigned int i = 0;
4765 int pmap_options;
4766
4767 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4768 if (object->internal) {
4769 pmap_options |= PMAP_OPTIONS_INTERNAL;
4770 }
4771
4772 for (i = 0; i < page_list_count; ++i) {
4773 if (!UPL_VALID_PAGE(page_list, i)) {
4774 if (kernel_prefault) {
4775 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4776 result = KERN_MEMORY_ERROR;
4777 break;
4778 }
4779 } else {
4780 /*
4781 * If this function call failed, we should stop
4782 * trying to optimize: other calls are likely
4783 * going to fail too.
4784 *
4785 * We won't report an error for such a failure,
4786 * though; this is only an optimization, not
4787 * something critical.
4788 */
4789 kr = pmap_enter_options(target_map->pmap,
4790 va, UPL_PHYS_PAGE(page_list, i),
4791 cur_protection, VM_PROT_NONE,
4792 0, TRUE, pmap_options, NULL);
4793 if (kr != KERN_SUCCESS) {
4794 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4795 if (kernel_prefault) {
4796 result = kr;
4797 }
4798 break;
4799 }
4800 OSIncrementAtomic64(&vm_prefault_nb_pages);
4801 }
4802
4803 /* Next virtual address */
4804 va += PAGE_SIZE;
4805 }
4806 if (vmk_flags.vmkf_keep_map_locked) {
4807 vm_map_unlock(target_map);
4808 }
4809 }
4810
4811 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4812 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4813 *address = map_addr + offset_in_mapping;
4814 } else {
4815 *address = map_addr;
4816 }
4817 return result;
4818 }
4819
4820 kern_return_t
4821 vm_map_enter_mem_object(
4822 vm_map_t target_map,
4823 vm_map_offset_t *address,
4824 vm_map_size_t initial_size,
4825 vm_map_offset_t mask,
4826 int flags,
4827 vm_map_kernel_flags_t vmk_flags,
4828 vm_tag_t tag,
4829 ipc_port_t port,
4830 vm_object_offset_t offset,
4831 boolean_t copy,
4832 vm_prot_t cur_protection,
4833 vm_prot_t max_protection,
4834 vm_inherit_t inheritance)
4835 {
4836 kern_return_t ret;
4837
4838 ret = vm_map_enter_mem_object_helper(target_map,
4839 address,
4840 initial_size,
4841 mask,
4842 flags,
4843 vmk_flags,
4844 tag,
4845 port,
4846 offset,
4847 copy,
4848 cur_protection,
4849 max_protection,
4850 inheritance,
4851 NULL,
4852 0);
4853
4854 #if KASAN
4855 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4856 kasan_notify_address(*address, initial_size);
4857 }
4858 #endif
4859
4860 return ret;
4861 }
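/*
 * Illustrative use of the wrapper above (a minimal sketch, not from
 * the original source; "map", "size" and "mem_entry_port" are assumed
 * to be provided by the caller, and FALSE is the "copy" argument):
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_mem_object(map, &addr, size,
 *	    (vm_map_offset_t)0,
 *	    VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    mem_entry_port,
 *	    (vm_object_offset_t)0,
 *	    FALSE,
 *	    VM_PROT_DEFAULT, VM_PROT_ALL,
 *	    VM_INHERIT_DEFAULT);
 */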
4862
4863 kern_return_t
4864 vm_map_enter_mem_object_prefault(
4865 vm_map_t target_map,
4866 vm_map_offset_t *address,
4867 vm_map_size_t initial_size,
4868 vm_map_offset_t mask,
4869 int flags,
4870 vm_map_kernel_flags_t vmk_flags,
4871 vm_tag_t tag,
4872 ipc_port_t port,
4873 vm_object_offset_t offset,
4874 vm_prot_t cur_protection,
4875 vm_prot_t max_protection,
4876 upl_page_list_ptr_t page_list,
4877 unsigned int page_list_count)
4878 {
4879 kern_return_t ret;
4880
4881 ret = vm_map_enter_mem_object_helper(target_map,
4882 address,
4883 initial_size,
4884 mask,
4885 flags,
4886 vmk_flags,
4887 tag,
4888 port,
4889 offset,
4890 FALSE,
4891 cur_protection,
4892 max_protection,
4893 VM_INHERIT_DEFAULT,
4894 page_list,
4895 page_list_count);
4896
4897 #if KASAN
4898 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4899 kasan_notify_address(*address, initial_size);
4900 }
4901 #endif
4902
4903 return ret;
4904 }
4905
4906
4907 kern_return_t
4908 vm_map_enter_mem_object_control(
4909 vm_map_t target_map,
4910 vm_map_offset_t *address,
4911 vm_map_size_t initial_size,
4912 vm_map_offset_t mask,
4913 int flags,
4914 vm_map_kernel_flags_t vmk_flags,
4915 vm_tag_t tag,
4916 memory_object_control_t control,
4917 vm_object_offset_t offset,
4918 boolean_t copy,
4919 vm_prot_t cur_protection,
4920 vm_prot_t max_protection,
4921 vm_inherit_t inheritance)
4922 {
4923 vm_map_address_t map_addr;
4924 vm_map_size_t map_size;
4925 vm_object_t object;
4926 vm_object_size_t size;
4927 kern_return_t result;
4928 memory_object_t pager;
4929 vm_prot_t pager_prot;
4930 kern_return_t kr;
4931 #if __arm64__
4932 boolean_t fourk = vmk_flags.vmkf_fourk;
4933 #endif /* __arm64__ */
4934
4935 /*
4936 * Check arguments for validity
4937 */
4938 if ((target_map == VM_MAP_NULL) ||
4939 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4940 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4941 (inheritance > VM_INHERIT_LAST_VALID) ||
4942 initial_size == 0) {
4943 return KERN_INVALID_ARGUMENT;
4944 }
4945
4946 #if __arm64__
4947 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
4948 fourk = FALSE;
4949 }
4950
4951 if (fourk) {
4952 map_addr = vm_map_trunc_page(*address,
4953 FOURK_PAGE_MASK);
4954 map_size = vm_map_round_page(initial_size,
4955 FOURK_PAGE_MASK);
4956 } else
4957 #endif /* __arm64__ */
4958 {
4959 map_addr = vm_map_trunc_page(*address,
4960 VM_MAP_PAGE_MASK(target_map));
4961 map_size = vm_map_round_page(initial_size,
4962 VM_MAP_PAGE_MASK(target_map));
4963 }
4964 size = vm_object_round_page(initial_size);
4965
4966 object = memory_object_control_to_vm_object(control);
4967
4968 if (object == VM_OBJECT_NULL) {
4969 return KERN_INVALID_OBJECT;
4970 }
4971
4972 if (object == kernel_object) {
4973 printf("Warning: Attempt to map kernel object"
4974 " by a non-private kernel entity\n");
4975 return KERN_INVALID_OBJECT;
4976 }
4977
4978 vm_object_lock(object);
4979 object->ref_count++;
4980
4981 /*
4982 * For "named" VM objects, let the pager know that the
4983 * memory object is being mapped. Some pagers need to keep
4984 * track of this, to know when they can reclaim the memory
4985 * object, for example.
4986 * VM calls memory_object_map() for each mapping (specifying
4987 * the protection of each mapping) and calls
4988 * memory_object_last_unmap() when all the mappings are gone.
4989 */
4990 pager_prot = max_protection;
4991 if (copy) {
4992 pager_prot &= ~VM_PROT_WRITE;
4993 }
4994 pager = object->pager;
4995 if (object->named &&
4996 pager != MEMORY_OBJECT_NULL &&
4997 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4998 assert(object->pager_ready);
4999 vm_object_mapping_wait(object, THREAD_UNINT);
5000 vm_object_mapping_begin(object);
5001 vm_object_unlock(object);
5002
5003 kr = memory_object_map(pager, pager_prot);
5004 assert(kr == KERN_SUCCESS);
5005
5006 vm_object_lock(object);
5007 vm_object_mapping_end(object);
5008 }
5009 vm_object_unlock(object);
5010
5011 /*
5012 * Perform the copy if requested
5013 */
5014
5015 if (copy) {
5016 vm_object_t new_object;
5017 vm_object_offset_t new_offset;
5018
5019 result = vm_object_copy_strategically(object, offset, size,
5020 &new_object, &new_offset,
5021 &copy);
5022
5023
5024 if (result == KERN_MEMORY_RESTART_COPY) {
5025 boolean_t success;
5026 boolean_t src_needs_copy;
5027
5028 /*
5029 * XXX
5030 * We currently ignore src_needs_copy.
5031 * This really is the issue of how to make
5032 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5033 * non-kernel users to use. Solution forthcoming.
5034 * In the meantime, since we don't allow non-kernel
5035 * memory managers to specify symmetric copy,
5036 * we won't run into problems here.
5037 */
5038 new_object = object;
5039 new_offset = offset;
5040 success = vm_object_copy_quickly(new_object,
5041 new_offset, size,
5042 &src_needs_copy,
5043 &copy);
5044 assert(success);
5045 result = KERN_SUCCESS;
5046 }
5047 /*
5048 * Throw away the reference to the
5049 * original object, as it won't be mapped.
5050 */
5051
5052 vm_object_deallocate(object);
5053
5054 if (result != KERN_SUCCESS) {
5055 return result;
5056 }
5057
5058 object = new_object;
5059 offset = new_offset;
5060 }
5061
5062 #if __arm64__
5063 if (fourk) {
5064 result = vm_map_enter_fourk(target_map,
5065 &map_addr,
5066 map_size,
5067 (vm_map_offset_t)mask,
5068 flags,
5069 vmk_flags,
5070 tag,
5071 object, offset,
5072 copy,
5073 cur_protection, max_protection,
5074 inheritance);
5075 } else
5076 #endif /* __arm64__ */
5077 {
5078 result = vm_map_enter(target_map,
5079 &map_addr, map_size,
5080 (vm_map_offset_t)mask,
5081 flags,
5082 vmk_flags,
5083 tag,
5084 object, offset,
5085 copy,
5086 cur_protection, max_protection,
5087 inheritance);
5088 }
5089 if (result != KERN_SUCCESS) {
5090 vm_object_deallocate(object);
5091 }
5092 *address = map_addr;
5093
5094 return result;
5095 }
5096
5097
5098 #if VM_CPM
5099
5100 #ifdef MACH_ASSERT
5101 extern pmap_paddr_t avail_start, avail_end;
5102 #endif
5103
5104 /*
5105 * Allocate memory in the specified map, with the caveat that
5106 * the memory is physically contiguous. This call may fail
5107 * if the system can't find sufficient contiguous memory.
5108 * This call may cause or lead to heart-stopping amounts of
5109 * paging activity.
5110 *
5111 * Memory obtained from this call should be freed in the
5112 * normal way, viz., via vm_deallocate.
5113 */
5114 kern_return_t
5115 vm_map_enter_cpm(
5116 vm_map_t map,
5117 vm_map_offset_t *addr,
5118 vm_map_size_t size,
5119 int flags)
5120 {
5121 vm_object_t cpm_obj;
5122 pmap_t pmap;
5123 vm_page_t m, pages;
5124 kern_return_t kr;
5125 vm_map_offset_t va, start, end, offset;
5126 #if MACH_ASSERT
5127 vm_map_offset_t prev_addr = 0;
5128 #endif /* MACH_ASSERT */
5129
5130 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5131 vm_tag_t tag;
5132
5133 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5134 /* XXX TODO4K do we need to support this? */
5135 *addr = 0;
5136 return KERN_NOT_SUPPORTED;
5137 }
5138
5139 VM_GET_FLAGS_ALIAS(flags, tag);
5140
5141 if (size == 0) {
5142 *addr = 0;
5143 return KERN_SUCCESS;
5144 }
5145 if (anywhere) {
5146 *addr = vm_map_min(map);
5147 } else {
5148 *addr = vm_map_trunc_page(*addr,
5149 VM_MAP_PAGE_MASK(map));
5150 }
5151 size = vm_map_round_page(size,
5152 VM_MAP_PAGE_MASK(map));
5153
5154 /*
5155 * LP64todo - cpm_allocate should probably allow
5156 * allocations of >4GB, but not with the current
5157 * algorithm, so just cast down the size for now.
5158 */
5159 if (size > VM_MAX_ADDRESS) {
5160 return KERN_RESOURCE_SHORTAGE;
5161 }
5162 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5163 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5164 return kr;
5165 }
5166
5167 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5168 assert(cpm_obj != VM_OBJECT_NULL);
5169 assert(cpm_obj->internal);
5170 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5171 assert(cpm_obj->can_persist == FALSE);
5172 assert(cpm_obj->pager_created == FALSE);
5173 assert(cpm_obj->pageout == FALSE);
5174 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5175
5176 /*
5177 * Insert pages into object.
5178 */
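/*
 * "pages" is the head of the singly-linked run handed back by
 * cpm_allocate(); each iteration below detaches the next page via
 * NEXT_PAGE() and inserts it at the matching offset in the CPM object.
 */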
5179
5180 vm_object_lock(cpm_obj);
5181 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5182 m = pages;
5183 pages = NEXT_PAGE(m);
5184 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5185
5186 assert(!m->vmp_gobbled);
5187 assert(!m->vmp_wanted);
5188 assert(!m->vmp_pageout);
5189 assert(!m->vmp_tabled);
5190 assert(VM_PAGE_WIRED(m));
5191 assert(m->vmp_busy);
5192 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5193
5194 m->vmp_busy = FALSE;
5195 vm_page_insert(m, cpm_obj, offset);
5196 }
5197 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5198 vm_object_unlock(cpm_obj);
5199
5200 /*
5201 * Hang onto a reference on the object in case a
5202 * multi-threaded application for some reason decides
5203 * to deallocate the portion of the address space into
5204 * which we will insert this object.
5205 *
5206 * Unfortunately, we must insert the object now before
5207 * we can talk to the pmap module about which addresses
5208 * must be wired down. Hence, the race with a multi-
5209 * threaded app.
5210 */
5211 vm_object_reference(cpm_obj);
5212
5213 /*
5214 * Insert object into map.
5215 */
5216
5217 kr = vm_map_enter(
5218 map,
5219 addr,
5220 size,
5221 (vm_map_offset_t)0,
5222 flags,
5223 VM_MAP_KERNEL_FLAGS_NONE,
5224 cpm_obj,
5225 (vm_object_offset_t)0,
5226 FALSE,
5227 VM_PROT_ALL,
5228 VM_PROT_ALL,
5229 VM_INHERIT_DEFAULT);
5230
5231 if (kr != KERN_SUCCESS) {
5232 /*
5233 * A CPM object doesn't have can_persist set,
5234 * so all we have to do is deallocate it to
5235 * free up these pages.
5236 */
5237 assert(cpm_obj->pager_created == FALSE);
5238 assert(cpm_obj->can_persist == FALSE);
5239 assert(cpm_obj->pageout == FALSE);
5240 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5241 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5242 vm_object_deallocate(cpm_obj); /* kill creation ref */
5243 }
5244
5245 /*
5246 * Inform the physical mapping system that the
5247 * range of addresses may not fault, so that
5248 * page tables and such can be locked down as well.
5249 */
5250 start = *addr;
5251 end = start + size;
5252 pmap = vm_map_pmap(map);
5253 pmap_pageable(pmap, start, end, FALSE);
5254
5255 /*
5256 * Enter each page into the pmap, to avoid faults.
5257 * Note that this loop could be coded more efficiently,
5258 * if the need arose, rather than looking up each page
5259 * again.
5260 */
5261 for (offset = 0, va = start; offset < size;
5262 va += PAGE_SIZE, offset += PAGE_SIZE) {
5263 int type_of_fault;
5264
5265 vm_object_lock(cpm_obj);
5266 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5267 assert(m != VM_PAGE_NULL);
5268
5269 vm_page_zero_fill(m);
5270
5271 type_of_fault = DBG_ZERO_FILL_FAULT;
5272
5273 vm_fault_enter(m, pmap, va,
5274 PAGE_SIZE, 0,
5275 VM_PROT_ALL, VM_PROT_WRITE,
5276 VM_PAGE_WIRED(m),
5277 FALSE, /* change_wiring */
5278 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5279 FALSE, /* no_cache */
5280 FALSE, /* cs_bypass */
5281 0, /* user_tag */
5282 0, /* pmap_options */
5283 NULL, /* need_retry */
5284 &type_of_fault);
5285
5286 vm_object_unlock(cpm_obj);
5287 }
5288
5289 #if MACH_ASSERT
5290 /*
5291 * Verify ordering in address space.
5292 */
5293 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5294 vm_object_lock(cpm_obj);
5295 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5296 vm_object_unlock(cpm_obj);
5297 if (m == VM_PAGE_NULL) {
5298 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5299 cpm_obj, (uint64_t)offset);
5300 }
5301 assert(m->vmp_tabled);
5302 assert(!m->vmp_busy);
5303 assert(!m->vmp_wanted);
5304 assert(!m->vmp_fictitious);
5305 assert(!m->vmp_private);
5306 assert(!m->vmp_absent);
5307 assert(!m->vmp_error);
5308 assert(!m->vmp_cleaning);
5309 assert(!m->vmp_laundry);
5310 assert(!m->vmp_precious);
5311 assert(!m->vmp_clustered);
5312 if (offset != 0) {
5313 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5314 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5315 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5316 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5317 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5318 panic("vm_allocate_cpm: pages not contig!");
5319 }
5320 }
5321 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5322 }
5323 #endif /* MACH_ASSERT */
5324
5325 vm_object_deallocate(cpm_obj); /* kill extra ref */
5326
5327 return kr;
5328 }
5329
5330
5331 #else /* VM_CPM */
5332
5333 /*
5334 * Interface is defined in all cases, but unless the kernel
5335 * is built explicitly for this option, the interface does
5336 * nothing.
5337 */
5338
5339 kern_return_t
5340 vm_map_enter_cpm(
5341 __unused vm_map_t map,
5342 __unused vm_map_offset_t *addr,
5343 __unused vm_map_size_t size,
5344 __unused int flags)
5345 {
5346 return KERN_FAILURE;
5347 }
5348 #endif /* VM_CPM */
5349
5350 /* Not used without nested pmaps */
5351 #ifndef NO_NESTED_PMAP
5352 /*
5353 * Clip and unnest a portion of a nested submap mapping.
5354 */
5355
5356
5357 static void
5358 vm_map_clip_unnest(
5359 vm_map_t map,
5360 vm_map_entry_t entry,
5361 vm_map_offset_t start_unnest,
5362 vm_map_offset_t end_unnest)
5363 {
5364 vm_map_offset_t old_start_unnest = start_unnest;
5365 vm_map_offset_t old_end_unnest = end_unnest;
5366
5367 assert(entry->is_sub_map);
5368 assert(VME_SUBMAP(entry) != NULL);
5369 assert(entry->use_pmap);
5370
5371 /*
5372 * Query the platform for the optimal unnest range.
5373 * DRK: There's some duplication of effort here, since
5374 * callers may have adjusted the range to some extent. This
5375 * routine was introduced to support 1GiB subtree nesting
5376 * for x86 platforms, which can also nest on 2MiB boundaries
5377 * depending on size/alignment.
5378 */
5379 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5380 assert(VME_SUBMAP(entry)->is_nested_map);
5381 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5382 log_unnest_badness(map,
5383 old_start_unnest,
5384 old_end_unnest,
5385 VME_SUBMAP(entry)->is_nested_map,
5386 (entry->vme_start +
5387 VME_SUBMAP(entry)->lowest_unnestable_start -
5388 VME_OFFSET(entry)));
5389 }
5390
5391 if (entry->vme_start > start_unnest ||
5392 entry->vme_end < end_unnest) {
5393 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5394 "bad nested entry: start=0x%llx end=0x%llx\n",
5395 (long long)start_unnest, (long long)end_unnest,
5396 (long long)entry->vme_start, (long long)entry->vme_end);
5397 }
5398
5399 if (start_unnest > entry->vme_start) {
5400 _vm_map_clip_start(&map->hdr,
5401 entry,
5402 start_unnest);
5403 if (map->holelistenabled) {
5404 vm_map_store_update_first_free(map, NULL, FALSE);
5405 } else {
5406 vm_map_store_update_first_free(map, map->first_free, FALSE);
5407 }
5408 }
5409 if (entry->vme_end > end_unnest) {
5410 _vm_map_clip_end(&map->hdr,
5411 entry,
5412 end_unnest);
5413 if (map->holelistenabled) {
5414 vm_map_store_update_first_free(map, NULL, FALSE);
5415 } else {
5416 vm_map_store_update_first_free(map, map->first_free, FALSE);
5417 }
5418 }
5419
5420 pmap_unnest(map->pmap,
5421 entry->vme_start,
5422 entry->vme_end - entry->vme_start);
5423 if ((map->mapped_in_other_pmaps) && os_ref_get_count_raw(&map->map_refcnt) != 0) {
5424 /* clean up parent map/maps */
5425 vm_map_submap_pmap_clean(
5426 map, entry->vme_start,
5427 entry->vme_end,
5428 VME_SUBMAP(entry),
5429 VME_OFFSET(entry));
5430 }
5431 entry->use_pmap = FALSE;
5432 if ((map->pmap != kernel_pmap) &&
5433 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5434 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5435 }
5436 }
5437 #endif /* NO_NESTED_PMAP */
5438
5439 /*
5440 * vm_map_clip_start: [ internal use only ]
5441 *
5442 * Asserts that the given entry begins at or after
5443 * the specified address; if necessary,
5444 * it splits the entry into two.
5445 */
5446 void
5447 vm_map_clip_start(
5448 vm_map_t map,
5449 vm_map_entry_t entry,
5450 vm_map_offset_t startaddr)
5451 {
5452 #ifndef NO_NESTED_PMAP
5453 if (entry->is_sub_map &&
5454 entry->use_pmap &&
5455 startaddr >= entry->vme_start) {
5456 vm_map_offset_t start_unnest, end_unnest;
5457
5458 /*
5459 * Make sure "startaddr" is no longer in a nested range
5460 * before we clip. Unnest only the minimum range the platform
5461 * can handle.
5462 * vm_map_clip_unnest may perform additional adjustments to
5463 * the unnest range.
5464 */
5465 start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
5466 end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
5467 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5468 }
5469 #endif /* NO_NESTED_PMAP */
5470 if (startaddr > entry->vme_start) {
5471 if (VME_OBJECT(entry) &&
5472 !entry->is_sub_map &&
5473 VME_OBJECT(entry)->phys_contiguous) {
5474 pmap_remove(map->pmap,
5475 (addr64_t)(entry->vme_start),
5476 (addr64_t)(entry->vme_end));
5477 }
5478 if (entry->vme_atomic) {
5479 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)", map, entry);
5480 }
5481
5482 DTRACE_VM5(
5483 vm_map_clip_start,
5484 vm_map_t, map,
5485 vm_map_offset_t, entry->vme_start,
5486 vm_map_offset_t, entry->vme_end,
5487 vm_map_offset_t, startaddr,
5488 int, VME_ALIAS(entry));
5489
5490 _vm_map_clip_start(&map->hdr, entry, startaddr);
5491 if (map->holelistenabled) {
5492 vm_map_store_update_first_free(map, NULL, FALSE);
5493 } else {
5494 vm_map_store_update_first_free(map, map->first_free, FALSE);
5495 }
5496 }
5497 }
5498
5499
5500 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5501 MACRO_BEGIN \
5502 if ((startaddr) > (entry)->vme_start) \
5503 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5504 MACRO_END
5505
5506 /*
5507 * This routine is called only when it is known that
5508 * the entry must be split.
5509 */
5510 static void
5511 _vm_map_clip_start(
5512 struct vm_map_header *map_header,
5513 vm_map_entry_t entry,
5514 vm_map_offset_t start)
5515 {
5516 vm_map_entry_t new_entry;
5517
5518 /*
5519 * Split off the front portion --
5520 * note that we must insert the new
5521 * entry BEFORE this one, so that
5522 * this entry has the specified starting
5523 * address.
5524 */
5525
5526 if (entry->map_aligned) {
5527 assert(VM_MAP_PAGE_ALIGNED(start,
5528 VM_MAP_HDR_PAGE_MASK(map_header)));
5529 }
5530
5531 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5532 vm_map_entry_copy_full(new_entry, entry);
5533
5534 new_entry->vme_end = start;
5535 assert(new_entry->vme_start < new_entry->vme_end);
5536 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5537 assert(start < entry->vme_end);
5538 entry->vme_start = start;
5539
5540 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5541
5542 if (entry->is_sub_map) {
5543 vm_map_reference(VME_SUBMAP(new_entry));
5544 } else {
5545 vm_object_reference(VME_OBJECT(new_entry));
5546 }
5547 }
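/*
 * For illustration (not from the original source): clipping an entry
 * covering [0x1000, 0x5000) at start == 0x3000 inserts a new entry for
 * [0x1000, 0x3000) before it, while the original entry becomes
 * [0x3000, 0x5000) with its VME_OFFSET advanced by 0x2000.
 */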
5548
5549
5550 /*
5551 * vm_map_clip_end: [ internal use only ]
5552 *
5553 * Asserts that the given entry ends at or before
5554 * the specified address; if necessary,
5555 * it splits the entry into two.
5556 */
5557 void
5558 vm_map_clip_end(
5559 vm_map_t map,
5560 vm_map_entry_t entry,
5561 vm_map_offset_t endaddr)
5562 {
5563 if (endaddr > entry->vme_end) {
5564 /*
5565 * Within the scope of this clipping, limit "endaddr" to
5566 * the end of this map entry...
5567 */
5568 endaddr = entry->vme_end;
5569 }
5570 #ifndef NO_NESTED_PMAP
5571 if (entry->is_sub_map && entry->use_pmap) {
5572 vm_map_offset_t start_unnest, end_unnest;
5573
5574 /*
5575 * Make sure the range between the start of this entry and
5576 * the new "endaddr" is no longer nested before we clip.
5577 * Unnest only the minimum range the platform can handle.
5578 * vm_map_clip_unnest may perform additional adjustments to
5579 * the unnest range.
5580 */
5581 start_unnest = entry->vme_start;
5582 end_unnest =
5583 (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
5584 ~(pmap_shared_region_size_min(map->pmap) - 1);
5585 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5586 }
5587 #endif /* NO_NESTED_PMAP */
5588 if (endaddr < entry->vme_end) {
5589 if (VME_OBJECT(entry) &&
5590 !entry->is_sub_map &&
5591 VME_OBJECT(entry)->phys_contiguous) {
5592 pmap_remove(map->pmap,
5593 (addr64_t)(entry->vme_start),
5594 (addr64_t)(entry->vme_end));
5595 }
5596 if (entry->vme_atomic) {
5597 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)", map, entry);
5598 }
5599 DTRACE_VM5(
5600 vm_map_clip_end,
5601 vm_map_t, map,
5602 vm_map_offset_t, entry->vme_start,
5603 vm_map_offset_t, entry->vme_end,
5604 vm_map_offset_t, endaddr,
5605 int, VME_ALIAS(entry));
5606
5607 _vm_map_clip_end(&map->hdr, entry, endaddr);
5608 if (map->holelistenabled) {
5609 vm_map_store_update_first_free(map, NULL, FALSE);
5610 } else {
5611 vm_map_store_update_first_free(map, map->first_free, FALSE);
5612 }
5613 }
5614 }
5615
5616
5617 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5618 MACRO_BEGIN \
5619 if ((endaddr) < (entry)->vme_end) \
5620 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5621 MACRO_END
5622
5623 /*
5624 * This routine is called only when it is known that
5625 * the entry must be split.
5626 */
5627 static void
5628 _vm_map_clip_end(
5629 struct vm_map_header *map_header,
5630 vm_map_entry_t entry,
5631 vm_map_offset_t end)
5632 {
5633 vm_map_entry_t new_entry;
5634
5635 /*
5636 * Create a new entry and insert it
5637 * AFTER the specified entry
5638 */
5639
5640 if (entry->map_aligned) {
5641 assert(VM_MAP_PAGE_ALIGNED(end,
5642 VM_MAP_HDR_PAGE_MASK(map_header)));
5643 }
5644
5645 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5646 vm_map_entry_copy_full(new_entry, entry);
5647
5648 assert(entry->vme_start < end);
5649 new_entry->vme_start = entry->vme_end = end;
5650 VME_OFFSET_SET(new_entry,
5651 VME_OFFSET(new_entry) + (end - entry->vme_start));
5652 assert(new_entry->vme_start < new_entry->vme_end);
5653
5654 _vm_map_store_entry_link(map_header, entry, new_entry);
5655
5656 if (entry->is_sub_map) {
5657 vm_map_reference(VME_SUBMAP(new_entry));
5658 } else {
5659 vm_object_reference(VME_OBJECT(new_entry));
5660 }
5661 }
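/*
 * Illustrative sketch (hypothetical addresses, not part of the build):
 * how the clip helpers are typically used together by a caller holding
 * the map lock.  Given an entry spanning [0x1000, 0x5000) and an
 * operation targeting [0x2000, 0x3000):
 *
 *	vm_map_clip_start(map, entry, 0x2000);	// entry becomes [0x2000, 0x5000)
 *	vm_map_clip_end(map, entry, 0x3000);	// entry becomes [0x2000, 0x3000)
 *
 * The clipped-off pieces remain in the map as separate entries covering
 * [0x1000, 0x2000) and [0x3000, 0x5000), each holding its own reference
 * on the underlying object or submap.
 */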
5662
5663
5664 /*
5665 * VM_MAP_RANGE_CHECK: [ internal use only ]
5666 *
5667 * Asserts that the starting and ending region
5668 * addresses fall within the valid range of the map.
5669 */
5670 #define VM_MAP_RANGE_CHECK(map, start, end) \
5671 MACRO_BEGIN \
5672 if (start < vm_map_min(map)) \
5673 start = vm_map_min(map); \
5674 if (end > vm_map_max(map)) \
5675 end = vm_map_max(map); \
5676 if (start > end) \
5677 start = end; \
5678 MACRO_END
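/*
 * Example of the clamping behavior (the numbers are hypothetical): with
 * vm_map_min(map) == 0x1000 and vm_map_max(map) == 0x9000, a request of
 * start = 0x500, end = 0xA000 is silently clamped to [0x1000, 0x9000),
 * and a request that lies entirely outside the map collapses to an empty
 * range (start == end) rather than failing.
 */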
5679
5680 /*
5681 * vm_map_range_check: [ internal use only ]
5682 *
5683 * Check that the region defined by the specified start and
5684  * end addresses is wholly contained within a single map
5685  * entry or set of adjacent map entries of the specified map,
5686 * i.e. the specified region contains no unmapped space.
5687 * If any or all of the region is unmapped, FALSE is returned.
5688 * Otherwise, TRUE is returned and if the output argument 'entry'
5689 * is not NULL it points to the map entry containing the start
5690 * of the region.
5691 *
5692 * The map is locked for reading on entry and is left locked.
5693 */
5694 static boolean_t
5695 vm_map_range_check(
5696 vm_map_t map,
5697 vm_map_offset_t start,
5698 vm_map_offset_t end,
5699 vm_map_entry_t *entry)
5700 {
5701 vm_map_entry_t cur;
5702 vm_map_offset_t prev;
5703
5704 /*
5705 * Basic sanity checks first
5706 */
5707 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5708 return FALSE;
5709 }
5710
5711 /*
5712 * Check first if the region starts within a valid
5713 * mapping for the map.
5714 */
5715 if (!vm_map_lookup_entry(map, start, &cur)) {
5716 return FALSE;
5717 }
5718
5719 /*
5720 * Optimize for the case that the region is contained
5721 * in a single map entry.
5722 */
5723 if (entry != (vm_map_entry_t *) NULL) {
5724 *entry = cur;
5725 }
5726 if (end <= cur->vme_end) {
5727 return TRUE;
5728 }
5729
5730 /*
5731 * If the region is not wholly contained within a
5732 * single entry, walk the entries looking for holes.
5733 */
5734 prev = cur->vme_end;
5735 cur = cur->vme_next;
5736 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5737 if (end <= cur->vme_end) {
5738 return TRUE;
5739 }
5740 prev = cur->vme_end;
5741 cur = cur->vme_next;
5742 }
5743 return FALSE;
5744 }
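/*
 * Hedged usage sketch (caller-side, not part of the build): the check and
 * any subsequent use of the returned entry must happen under the same map
 * lock, since the entry pointer is only valid while the lock is held.
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_range_check(map, start, end, &entry)) {
 *		// operate on the fully-mapped range beginning at "entry"
 *	}
 *	vm_map_unlock_read(map);
 */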
5745
5746 /*
5747 * vm_map_submap: [ kernel use only ]
5748 *
5749 * Mark the given range as handled by a subordinate map.
5750 *
5751 * This range must have been created with vm_map_find using
5752 * the vm_submap_object, and no other operations may have been
5753 * performed on this range prior to calling vm_map_submap.
5754 *
5755 * Only a limited number of operations can be performed
5756  * within this range after calling vm_map_submap:
5757 * vm_fault
5758 * [Don't try vm_map_copyin!]
5759 *
5760 * To remove a submapping, one must first remove the
5761 * range from the superior map, and then destroy the
5762 * submap (if desired). [Better yet, don't try it.]
5763 */
5764 kern_return_t
5765 vm_map_submap(
5766 vm_map_t map,
5767 vm_map_offset_t start,
5768 vm_map_offset_t end,
5769 vm_map_t submap,
5770 vm_map_offset_t offset,
5771 #ifdef NO_NESTED_PMAP
5772 __unused
5773 #endif /* NO_NESTED_PMAP */
5774 boolean_t use_pmap)
5775 {
5776 vm_map_entry_t entry;
5777 kern_return_t result = KERN_INVALID_ARGUMENT;
5778 vm_object_t object;
5779
5780 vm_map_lock(map);
5781
5782 if (!vm_map_lookup_entry(map, start, &entry)) {
5783 entry = entry->vme_next;
5784 }
5785
5786 if (entry == vm_map_to_entry(map) ||
5787 entry->is_sub_map) {
5788 vm_map_unlock(map);
5789 return KERN_INVALID_ARGUMENT;
5790 }
5791
5792 vm_map_clip_start(map, entry, start);
5793 vm_map_clip_end(map, entry, end);
5794
5795 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5796 (!entry->is_sub_map) &&
5797 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5798 (object->resident_page_count == 0) &&
5799 (object->copy == VM_OBJECT_NULL) &&
5800 (object->shadow == VM_OBJECT_NULL) &&
5801 (!object->pager_created)) {
5802 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5803 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5804 vm_object_deallocate(object);
5805 entry->is_sub_map = TRUE;
5806 entry->use_pmap = FALSE;
5807 VME_SUBMAP_SET(entry, submap);
5808 vm_map_reference(submap);
5809 if (submap->mapped_in_other_pmaps == FALSE &&
5810 vm_map_pmap(submap) != PMAP_NULL &&
5811 vm_map_pmap(submap) != vm_map_pmap(map)) {
5812 /*
5813 * This submap is being mapped in a map
5814 * that uses a different pmap.
5815 * Set its "mapped_in_other_pmaps" flag
5816 * to indicate that we now need to
5817 * remove mappings from all pmaps rather
5818 * than just the submap's pmap.
5819 */
5820 submap->mapped_in_other_pmaps = TRUE;
5821 }
5822
5823 #ifndef NO_NESTED_PMAP
5824 if (use_pmap) {
5825 /* nest if platform code will allow */
5826 if (submap->pmap == NULL) {
5827 ledger_t ledger = map->pmap->ledger;
5828 submap->pmap = pmap_create_options(ledger,
5829 (vm_map_size_t) 0, 0);
5830 if (submap->pmap == PMAP_NULL) {
5831 vm_map_unlock(map);
5832 return KERN_NO_SPACE;
5833 }
5834 #if defined(__arm__) || defined(__arm64__)
5835 pmap_set_nested(submap->pmap);
5836 #endif
5837 }
5838 result = pmap_nest(map->pmap,
5839 (VME_SUBMAP(entry))->pmap,
5840 (addr64_t)start,
5841 (uint64_t)(end - start));
5842 if (result) {
5843 panic("vm_map_submap: pmap_nest failed, rc = %08X", result);
5844 }
5845 entry->use_pmap = TRUE;
5846 }
5847 #else /* NO_NESTED_PMAP */
5848 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5849 #endif /* NO_NESTED_PMAP */
5850 result = KERN_SUCCESS;
5851 }
5852 vm_map_unlock(map);
5853
5854 return result;
5855 }
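/*
 * Hedged usage sketch (kernel-internal, addresses hypothetical): once the
 * range [addr, addr + size) has been set up against vm_submap_object as
 * described in the comment above, the submap is nested with:
 *
 *	kr = vm_map_submap(parent_map, addr, addr + size,
 *	                   submap, 0, TRUE);	// TRUE: nest the pmap if supported
 *
 * On configurations built with NO_NESTED_PMAP the use_pmap argument is
 * ignored and any existing pmap mappings for the range are removed instead.
 */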
5856
5857 /*
5858 * vm_map_protect:
5859 *
5860 * Sets the protection of the specified address
5861 * region in the target map. If "set_max" is
5862 * specified, the maximum protection is to be set;
5863 * otherwise, only the current protection is affected.
5864 */
5865 kern_return_t
5866 vm_map_protect(
5867 vm_map_t map,
5868 vm_map_offset_t start,
5869 vm_map_offset_t end,
5870 vm_prot_t new_prot,
5871 boolean_t set_max)
5872 {
5873 vm_map_entry_t current;
5874 vm_map_offset_t prev;
5875 vm_map_entry_t entry;
5876 vm_prot_t new_max;
5877 int pmap_options = 0;
5878 kern_return_t kr;
5879
5880 if (new_prot & VM_PROT_COPY) {
5881 vm_map_offset_t new_start;
5882 vm_prot_t cur_prot, max_prot;
5883 vm_map_kernel_flags_t kflags;
5884
5885 /* LP64todo - see below */
5886 if (start >= map->max_offset) {
5887 return KERN_INVALID_ADDRESS;
5888 }
5889
5890 if ((new_prot & VM_PROT_ALLEXEC) &&
5891 map->pmap != kernel_pmap &&
5892 (vm_map_cs_enforcement(map)
5893 #if XNU_TARGET_OS_OSX && __arm64__
5894 || !VM_MAP_IS_EXOTIC(map)
5895 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
5896 ) &&
5897 VM_MAP_POLICY_WX_FAIL(map)) {
5898 DTRACE_VM3(cs_wx,
5899 uint64_t, (uint64_t) start,
5900 uint64_t, (uint64_t) end,
5901 vm_prot_t, new_prot);
5902 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5903 proc_selfpid(),
5904 (current_task()->bsd_info
5905 ? proc_name_address(current_task()->bsd_info)
5906 : "?"),
5907 __FUNCTION__);
5908 return KERN_PROTECTION_FAILURE;
5909 }
5910
5911 /*
5912 * Let vm_map_remap_extract() know that it will need to:
5913 * + make a copy of the mapping
5914 * + add VM_PROT_WRITE to the max protections
5915 * + remove any protections that are no longer allowed from the
5916 * max protections (to avoid any WRITE/EXECUTE conflict, for
5917 * example).
5918 * Note that "max_prot" is an IN/OUT parameter only for this
5919 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5920 * only.
5921 */
5922 max_prot = new_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC);
5923 cur_prot = VM_PROT_NONE;
5924 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5925 kflags.vmkf_remap_prot_copy = TRUE;
5926 kflags.vmkf_overwrite_immutable = TRUE;
5927 new_start = start;
5928 kr = vm_map_remap(map,
5929 &new_start,
5930 end - start,
5931 0, /* mask */
5932 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5933 kflags,
5934 0,
5935 map,
5936 start,
5937 TRUE, /* copy-on-write remapping! */
5938 &cur_prot, /* IN/OUT */
5939 &max_prot, /* IN/OUT */
5940 VM_INHERIT_DEFAULT);
5941 if (kr != KERN_SUCCESS) {
5942 return kr;
5943 }
5944 new_prot &= ~VM_PROT_COPY;
5945 }
5946
5947 vm_map_lock(map);
5948
5949 /* LP64todo - remove this check when vm_map_commpage64()
5950 * no longer has to stuff in a map_entry for the commpage
5951 * above the map's max_offset.
5952 */
5953 if (start >= map->max_offset) {
5954 vm_map_unlock(map);
5955 return KERN_INVALID_ADDRESS;
5956 }
5957
5958 while (1) {
5959 /*
5960 * Lookup the entry. If it doesn't start in a valid
5961 * entry, return an error.
5962 */
5963 if (!vm_map_lookup_entry(map, start, &entry)) {
5964 vm_map_unlock(map);
5965 return KERN_INVALID_ADDRESS;
5966 }
5967
5968 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5969 start = SUPERPAGE_ROUND_DOWN(start);
5970 continue;
5971 }
5972 break;
5973 }
5974 if (entry->superpage_size) {
5975 end = SUPERPAGE_ROUND_UP(end);
5976 }
5977
5978 /*
5979 * Make a first pass to check for protection and address
5980 * violations.
5981 */
5982
5983 current = entry;
5984 prev = current->vme_start;
5985 while ((current != vm_map_to_entry(map)) &&
5986 (current->vme_start < end)) {
5987 /*
5988 * If there is a hole, return an error.
5989 */
5990 if (current->vme_start != prev) {
5991 vm_map_unlock(map);
5992 return KERN_INVALID_ADDRESS;
5993 }
5994
5995 new_max = current->max_protection;
5996
5997 #if defined(__x86_64__)
5998 /* Allow max mask to include execute prot bits if this map doesn't enforce CS */
5999 if (set_max && (new_prot & VM_PROT_ALLEXEC) && !vm_map_cs_enforcement(map)) {
6000 new_max = (new_max & ~VM_PROT_ALLEXEC) | (new_prot & VM_PROT_ALLEXEC);
6001 }
6002 #endif
6003 if ((new_prot & new_max) != new_prot) {
6004 vm_map_unlock(map);
6005 return KERN_PROTECTION_FAILURE;
6006 }
6007
6008 if (current->used_for_jit &&
6009 pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
6010 vm_map_unlock(map);
6011 return KERN_PROTECTION_FAILURE;
6012 }
6013
6014 if ((new_prot & VM_PROT_WRITE) &&
6015 (new_prot & VM_PROT_ALLEXEC) &&
6016 #if XNU_TARGET_OS_OSX
6017 map->pmap != kernel_pmap &&
6018 (vm_map_cs_enforcement(map)
6019 #if __arm64__
6020 || !VM_MAP_IS_EXOTIC(map)
6021 #endif /* __arm64__ */
6022 ) &&
6023 #endif /* XNU_TARGET_OS_OSX */
6024 !(current->used_for_jit)) {
6025 DTRACE_VM3(cs_wx,
6026 uint64_t, (uint64_t) current->vme_start,
6027 uint64_t, (uint64_t) current->vme_end,
6028 vm_prot_t, new_prot);
6029 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6030 proc_selfpid(),
6031 (current_task()->bsd_info
6032 ? proc_name_address(current_task()->bsd_info)
6033 : "?"),
6034 __FUNCTION__);
6035 new_prot &= ~VM_PROT_ALLEXEC;
6036 if (VM_MAP_POLICY_WX_FAIL(map)) {
6037 vm_map_unlock(map);
6038 return KERN_PROTECTION_FAILURE;
6039 }
6040 }
6041
6042 /*
6043 * If the task has requested executable lockdown,
6044  * deny either of:
6045 * - adding executable protections OR
6046 * - adding write protections to an existing executable mapping.
6047 */
6048 if (map->map_disallow_new_exec == TRUE) {
6049 if ((new_prot & VM_PROT_ALLEXEC) ||
6050 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
6051 vm_map_unlock(map);
6052 return KERN_PROTECTION_FAILURE;
6053 }
6054 }
6055
6056 prev = current->vme_end;
6057 current = current->vme_next;
6058 }
6059
6060 #if __arm64__
6061 if (end > prev &&
6062 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
6063 vm_map_entry_t prev_entry;
6064
6065 prev_entry = current->vme_prev;
6066 if (prev_entry != vm_map_to_entry(map) &&
6067 !prev_entry->map_aligned &&
6068 (vm_map_round_page(prev_entry->vme_end,
6069 VM_MAP_PAGE_MASK(map))
6070 == end)) {
6071 /*
6072 * The last entry in our range is not "map-aligned"
6073 * but it would have reached all the way to "end"
6074 * if it had been map-aligned, so this is not really
6075 * a hole in the range and we can proceed.
6076 */
6077 prev = end;
6078 }
6079 }
6080 #endif /* __arm64__ */
6081
6082 if (end > prev) {
6083 vm_map_unlock(map);
6084 return KERN_INVALID_ADDRESS;
6085 }
6086
6087 /*
6088 * Go back and fix up protections.
6089 * Clip to start here if the range starts within
6090 * the entry.
6091 */
6092
6093 current = entry;
6094 if (current != vm_map_to_entry(map)) {
6095 /* clip and unnest if necessary */
6096 vm_map_clip_start(map, current, start);
6097 }
6098
6099 while ((current != vm_map_to_entry(map)) &&
6100 (current->vme_start < end)) {
6101 vm_prot_t old_prot;
6102
6103 vm_map_clip_end(map, current, end);
6104
6105 if (current->is_sub_map) {
6106 /* clipping did unnest if needed */
6107 assert(!current->use_pmap);
6108 }
6109
6110 old_prot = current->protection;
6111
6112 if (set_max) {
6113 current->max_protection = new_prot;
6114 /* Consider either EXECUTE or UEXEC as EXECUTE for this masking */
6115 current->protection = (new_prot & old_prot);
6116 } else {
6117 current->protection = new_prot;
6118 }
6119
6120 /*
6121 * Update physical map if necessary.
6122 * If the request is to turn off write protection,
6123 * we won't do it for real (in pmap). This is because
6124 * it would cause copy-on-write to fail. We've already
6125  * set the new protection in the map, so if a
6126  * write-protect fault occurs, it will be fixed up
6127 * properly, COW or not.
6128 */
6129 if (current->protection != old_prot) {
6130  /* Look one level in: we support nested pmaps */
6131 /* from mapped submaps which are direct entries */
6132 /* in our map */
6133
6134 vm_prot_t prot;
6135
6136 prot = current->protection;
6137 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
6138 prot &= ~VM_PROT_WRITE;
6139 } else {
6140 assert(!VME_OBJECT(current)->code_signed);
6141 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6142 }
6143
6144 if (override_nx(map, VME_ALIAS(current)) && prot) {
6145 prot |= VM_PROT_EXECUTE;
6146 }
6147
6148 #if DEVELOPMENT || DEBUG
6149 if (!(old_prot & VM_PROT_EXECUTE) &&
6150 (prot & VM_PROT_EXECUTE) &&
6151 panic_on_unsigned_execute &&
6152 (proc_selfcsflags() & CS_KILL)) {
6153 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6154 }
6155 #endif /* DEVELOPMENT || DEBUG */
6156
6157 if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
6158 if (current->wired_count) {
6159 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x",
6160 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6161 }
6162
6163 /* If the pmap layer cares about this
6164 * protection type, force a fault for
6165 * each page so that vm_fault will
6166 * repopulate the page with the full
6167 * set of protections.
6168 */
6169 /*
6170 * TODO: We don't seem to need this,
6171 * but this is due to an internal
6172 * implementation detail of
6173 * pmap_protect. Do we want to rely
6174 * on this?
6175 */
6176 prot = VM_PROT_NONE;
6177 }
6178
6179 if (current->is_sub_map && current->use_pmap) {
6180 pmap_protect(VME_SUBMAP(current)->pmap,
6181 current->vme_start,
6182 current->vme_end,
6183 prot);
6184 } else {
6185 if (prot & VM_PROT_WRITE) {
6186 if (VME_OBJECT(current) == compressor_object) {
6187 /*
6188 * For write requests on the
6189  * compressor, we will ask the
6190 * pmap layer to prevent us from
6191 * taking a write fault when we
6192 * attempt to access the mapping
6193 * next.
6194 */
6195 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6196 }
6197 }
6198
6199 pmap_protect_options(map->pmap,
6200 current->vme_start,
6201 current->vme_end,
6202 prot,
6203 pmap_options,
6204 NULL);
6205 }
6206 }
6207 current = current->vme_next;
6208 }
6209
6210 current = entry;
6211 while ((current != vm_map_to_entry(map)) &&
6212 (current->vme_start <= end)) {
6213 vm_map_simplify_entry(map, current);
6214 current = current->vme_next;
6215 }
6216
6217 vm_map_unlock(map);
6218 return KERN_SUCCESS;
6219 }
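/*
 * Hedged usage sketch (addresses hypothetical): tightening the current
 * protection of a range to read-only without touching the maximum
 * protection:
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * Passing set_max == TRUE instead sets the maximum protection to new_prot
 * and clips the current protection to (new_prot & old_prot), while
 * including VM_PROT_COPY in new_prot requests a copy-on-write remapping of
 * the range before the protections are changed.
 */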
6220
6221 /*
6222 * vm_map_inherit:
6223 *
6224 * Sets the inheritance of the specified address
6225 * range in the target map. Inheritance
6226 * affects how the map will be shared with
6227 * child maps at the time of vm_map_fork.
6228 */
6229 kern_return_t
6230 vm_map_inherit(
6231 vm_map_t map,
6232 vm_map_offset_t start,
6233 vm_map_offset_t end,
6234 vm_inherit_t new_inheritance)
6235 {
6236 vm_map_entry_t entry;
6237 vm_map_entry_t temp_entry;
6238
6239 vm_map_lock(map);
6240
6241 VM_MAP_RANGE_CHECK(map, start, end);
6242
6243 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6244 entry = temp_entry;
6245 } else {
6246 temp_entry = temp_entry->vme_next;
6247 entry = temp_entry;
6248 }
6249
6250 /* first check entire range for submaps which can't support the */
6251 /* given inheritance. */
6252 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6253 if (entry->is_sub_map) {
6254 if (new_inheritance == VM_INHERIT_COPY) {
6255 vm_map_unlock(map);
6256 return KERN_INVALID_ARGUMENT;
6257 }
6258 }
6259
6260 entry = entry->vme_next;
6261 }
6262
6263 entry = temp_entry;
6264 if (entry != vm_map_to_entry(map)) {
6265 /* clip and unnest if necessary */
6266 vm_map_clip_start(map, entry, start);
6267 }
6268
6269 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6270 vm_map_clip_end(map, entry, end);
6271 if (entry->is_sub_map) {
6272 /* clip did unnest if needed */
6273 assert(!entry->use_pmap);
6274 }
6275
6276 entry->inheritance = new_inheritance;
6277
6278 entry = entry->vme_next;
6279 }
6280
6281 vm_map_unlock(map);
6282 return KERN_SUCCESS;
6283 }
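/*
 * Hedged usage sketch: marking a range so that a child map created by
 * vm_map_fork() shares it with the parent rather than copying it:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
 *
 * VM_INHERIT_COPY is rejected (KERN_INVALID_ARGUMENT) for ranges that
 * contain submap entries, as checked in the first pass above.
 */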
6284
6285 /*
6286 * Update the accounting for the amount of wired memory in this map. If the user has
6287 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6288 */
6289
6290 static kern_return_t
6291 add_wire_counts(
6292 vm_map_t map,
6293 vm_map_entry_t entry,
6294 boolean_t user_wire)
6295 {
6296 vm_map_size_t size;
6297
6298 if (user_wire) {
6299 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6300
6301 /*
6302 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6303 * this map entry.
6304 */
6305
6306 if (entry->user_wired_count == 0) {
6307 size = entry->vme_end - entry->vme_start;
6308
6309 /*
6310 * Since this is the first time the user is wiring this map entry, check to see if we're
6311  * exceeding the user wire limits. There is a per-map limit, which is the smaller of
6312  * the process's rlimit and the global vm_per_task_user_wire_limit. There is also
6313 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6314 * limit, then we fail.
6315 */
6316
6317 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6318 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6319 if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6320 #if DEVELOPMENT || DEBUG
6321 if (panic_on_mlock_failure) {
6322 panic("mlock: Over global wire limit. %llu bytes wired and requested to wire %llu bytes more", ptoa_64(total_wire_count), (uint64_t) size);
6323 }
6324 #endif /* DEVELOPMENT || DEBUG */
6325 os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6326 } else {
6327 os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6328 #if DEVELOPMENT || DEBUG
6329 if (panic_on_mlock_failure) {
6330 panic("mlock: Over process wire limit. %llu bytes wired and requested to wire %llu bytes more", (uint64_t) map->user_wire_size, (uint64_t) size);
6331 }
6332 #endif /* DEVELOPMENT || DEBUG */
6333 }
6334 return KERN_RESOURCE_SHORTAGE;
6335 }
6336
6337 /*
6338 * The first time the user wires an entry, we also increment the wired_count and add this to
6339 * the total that has been wired in the map.
6340 */
6341
6342 if (entry->wired_count >= MAX_WIRE_COUNT) {
6343 return KERN_FAILURE;
6344 }
6345
6346 entry->wired_count++;
6347 map->user_wire_size += size;
6348 }
6349
6350 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6351 return KERN_FAILURE;
6352 }
6353
6354 entry->user_wired_count++;
6355 } else {
6356 /*
6357  * The kernel is wiring the memory. Just bump the count and continue.
6358 */
6359
6360 if (entry->wired_count >= MAX_WIRE_COUNT) {
6361 panic("vm_map_wire: too many wirings");
6362 }
6363
6364 entry->wired_count++;
6365 }
6366
6367 return KERN_SUCCESS;
6368 }
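/*
 * Worked example of the user-wire limit check above (all numbers are
 * hypothetical): with map->user_wire_size == 48 MB, a per-task limit of
 * MIN(map->user_wire_limit, vm_per_task_user_wire_limit) == 64 MB, and a
 * request of size == 20 MB, the per-task test fails (48 + 20 > 64), so the
 * routine bumps vm_add_wire_count_over_user_limit (assuming the global
 * limit was not also exceeded) and returns KERN_RESOURCE_SHORTAGE without
 * touching any wire counts.
 */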
6369
6370 /*
6371 * Update the memory wiring accounting now that the given map entry is being unwired.
6372 */
6373
6374 static void
6375 subtract_wire_counts(
6376 vm_map_t map,
6377 vm_map_entry_t entry,
6378 boolean_t user_wire)
6379 {
6380 if (user_wire) {
6381 /*
6382 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6383 */
6384
6385 if (entry->user_wired_count == 1) {
6386 /*
6387 * We're removing the last user wire reference. Decrement the wired_count and the total
6388 * user wired memory for this map.
6389 */
6390
6391 assert(entry->wired_count >= 1);
6392 entry->wired_count--;
6393 map->user_wire_size -= entry->vme_end - entry->vme_start;
6394 }
6395
6396 assert(entry->user_wired_count >= 1);
6397 entry->user_wired_count--;
6398 } else {
6399 /*
6400 * The kernel is unwiring the memory. Just update the count.
6401 */
6402
6403 assert(entry->wired_count >= 1);
6404 entry->wired_count--;
6405 }
6406 }
6407
6408 int cs_executable_wire = 0;
6409
6410 /*
6411 * vm_map_wire:
6412 *
6413 * Sets the pageability of the specified address range in the
6414 * target map as wired. Regions specified as not pageable require
6415 * locked-down physical memory and physical page maps. The
6416 * access_type variable indicates types of accesses that must not
6417 * generate page faults. This is checked against protection of
6418 * memory being locked-down.
6419 *
6420 * The map must not be locked, but a reference must remain to the
6421 * map throughout the call.
6422 */
6423 static kern_return_t
6424 vm_map_wire_nested(
6425 vm_map_t map,
6426 vm_map_offset_t start,
6427 vm_map_offset_t end,
6428 vm_prot_t caller_prot,
6429 vm_tag_t tag,
6430 boolean_t user_wire,
6431 pmap_t map_pmap,
6432 vm_map_offset_t pmap_addr,
6433 ppnum_t *physpage_p)
6434 {
6435 vm_map_entry_t entry;
6436 vm_prot_t access_type;
6437 struct vm_map_entry *first_entry, tmp_entry;
6438 vm_map_t real_map;
6439 vm_map_offset_t s, e;
6440 kern_return_t rc;
6441 boolean_t need_wakeup;
6442 boolean_t main_map = FALSE;
6443 wait_interrupt_t interruptible_state;
6444 thread_t cur_thread;
6445 unsigned int last_timestamp;
6446 vm_map_size_t size;
6447 boolean_t wire_and_extract;
6448 vm_prot_t extra_prots;
6449
6450 extra_prots = VM_PROT_COPY;
6451 extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6452 #if XNU_TARGET_OS_OSX
6453 if (map->pmap == kernel_pmap ||
6454 !vm_map_cs_enforcement(map)) {
6455 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6456 }
6457 #endif /* XNU_TARGET_OS_OSX */
6458
6459 access_type = (caller_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC));
6460
6461 wire_and_extract = FALSE;
6462 if (physpage_p != NULL) {
6463 /*
6464 * The caller wants the physical page number of the
6465 * wired page. We return only one physical page number
6466 * so this works for only one page at a time.
6467 */
6468 if ((end - start) != PAGE_SIZE) {
6469 return KERN_INVALID_ARGUMENT;
6470 }
6471 wire_and_extract = TRUE;
6472 *physpage_p = 0;
6473 }
6474
6475 vm_map_lock(map);
6476 if (map_pmap == NULL) {
6477 main_map = TRUE;
6478 }
6479 last_timestamp = map->timestamp;
6480
6481 VM_MAP_RANGE_CHECK(map, start, end);
6482 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6483 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6484
6485 if (start == end) {
6486 /* We wired what the caller asked for, zero pages */
6487 vm_map_unlock(map);
6488 return KERN_SUCCESS;
6489 }
6490
6491 need_wakeup = FALSE;
6492 cur_thread = current_thread();
6493
6494 s = start;
6495 rc = KERN_SUCCESS;
6496
6497 if (vm_map_lookup_entry(map, s, &first_entry)) {
6498 entry = first_entry;
6499 /*
6500 * vm_map_clip_start will be done later.
6501 * We don't want to unnest any nested submaps here !
6502 */
6503 } else {
6504 /* Start address is not in map */
6505 rc = KERN_INVALID_ADDRESS;
6506 goto done;
6507 }
6508
6509 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6510 /*
6511 * At this point, we have wired from "start" to "s".
6512 * We still need to wire from "s" to "end".
6513 *
6514 * "entry" hasn't been clipped, so it could start before "s"
6515 * and/or end after "end".
6516 */
6517
6518 /* "e" is how far we want to wire in this entry */
6519 e = entry->vme_end;
6520 if (e > end) {
6521 e = end;
6522 }
6523
6524 /*
6525 * If another thread is wiring/unwiring this entry then
6526 * block after informing other thread to wake us up.
6527 */
6528 if (entry->in_transition) {
6529 wait_result_t wait_result;
6530
6531 /*
6532 * We have not clipped the entry. Make sure that
6533 * the start address is in range so that the lookup
6534 * below will succeed.
6535 * "s" is the current starting point: we've already
6536 * wired from "start" to "s" and we still have
6537 * to wire from "s" to "end".
6538 */
6539
6540 entry->needs_wakeup = TRUE;
6541
6542 /*
6543 * wake up anybody waiting on entries that we have
6544 * already wired.
6545 */
6546 if (need_wakeup) {
6547 vm_map_entry_wakeup(map);
6548 need_wakeup = FALSE;
6549 }
6550 /*
6551 * User wiring is interruptible
6552 */
6553 wait_result = vm_map_entry_wait(map,
6554 (user_wire) ? THREAD_ABORTSAFE :
6555 THREAD_UNINT);
6556 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6557 /*
6558 * undo the wirings we have done so far
6559 * We do not clear the needs_wakeup flag,
6560 * because we cannot tell if we were the
6561 * only one waiting.
6562 */
6563 rc = KERN_FAILURE;
6564 goto done;
6565 }
6566
6567 /*
6568 * Cannot avoid a lookup here. reset timestamp.
6569 */
6570 last_timestamp = map->timestamp;
6571
6572 /*
6573 * The entry could have been clipped, look it up again.
6574  * The worst that can happen is that it no longer exists.
6575 */
6576 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6577 /*
6578  * User: undo everything up to the previous
6579  * entry. Let vm_map_unwire worry about
6580 * checking the validity of the range.
6581 */
6582 rc = KERN_FAILURE;
6583 goto done;
6584 }
6585 entry = first_entry;
6586 continue;
6587 }
6588
6589 if (entry->is_sub_map) {
6590 vm_map_offset_t sub_start;
6591 vm_map_offset_t sub_end;
6592 vm_map_offset_t local_start;
6593 vm_map_offset_t local_end;
6594 pmap_t pmap;
6595
6596 if (wire_and_extract) {
6597 /*
6598 * Wiring would result in copy-on-write
6599 * which would not be compatible with
6600 * the sharing we have with the original
6601 * provider of this memory.
6602 */
6603 rc = KERN_INVALID_ARGUMENT;
6604 goto done;
6605 }
6606
6607 vm_map_clip_start(map, entry, s);
6608 vm_map_clip_end(map, entry, end);
6609
6610 sub_start = VME_OFFSET(entry);
6611 sub_end = entry->vme_end;
6612 sub_end += VME_OFFSET(entry) - entry->vme_start;
6613
6614 local_end = entry->vme_end;
6615 if (map_pmap == NULL) {
6616 vm_object_t object;
6617 vm_object_offset_t offset;
6618 vm_prot_t prot;
6619 boolean_t wired;
6620 vm_map_entry_t local_entry;
6621 vm_map_version_t version;
6622 vm_map_t lookup_map;
6623
6624 if (entry->use_pmap) {
6625 pmap = VME_SUBMAP(entry)->pmap;
6626 /* ppc implementation requires that */
6627 /* submaps pmap address ranges line */
6628 /* up with parent map */
6629 #ifdef notdef
6630 pmap_addr = sub_start;
6631 #endif
6632 pmap_addr = s;
6633 } else {
6634 pmap = map->pmap;
6635 pmap_addr = s;
6636 }
6637
6638 if (entry->wired_count) {
6639 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6640 goto done;
6641 }
6642
6643 /*
6644 * The map was not unlocked:
6645 * no need to goto re-lookup.
6646 * Just go directly to next entry.
6647 */
6648 entry = entry->vme_next;
6649 s = entry->vme_start;
6650 continue;
6651 }
6652
6653 /* call vm_map_lookup_locked to */
6654 /* cause any needs copy to be */
6655 /* evaluated */
6656 local_start = entry->vme_start;
6657 lookup_map = map;
6658 vm_map_lock_write_to_read(map);
6659 rc = vm_map_lookup_locked(
6660 &lookup_map, local_start,
6661 (access_type | extra_prots),
6662 OBJECT_LOCK_EXCLUSIVE,
6663 &version, &object,
6664 &offset, &prot, &wired,
6665 NULL,
6666 &real_map, NULL);
6667 if (rc != KERN_SUCCESS) {
6668 vm_map_unlock_read(lookup_map);
6669 assert(map_pmap == NULL);
6670 vm_map_unwire(map, start,
6671 s, user_wire);
6672 return rc;
6673 }
6674 vm_object_unlock(object);
6675 if (real_map != lookup_map) {
6676 vm_map_unlock(real_map);
6677 }
6678 vm_map_unlock_read(lookup_map);
6679 vm_map_lock(map);
6680
6681 /* we unlocked, so must re-lookup */
6682 if (!vm_map_lookup_entry(map,
6683 local_start,
6684 &local_entry)) {
6685 rc = KERN_FAILURE;
6686 goto done;
6687 }
6688
6689 /*
6690 * entry could have been "simplified",
6691 * so re-clip
6692 */
6693 entry = local_entry;
6694 assert(s == local_start);
6695 vm_map_clip_start(map, entry, s);
6696 vm_map_clip_end(map, entry, end);
6697 /* re-compute "e" */
6698 e = entry->vme_end;
6699 if (e > end) {
6700 e = end;
6701 }
6702
6703 /* did we have a change of type? */
6704 if (!entry->is_sub_map) {
6705 last_timestamp = map->timestamp;
6706 continue;
6707 }
6708 } else {
6709 local_start = entry->vme_start;
6710 pmap = map_pmap;
6711 }
6712
6713 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6714 goto done;
6715 }
6716
6717 entry->in_transition = TRUE;
6718
6719 vm_map_unlock(map);
6720 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6721 sub_start, sub_end,
6722 caller_prot, tag,
6723 user_wire, pmap, pmap_addr,
6724 NULL);
6725 vm_map_lock(map);
6726
6727 /*
6728 * Find the entry again. It could have been clipped
6729 * after we unlocked the map.
6730 */
6731 if (!vm_map_lookup_entry(map, local_start,
6732 &first_entry)) {
6733 panic("vm_map_wire: re-lookup failed");
6734 }
6735 entry = first_entry;
6736
6737 assert(local_start == s);
6738 /* re-compute "e" */
6739 e = entry->vme_end;
6740 if (e > end) {
6741 e = end;
6742 }
6743
6744 last_timestamp = map->timestamp;
6745 while ((entry != vm_map_to_entry(map)) &&
6746 (entry->vme_start < e)) {
6747 assert(entry->in_transition);
6748 entry->in_transition = FALSE;
6749 if (entry->needs_wakeup) {
6750 entry->needs_wakeup = FALSE;
6751 need_wakeup = TRUE;
6752 }
6753 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6754 subtract_wire_counts(map, entry, user_wire);
6755 }
6756 entry = entry->vme_next;
6757 }
6758 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6759 goto done;
6760 }
6761
6762 /* no need to relookup again */
6763 s = entry->vme_start;
6764 continue;
6765 }
6766
6767 /*
6768 * If this entry is already wired then increment
6769 * the appropriate wire reference count.
6770 */
6771 if (entry->wired_count) {
6772 if ((entry->protection & access_type) != access_type) {
6773 /* found a protection problem */
6774
6775 /*
6776 * XXX FBDP
6777 * We should always return an error
6778 * in this case but since we didn't
6779 * enforce it before, let's do
6780 * it only for the new "wire_and_extract"
6781 * code path for now...
6782 */
6783 if (wire_and_extract) {
6784 rc = KERN_PROTECTION_FAILURE;
6785 goto done;
6786 }
6787 }
6788
6789 /*
6790 * entry is already wired down, get our reference
6791 * after clipping to our range.
6792 */
6793 vm_map_clip_start(map, entry, s);
6794 vm_map_clip_end(map, entry, end);
6795
6796 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6797 goto done;
6798 }
6799
6800 if (wire_and_extract) {
6801 vm_object_t object;
6802 vm_object_offset_t offset;
6803 vm_page_t m;
6804
6805 /*
6806 * We don't have to "wire" the page again
6807  * but we still have to "extract" its
6808 * physical page number, after some sanity
6809 * checks.
6810 */
6811 assert((entry->vme_end - entry->vme_start)
6812 == PAGE_SIZE);
6813 assert(!entry->needs_copy);
6814 assert(!entry->is_sub_map);
6815 assert(VME_OBJECT(entry));
6816 if (((entry->vme_end - entry->vme_start)
6817 != PAGE_SIZE) ||
6818 entry->needs_copy ||
6819 entry->is_sub_map ||
6820 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6821 rc = KERN_INVALID_ARGUMENT;
6822 goto done;
6823 }
6824
6825 object = VME_OBJECT(entry);
6826 offset = VME_OFFSET(entry);
6827 /* need exclusive lock to update m->dirty */
6828 if (entry->protection & VM_PROT_WRITE) {
6829 vm_object_lock(object);
6830 } else {
6831 vm_object_lock_shared(object);
6832 }
6833 m = vm_page_lookup(object, offset);
6834 assert(m != VM_PAGE_NULL);
6835 assert(VM_PAGE_WIRED(m));
6836 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6837 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6838 if (entry->protection & VM_PROT_WRITE) {
6839 vm_object_lock_assert_exclusive(
6840 object);
6841 m->vmp_dirty = TRUE;
6842 }
6843 } else {
6844 /* not already wired !? */
6845 *physpage_p = 0;
6846 }
6847 vm_object_unlock(object);
6848 }
6849
6850 /* map was not unlocked: no need to relookup */
6851 entry = entry->vme_next;
6852 s = entry->vme_start;
6853 continue;
6854 }
6855
6856 /*
6857 * Unwired entry or wire request transmitted via submap
6858 */
6859
6860 /*
6861 * Wiring would copy the pages to the shadow object.
6862 * The shadow object would not be code-signed so
6863 * attempting to execute code from these copied pages
6864 * would trigger a code-signing violation.
6865 */
6866
6867 if ((entry->protection & VM_PROT_EXECUTE)
6868 #if XNU_TARGET_OS_OSX
6869 &&
6870 map->pmap != kernel_pmap &&
6871 (vm_map_cs_enforcement(map)
6872 #if __arm64__
6873 || !VM_MAP_IS_EXOTIC(map)
6874 #endif /* __arm64__ */
6875 )
6876 #endif /* XNU_TARGET_OS_OSX */
6877 ) {
6878 #if MACH_ASSERT
6879 printf("pid %d[%s] wiring executable range from "
6880 "0x%llx to 0x%llx: rejected to preserve "
6881 "code-signing\n",
6882 proc_selfpid(),
6883 (current_task()->bsd_info
6884 ? proc_name_address(current_task()->bsd_info)
6885 : "?"),
6886 (uint64_t) entry->vme_start,
6887 (uint64_t) entry->vme_end);
6888 #endif /* MACH_ASSERT */
6889 DTRACE_VM2(cs_executable_wire,
6890 uint64_t, (uint64_t)entry->vme_start,
6891 uint64_t, (uint64_t)entry->vme_end);
6892 cs_executable_wire++;
6893 rc = KERN_PROTECTION_FAILURE;
6894 goto done;
6895 }
6896
6897 /*
6898 * Perform actions of vm_map_lookup that need the write
6899 * lock on the map: create a shadow object for a
6900 * copy-on-write region, or an object for a zero-fill
6901 * region.
6902 */
6903 size = entry->vme_end - entry->vme_start;
6904 /*
6905 * If wiring a copy-on-write page, we need to copy it now
6906 * even if we're only (currently) requesting read access.
6907 * This is aggressive, but once it's wired we can't move it.
6908 */
6909 if (entry->needs_copy) {
6910 if (wire_and_extract) {
6911 /*
6912 * We're supposed to share with the original
6913 * provider so should not be "needs_copy"
6914 */
6915 rc = KERN_INVALID_ARGUMENT;
6916 goto done;
6917 }
6918
6919 VME_OBJECT_SHADOW(entry, size);
6920 entry->needs_copy = FALSE;
6921 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6922 if (wire_and_extract) {
6923 /*
6924 * We're supposed to share with the original
6925 * provider so should already have an object.
6926 */
6927 rc = KERN_INVALID_ARGUMENT;
6928 goto done;
6929 }
6930 VME_OBJECT_SET(entry, vm_object_allocate(size));
6931 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6932 assert(entry->use_pmap);
6933 }
6934
6935 vm_map_clip_start(map, entry, s);
6936 vm_map_clip_end(map, entry, end);
6937
6938 /* re-compute "e" */
6939 e = entry->vme_end;
6940 if (e > end) {
6941 e = end;
6942 }
6943
6944 /*
6945 * Check for holes and protection mismatch.
6946 * Holes: Next entry should be contiguous unless this
6947 * is the end of the region.
6948 * Protection: Access requested must be allowed, unless
6949 * wiring is by protection class
6950 */
6951 if ((entry->vme_end < end) &&
6952 ((entry->vme_next == vm_map_to_entry(map)) ||
6953 (entry->vme_next->vme_start > entry->vme_end))) {
6954 /* found a hole */
6955 rc = KERN_INVALID_ADDRESS;
6956 goto done;
6957 }
6958 if ((entry->protection & access_type) != access_type) {
6959 /* found a protection problem */
6960 rc = KERN_PROTECTION_FAILURE;
6961 goto done;
6962 }
6963
6964 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6965
6966 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6967 goto done;
6968 }
6969
6970 entry->in_transition = TRUE;
6971
6972 /*
6973 * This entry might get split once we unlock the map.
6974 * In vm_fault_wire(), we need the current range as
6975 * defined by this entry. In order for this to work
6976 * along with a simultaneous clip operation, we make a
6977 * temporary copy of this entry and use that for the
6978 * wiring. Note that the underlying objects do not
6979 * change during a clip.
6980 */
6981 tmp_entry = *entry;
6982
6983 /*
6984  * The in_transition state guarantees that the entry
6985  * (or entries for this range, if a split occurred) will be
6986 * there when the map lock is acquired for the second time.
6987 */
6988 vm_map_unlock(map);
6989
6990 if (!user_wire && cur_thread != THREAD_NULL) {
6991 interruptible_state = thread_interrupt_level(THREAD_UNINT);
6992 } else {
6993 interruptible_state = THREAD_UNINT;
6994 }
6995
6996 if (map_pmap) {
6997 rc = vm_fault_wire(map,
6998 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6999 physpage_p);
7000 } else {
7001 rc = vm_fault_wire(map,
7002 &tmp_entry, caller_prot, tag, map->pmap,
7003 tmp_entry.vme_start,
7004 physpage_p);
7005 }
7006
7007 if (!user_wire && cur_thread != THREAD_NULL) {
7008 thread_interrupt_level(interruptible_state);
7009 }
7010
7011 vm_map_lock(map);
7012
7013 if (last_timestamp + 1 != map->timestamp) {
7014 /*
7015 * Find the entry again. It could have been clipped
7016 * after we unlocked the map.
7017 */
7018 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7019 &first_entry)) {
7020 panic("vm_map_wire: re-lookup failed");
7021 }
7022
7023 entry = first_entry;
7024 }
7025
7026 last_timestamp = map->timestamp;
7027
7028 while ((entry != vm_map_to_entry(map)) &&
7029 (entry->vme_start < tmp_entry.vme_end)) {
7030 assert(entry->in_transition);
7031 entry->in_transition = FALSE;
7032 if (entry->needs_wakeup) {
7033 entry->needs_wakeup = FALSE;
7034 need_wakeup = TRUE;
7035 }
7036 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7037 subtract_wire_counts(map, entry, user_wire);
7038 }
7039 entry = entry->vme_next;
7040 }
7041
7042 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7043 goto done;
7044 }
7045
7046 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
7047 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
7048 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
7049 /* found a "new" hole */
7050 s = tmp_entry.vme_end;
7051 rc = KERN_INVALID_ADDRESS;
7052 goto done;
7053 }
7054
7055 s = entry->vme_start;
7056 } /* end while loop through map entries */
7057
7058 done:
7059 if (rc == KERN_SUCCESS) {
7060 /* repair any damage we may have made to the VM map */
7061 vm_map_simplify_range(map, start, end);
7062 }
7063
7064 vm_map_unlock(map);
7065
7066 /*
7067 * wake up anybody waiting on entries we wired.
7068 */
7069 if (need_wakeup) {
7070 vm_map_entry_wakeup(map);
7071 }
7072
7073 if (rc != KERN_SUCCESS) {
7074 /* undo what has been wired so far */
7075 vm_map_unwire_nested(map, start, s, user_wire,
7076 map_pmap, pmap_addr);
7077 if (physpage_p) {
7078 *physpage_p = 0;
7079 }
7080 }
7081
7082 return rc;
7083 }
7084
7085 kern_return_t
7086 vm_map_wire_external(
7087 vm_map_t map,
7088 vm_map_offset_t start,
7089 vm_map_offset_t end,
7090 vm_prot_t caller_prot,
7091 boolean_t user_wire)
7092 {
7093 kern_return_t kret;
7094
7095 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7096 user_wire, (pmap_t)NULL, 0, NULL);
7097 return kret;
7098 }
7099
7100 kern_return_t
7101 vm_map_wire_kernel(
7102 vm_map_t map,
7103 vm_map_offset_t start,
7104 vm_map_offset_t end,
7105 vm_prot_t caller_prot,
7106 vm_tag_t tag,
7107 boolean_t user_wire)
7108 {
7109 kern_return_t kret;
7110
7111 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7112 user_wire, (pmap_t)NULL, 0, NULL);
7113 return kret;
7114 }
7115
7116 kern_return_t
7117 vm_map_wire_and_extract_external(
7118 vm_map_t map,
7119 vm_map_offset_t start,
7120 vm_prot_t caller_prot,
7121 boolean_t user_wire,
7122 ppnum_t *physpage_p)
7123 {
7124 kern_return_t kret;
7125
7126 kret = vm_map_wire_nested(map,
7127 start,
7128 start + VM_MAP_PAGE_SIZE(map),
7129 caller_prot,
7130 vm_tag_bt(),
7131 user_wire,
7132 (pmap_t)NULL,
7133 0,
7134 physpage_p);
7135 if (kret != KERN_SUCCESS &&
7136 physpage_p != NULL) {
7137 *physpage_p = 0;
7138 }
7139 return kret;
7140 }
7141
7142 kern_return_t
7143 vm_map_wire_and_extract_kernel(
7144 vm_map_t map,
7145 vm_map_offset_t start,
7146 vm_prot_t caller_prot,
7147 vm_tag_t tag,
7148 boolean_t user_wire,
7149 ppnum_t *physpage_p)
7150 {
7151 kern_return_t kret;
7152
7153 kret = vm_map_wire_nested(map,
7154 start,
7155 start + VM_MAP_PAGE_SIZE(map),
7156 caller_prot,
7157 tag,
7158 user_wire,
7159 (pmap_t)NULL,
7160 0,
7161 physpage_p);
7162 if (kret != KERN_SUCCESS &&
7163 physpage_p != NULL) {
7164 *physpage_p = 0;
7165 }
7166 return kret;
7167 }
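/*
 * Hedged usage sketch: the wire-and-extract variants wire exactly one
 * VM-map page and report its physical page number, e.g.:
 *
 *	ppnum_t ppnum = 0;
 *	kr = vm_map_wire_and_extract_kernel(map, addr,
 *	         VM_PROT_READ | VM_PROT_WRITE, tag, FALSE, &ppnum);
 *
 * "tag" stands for whatever vm_tag_t the caller accounts against. On any
 * failure the wrappers above reset *physpage_p to 0.
 */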
7168
7169 /*
7170 * vm_map_unwire:
7171 *
7172  * Sets the pageability of the specified address range in the target
7173  * map as pageable. Regions specified must have been wired previously.
7174 *
7175 * The map must not be locked, but a reference must remain to the map
7176 * throughout the call.
7177 *
7178 * Kernel will panic on failures. User unwire ignores holes and
7179  * unwired and in-transition entries to avoid losing memory by leaving
7180 * it unwired.
7181 */
7182 static kern_return_t
7183 vm_map_unwire_nested(
7184 vm_map_t map,
7185 vm_map_offset_t start,
7186 vm_map_offset_t end,
7187 boolean_t user_wire,
7188 pmap_t map_pmap,
7189 vm_map_offset_t pmap_addr)
7190 {
7191 vm_map_entry_t entry;
7192 struct vm_map_entry *first_entry, tmp_entry;
7193 boolean_t need_wakeup;
7194 boolean_t main_map = FALSE;
7195 unsigned int last_timestamp;
7196
7197 vm_map_lock(map);
7198 if (map_pmap == NULL) {
7199 main_map = TRUE;
7200 }
7201 last_timestamp = map->timestamp;
7202
7203 VM_MAP_RANGE_CHECK(map, start, end);
7204 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7205 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7206
7207 if (start == end) {
7208 /* We unwired what the caller asked for: zero pages */
7209 vm_map_unlock(map);
7210 return KERN_SUCCESS;
7211 }
7212
7213 if (vm_map_lookup_entry(map, start, &first_entry)) {
7214 entry = first_entry;
7215 /*
7216 * vm_map_clip_start will be done later.
7217 * We don't want to unnest any nested sub maps here !
7218 */
7219 } else {
7220 if (!user_wire) {
7221 panic("vm_map_unwire: start not found");
7222 }
7223 /* Start address is not in map. */
7224 vm_map_unlock(map);
7225 return KERN_INVALID_ADDRESS;
7226 }
7227
7228 if (entry->superpage_size) {
7229 /* superpages are always wired */
7230 vm_map_unlock(map);
7231 return KERN_INVALID_ADDRESS;
7232 }
7233
7234 need_wakeup = FALSE;
7235 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7236 if (entry->in_transition) {
7237 /*
7238 * 1)
7239 * Another thread is wiring down this entry. Note
7240  * Another thread is wiring down this entry. Note
7241  * that were it not for that other thread, we would
7242  * be unwiring an already-unwired entry. This is not
7243 * we did not wire.
7244 *
7245 * 2)
7246 * Another thread is unwiring this entry. We did not
7247 * have a reference to it, because if we did, this
7248 * entry will not be getting unwired now.
7249 */
7250 if (!user_wire) {
7251 /*
7252 * XXX FBDP
7253 * This could happen: there could be some
7254 * overlapping vslock/vsunlock operations
7255 * going on.
7256 * We should probably just wait and retry,
7257 * but then we have to be careful that this
7258 * entry could get "simplified" after
7259 * "in_transition" gets unset and before
7260 * we re-lookup the entry, so we would
7261 * have to re-clip the entry to avoid
7262 * re-unwiring what we have already unwired...
7263 * See vm_map_wire_nested().
7264 *
7265 * Or we could just ignore "in_transition"
7266  * here and proceed to decrement the wired
7267 * count(s) on this entry. That should be fine
7268 * as long as "wired_count" doesn't drop all
7269 * the way to 0 (and we should panic if THAT
7270 * happens).
7271 */
7272 panic("vm_map_unwire: in_transition entry");
7273 }
7274
7275 entry = entry->vme_next;
7276 continue;
7277 }
7278
7279 if (entry->is_sub_map) {
7280 vm_map_offset_t sub_start;
7281 vm_map_offset_t sub_end;
7282 vm_map_offset_t local_end;
7283 pmap_t pmap;
7284
7285 vm_map_clip_start(map, entry, start);
7286 vm_map_clip_end(map, entry, end);
7287
7288 sub_start = VME_OFFSET(entry);
7289 sub_end = entry->vme_end - entry->vme_start;
7290 sub_end += VME_OFFSET(entry);
7291 local_end = entry->vme_end;
7292 if (map_pmap == NULL) {
7293 if (entry->use_pmap) {
7294 pmap = VME_SUBMAP(entry)->pmap;
7295 pmap_addr = sub_start;
7296 } else {
7297 pmap = map->pmap;
7298 pmap_addr = start;
7299 }
7300 if (entry->wired_count == 0 ||
7301 (user_wire && entry->user_wired_count == 0)) {
7302 if (!user_wire) {
7303 panic("vm_map_unwire: entry is unwired");
7304 }
7305 entry = entry->vme_next;
7306 continue;
7307 }
7308
7309 /*
7310 * Check for holes
7311 * Holes: Next entry should be contiguous unless
7312 * this is the end of the region.
7313 */
7314 if (((entry->vme_end < end) &&
7315 ((entry->vme_next == vm_map_to_entry(map)) ||
7316 (entry->vme_next->vme_start
7317 > entry->vme_end)))) {
7318 if (!user_wire) {
7319 panic("vm_map_unwire: non-contiguous region");
7320 }
7321 /*
7322 * entry = entry->vme_next;
7323 * continue;
7324 */
7325 }
7326
7327 subtract_wire_counts(map, entry, user_wire);
7328
7329 if (entry->wired_count != 0) {
7330 entry = entry->vme_next;
7331 continue;
7332 }
7333
7334 entry->in_transition = TRUE;
7335 tmp_entry = *entry;/* see comment in vm_map_wire() */
7336
7337 /*
7338 * We can unlock the map now. The in_transition state
7339  * guarantees existence of the entry.
7340 */
7341 vm_map_unlock(map);
7342 vm_map_unwire_nested(VME_SUBMAP(entry),
7343 sub_start, sub_end, user_wire, pmap, pmap_addr);
7344 vm_map_lock(map);
7345
7346 if (last_timestamp + 1 != map->timestamp) {
7347 /*
7348 * Find the entry again. It could have been
7349 * clipped or deleted after we unlocked the map.
7350 */
7351 if (!vm_map_lookup_entry(map,
7352 tmp_entry.vme_start,
7353 &first_entry)) {
7354 if (!user_wire) {
7355 panic("vm_map_unwire: re-lookup failed");
7356 }
7357 entry = first_entry->vme_next;
7358 } else {
7359 entry = first_entry;
7360 }
7361 }
7362 last_timestamp = map->timestamp;
7363
7364 /*
7365 * clear transition bit for all constituent entries
7366 * that were in the original entry (saved in
7367 * tmp_entry). Also check for waiters.
7368 */
7369 while ((entry != vm_map_to_entry(map)) &&
7370 (entry->vme_start < tmp_entry.vme_end)) {
7371 assert(entry->in_transition);
7372 entry->in_transition = FALSE;
7373 if (entry->needs_wakeup) {
7374 entry->needs_wakeup = FALSE;
7375 need_wakeup = TRUE;
7376 }
7377 entry = entry->vme_next;
7378 }
7379 continue;
7380 } else {
7381 tmp_entry = *entry;
7382 vm_map_unlock(map);
7383 vm_map_unwire_nested(VME_SUBMAP(entry),
7384 sub_start, sub_end, user_wire, map_pmap,
7385 pmap_addr);
7386 vm_map_lock(map);
7387
7388 if (last_timestamp + 1 != map->timestamp) {
7389 /*
7390 * Find the entry again. It could have been
7391 * clipped or deleted after we unlocked the map.
7392 */
7393 if (!vm_map_lookup_entry(map,
7394 tmp_entry.vme_start,
7395 &first_entry)) {
7396 if (!user_wire) {
7397 panic("vm_map_unwire: re-lookup failed");
7398 }
7399 entry = first_entry->vme_next;
7400 } else {
7401 entry = first_entry;
7402 }
7403 }
7404 last_timestamp = map->timestamp;
7405 }
7406 }
7407
7408
7409 if ((entry->wired_count == 0) ||
7410 (user_wire && entry->user_wired_count == 0)) {
7411 if (!user_wire) {
7412 panic("vm_map_unwire: entry is unwired");
7413 }
7414
7415 entry = entry->vme_next;
7416 continue;
7417 }
7418
7419 assert(entry->wired_count > 0 &&
7420 (!user_wire || entry->user_wired_count > 0));
7421
7422 vm_map_clip_start(map, entry, start);
7423 vm_map_clip_end(map, entry, end);
7424
7425 /*
7426 * Check for holes
7427 * Holes: Next entry should be contiguous unless
7428 * this is the end of the region.
7429 */
7430 if (((entry->vme_end < end) &&
7431 ((entry->vme_next == vm_map_to_entry(map)) ||
7432 (entry->vme_next->vme_start > entry->vme_end)))) {
7433 if (!user_wire) {
7434 panic("vm_map_unwire: non-contiguous region");
7435 }
7436 entry = entry->vme_next;
7437 continue;
7438 }
7439
7440 subtract_wire_counts(map, entry, user_wire);
7441
7442 if (entry->wired_count != 0) {
7443 entry = entry->vme_next;
7444 continue;
7445 }
7446
7447 if (entry->zero_wired_pages) {
7448 entry->zero_wired_pages = FALSE;
7449 }
7450
7451 entry->in_transition = TRUE;
7452 tmp_entry = *entry; /* see comment in vm_map_wire() */
7453
7454 /*
7455 * We can unlock the map now. The in_transition state
7456  * guarantees existence of the entry.
7457 */
7458 vm_map_unlock(map);
7459 if (map_pmap) {
7460 vm_fault_unwire(map,
7461 &tmp_entry, FALSE, map_pmap, pmap_addr);
7462 } else {
7463 vm_fault_unwire(map,
7464 &tmp_entry, FALSE, map->pmap,
7465 tmp_entry.vme_start);
7466 }
7467 vm_map_lock(map);
7468
7469 if (last_timestamp + 1 != map->timestamp) {
7470 /*
7471 * Find the entry again. It could have been clipped
7472 * or deleted after we unlocked the map.
7473 */
7474 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7475 &first_entry)) {
7476 if (!user_wire) {
7477 panic("vm_map_unwire: re-lookup failed");
7478 }
7479 entry = first_entry->vme_next;
7480 } else {
7481 entry = first_entry;
7482 }
7483 }
7484 last_timestamp = map->timestamp;
7485
7486 /*
7487 * clear transition bit for all constituent entries that
7488 * were in the original entry (saved in tmp_entry). Also
7489 * check for waiters.
7490 */
7491 while ((entry != vm_map_to_entry(map)) &&
7492 (entry->vme_start < tmp_entry.vme_end)) {
7493 assert(entry->in_transition);
7494 entry->in_transition = FALSE;
7495 if (entry->needs_wakeup) {
7496 entry->needs_wakeup = FALSE;
7497 need_wakeup = TRUE;
7498 }
7499 entry = entry->vme_next;
7500 }
7501 }
7502
7503 /*
7504 * We might have fragmented the address space when we wired this
7505 * range of addresses. Attempt to re-coalesce these VM map entries
7506 * with their neighbors now that they're no longer wired.
7507 * Under some circumstances, address space fragmentation can
7508 * prevent VM object shadow chain collapsing, which can cause
7509 * swap space leaks.
7510 */
7511 vm_map_simplify_range(map, start, end);
7512
7513 vm_map_unlock(map);
7514 /*
7515 * wake up anybody waiting on entries that we have unwired.
7516 */
7517 if (need_wakeup) {
7518 vm_map_entry_wakeup(map);
7519 }
7520 return KERN_SUCCESS;
7521 }
7522
7523 kern_return_t
7524 vm_map_unwire(
7525 vm_map_t map,
7526 vm_map_offset_t start,
7527 vm_map_offset_t end,
7528 boolean_t user_wire)
7529 {
7530 return vm_map_unwire_nested(map, start, end,
7531 user_wire, (pmap_t)NULL, 0);
7532 }
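/*
 * Hedged usage sketch: a user-requested wiring (user_wire == TRUE, as for
 * mlock/munlock-style callers) is undone by unwiring the same page-aligned
 * range:
 *
 *	kr = vm_map_wire_kernel(map, start, end, VM_PROT_READ, tag, TRUE);
 *	...
 *	kr = vm_map_unwire(map, start, end, TRUE);
 *
 * "tag" stands for the caller's vm_tag_t accounting tag. Kernel wirings
 * (user_wire == FALSE) panic on unwire failures instead of returning an
 * error, per the comment above vm_map_unwire_nested().
 */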
7533
7534
7535 /*
7536 * vm_map_entry_delete: [ internal use only ]
7537 *
7538 * Deallocate the given entry from the target map.
7539 */
7540 static void
7541 vm_map_entry_delete(
7542 vm_map_t map,
7543 vm_map_entry_t entry)
7544 {
7545 vm_map_offset_t s, e;
7546 vm_object_t object;
7547 vm_map_t submap;
7548
7549 s = entry->vme_start;
7550 e = entry->vme_end;
7551 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7552 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7553 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7554 assert(page_aligned(s));
7555 assert(page_aligned(e));
7556 }
7557 if (entry->map_aligned == TRUE) {
7558 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7559 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7560 }
7561 assert(entry->wired_count == 0);
7562 assert(entry->user_wired_count == 0);
7563 assert(!entry->permanent);
7564
7565 if (entry->is_sub_map) {
7566 object = NULL;
7567 submap = VME_SUBMAP(entry);
7568 } else {
7569 submap = NULL;
7570 object = VME_OBJECT(entry);
7571 }
7572
7573 vm_map_store_entry_unlink(map, entry);
7574 map->size -= e - s;
7575
7576 vm_map_entry_dispose(map, entry);
7577
7578 vm_map_unlock(map);
7579 /*
7580 * Deallocate the object only after removing all
7581 * pmap entries pointing to its pages.
7582 */
7583 if (submap) {
7584 vm_map_deallocate(submap);
7585 } else {
7586 vm_object_deallocate(object);
7587 }
7588 }
7589
7590 void
7591 vm_map_submap_pmap_clean(
7592 vm_map_t map,
7593 vm_map_offset_t start,
7594 vm_map_offset_t end,
7595 vm_map_t sub_map,
7596 vm_map_offset_t offset)
7597 {
7598 vm_map_offset_t submap_start;
7599 vm_map_offset_t submap_end;
7600 vm_map_size_t remove_size;
7601 vm_map_entry_t entry;
7602
7603 submap_end = offset + (end - start);
7604 submap_start = offset;
7605
7606 vm_map_lock_read(sub_map);
7607 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7608 remove_size = (entry->vme_end - entry->vme_start);
7609 if (offset > entry->vme_start) {
7610 remove_size -= offset - entry->vme_start;
7611 }
7612
7613
7614 if (submap_end < entry->vme_end) {
7615 remove_size -=
7616 entry->vme_end - submap_end;
7617 }
7618 if (entry->is_sub_map) {
7619 vm_map_submap_pmap_clean(
7620 sub_map,
7621 start,
7622 start + remove_size,
7623 VME_SUBMAP(entry),
7624 VME_OFFSET(entry));
7625 } else {
7626 if (map->mapped_in_other_pmaps &&
7627 os_ref_get_count_raw(&map->map_refcnt) != 0 &&
7628 VME_OBJECT(entry) != NULL) {
7629 vm_object_pmap_protect_options(
7630 VME_OBJECT(entry),
7631 (VME_OFFSET(entry) +
7632 offset -
7633 entry->vme_start),
7634 remove_size,
7635 PMAP_NULL,
7636 PAGE_SIZE,
7637 entry->vme_start,
7638 VM_PROT_NONE,
7639 PMAP_OPTIONS_REMOVE);
7640 } else {
7641 pmap_remove(map->pmap,
7642 (addr64_t)start,
7643 (addr64_t)(start + remove_size));
7644 }
7645 }
7646 }
7647
7648 entry = entry->vme_next;
7649
7650 while ((entry != vm_map_to_entry(sub_map))
7651 && (entry->vme_start < submap_end)) {
7652 remove_size = (entry->vme_end - entry->vme_start);
7653 if (submap_end < entry->vme_end) {
7654 remove_size -= entry->vme_end - submap_end;
7655 }
7656 if (entry->is_sub_map) {
7657 vm_map_submap_pmap_clean(
7658 sub_map,
7659 (start + entry->vme_start) - offset,
7660 ((start + entry->vme_start) - offset) + remove_size,
7661 VME_SUBMAP(entry),
7662 VME_OFFSET(entry));
7663 } else {
7664 if (map->mapped_in_other_pmaps &&
7665 os_ref_get_count_raw(&map->map_refcnt) != 0 &&
7666 VME_OBJECT(entry) != NULL) {
7667 vm_object_pmap_protect_options(
7668 VME_OBJECT(entry),
7669 VME_OFFSET(entry),
7670 remove_size,
7671 PMAP_NULL,
7672 PAGE_SIZE,
7673 entry->vme_start,
7674 VM_PROT_NONE,
7675 PMAP_OPTIONS_REMOVE);
7676 } else {
7677 pmap_remove(map->pmap,
7678 (addr64_t)((start + entry->vme_start)
7679 - offset),
7680 (addr64_t)(((start + entry->vme_start)
7681 - offset) + remove_size));
7682 }
7683 }
7684 entry = entry->vme_next;
7685 }
7686 vm_map_unlock_read(sub_map);
7687 return;
7688 }
7689
7690 /*
7691 * virt_memory_guard_ast:
7692 *
7693 * Handle the AST callout for a virtual memory guard.
7694 * Raise an EXC_GUARD exception and terminate the task
7695 * if it is configured to do so.
7696 */
7697 void
7698 virt_memory_guard_ast(
7699 thread_t thread,
7700 mach_exception_data_type_t code,
7701 mach_exception_data_type_t subcode)
7702 {
7703 task_t task = get_threadtask(thread);
7704 assert(task != kernel_task);
7705 assert(task == current_task());
7706 kern_return_t sync_exception_result;
7707 uint32_t behavior;
7708
7709 behavior = task->task_exc_guard;
7710
7711 /* Is delivery enabled */
7712 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7713 return;
7714 }
7715
7716 /* If only once, make sure we're that once */
7717 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7718 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7719
7720 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7721 break;
7722 }
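		/*
		 * The compare-and-swap lost a race with another update of
		 * task_exc_guard: reload the current behavior and retry,
		 * bailing out if delivery was disabled in the meantime.
		 */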
7723 behavior = task->task_exc_guard;
7724 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7725 return;
7726 }
7727 }
7728
7729 /* Raise exception synchronously and see if handler claimed it */
7730 sync_exception_result = task_exception_notify(EXC_GUARD, code, subcode);
7731
7732 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7733 /*
7734 * If Synchronous EXC_GUARD delivery was successful then
7735 * kill the process and return, else kill the process
7736 * and deliver the exception via EXC_CORPSE_NOTIFY.
7737 */
7738 if (sync_exception_result == KERN_SUCCESS) {
7739 task_bsdtask_kill(current_task());
7740 } else {
7741 exit_with_guard_exception(current_proc(), code, subcode);
7742 }
7743 } else if (task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) {
7744 /*
7745 * If the synchronous EXC_GUARD delivery was not successful,
7746 * raise a simulated crash.
7747 */
7748 if (sync_exception_result != KERN_SUCCESS) {
7749 task_violated_guard(code, subcode, NULL);
7750 }
7751 }
7752 }
7753
7754 /*
7755 * vm_map_guard_exception:
7756 *
7757 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7758 *
7759 * Right now, we do this when we find nothing mapped, or a
7760 * gap in the mapping when a user address space deallocate
7761 * was requested. We report the address of the first gap found.
7762 */
7763 static void
7764 vm_map_guard_exception(
7765 vm_map_offset_t gap_start,
7766 unsigned reason)
7767 {
7768 mach_exception_code_t code = 0;
7769 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7770 unsigned int target = 0; /* should we pass in pid associated with map? */
7771 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7772 boolean_t fatal = FALSE;
7773
7774 task_t task = current_task();
7775
7776 /* Can't deliver exceptions to kernel task */
7777 if (task == kernel_task) {
7778 return;
7779 }
7780
7781 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7782 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7783 EXC_GUARD_ENCODE_TARGET(code, target);
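	/*
	 * "code" now packs the guard type, flavor ("reason") and target
	 * into a single 64-bit value; "subcode" carries the address of
	 * the offending gap.
	 */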
7784
7785 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7786 fatal = TRUE;
7787 }
7788 thread_guard_violation(current_thread(), code, subcode, fatal);
7789 }
7790
7791 /*
7792 * vm_map_delete: [ internal use only ]
7793 *
7794 * Deallocates the given address range from the target map.
7795 * Removes all user wirings. Unwires one kernel wiring if
7796 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7797 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7798 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7799 *
7800 * This routine is called with map locked and leaves map locked.
7801 */
7802 static kern_return_t
7803 vm_map_delete(
7804 vm_map_t map,
7805 vm_map_offset_t start,
7806 vm_map_offset_t end,
7807 int flags,
7808 vm_map_t zap_map)
7809 {
7810 vm_map_entry_t entry, next;
7811 struct vm_map_entry *first_entry, tmp_entry;
7812 vm_map_offset_t s;
7813 vm_object_t object;
7814 boolean_t need_wakeup;
7815 unsigned int last_timestamp = ~0; /* unlikely value */
7816 int interruptible;
7817 vm_map_offset_t gap_start;
7818 __unused vm_map_offset_t save_start = start;
7819 __unused vm_map_offset_t save_end = end;
7820 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7821 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7822
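	/*
	 * Gap detection: only live user maps that did not pass
	 * VM_MAP_REMOVE_GAPS_OK are checked for unmapped holes in the
	 * range; the kernel map and maps being terminated tolerate gaps.
	 */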
7823 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7824 gap_start = FIND_GAP;
7825 } else {
7826 gap_start = GAPS_OK;
7827 }
7828
7829 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7830 THREAD_ABORTSAFE : THREAD_UNINT;
7831
7832 /*
7833 * All our DMA I/O operations in IOKit are currently done by
7834 * wiring through the map entries of the task requesting the I/O.
7835 * Because of this, we must always wait for kernel wirings
7836 * to go away on the entries before deleting them.
7837 *
7838 * Any caller who wants to actually remove a kernel wiring
7839 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7840 * properly remove one wiring instead of blasting through
7841 * them all.
7842 */
7843 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7844
7845 while (1) {
7846 /*
7847 * Find the start of the region, and clip it
7848 */
7849 if (vm_map_lookup_entry(map, start, &first_entry)) {
7850 entry = first_entry;
7851 if (kalloc_owned_map(map) &&
7852 (entry->vme_start != start ||
7853 entry->vme_end != end)) {
7854 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7855 "mismatched entry %p [0x%llx:0x%llx]\n",
7856 map,
7857 (uint64_t)start,
7858 (uint64_t)end,
7859 entry,
7860 (uint64_t)entry->vme_start,
7861 (uint64_t)entry->vme_end);
7862 }
7863
7864 /*
7865 * If in a superpage, extend the range to include the start of the mapping.
7866 */
7867 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7868 start = SUPERPAGE_ROUND_DOWN(start);
7869 continue;
7870 }
7871
7872 if (start == entry->vme_start) {
7873 /*
7874 * No need to clip. We don't want to cause
7875 * any unnecessary unnesting in this case...
7876 */
7877 } else {
7878 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7879 entry->map_aligned &&
7880 !VM_MAP_PAGE_ALIGNED(
7881 start,
7882 VM_MAP_PAGE_MASK(map))) {
7883 /*
7884 * The entry will no longer be
7885 * map-aligned after clipping
7886 * and the caller said it's OK.
7887 */
7888 entry->map_aligned = FALSE;
7889 }
7890 if (kalloc_owned_map(map)) {
7891 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7892 " clipping %p at 0x%llx\n",
7893 map,
7894 (uint64_t)start,
7895 (uint64_t)end,
7896 entry,
7897 (uint64_t)start);
7898 }
7899 vm_map_clip_start(map, entry, start);
7900 }
7901
7902 /*
7903 * Fix the lookup hint now, rather than each
7904 * time through the loop.
7905 */
7906 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7907 } else {
7908 if (map->pmap == kernel_pmap &&
7909 os_ref_get_count_raw(&map->map_refcnt) != 0) {
7910 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7911 "no map entry at 0x%llx\n",
7912 map,
7913 (uint64_t)start,
7914 (uint64_t)end,
7915 (uint64_t)start);
7916 }
7917 entry = first_entry->vme_next;
7918 if (gap_start == FIND_GAP) {
7919 gap_start = start;
7920 }
7921 }
7922 break;
7923 }
7924 if (entry->superpage_size) {
7925 end = SUPERPAGE_ROUND_UP(end);
7926 }
7927
7928 need_wakeup = FALSE;
7929 /*
7930 * Step through all entries in this region
7931 */
7932 s = entry->vme_start;
7933 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7934 /*
7935 * At this point, we have deleted all the memory entries
7936 * between "start" and "s". We still need to delete
7937 * all memory entries between "s" and "end".
7938 * While we were blocked and the map was unlocked, some
7939 * new memory entries could have been re-allocated between
7940 * "start" and "s" and we don't want to mess with those.
7941 * Some of those entries could even have been re-assembled
7942 * with an entry after "s" (in vm_map_simplify_entry()), so
7943 * we may have to vm_map_clip_start() again.
7944 */
7945
7946 if (entry->vme_start >= s) {
7947 /*
7948 * This entry starts on or after "s"
7949 * so no need to clip its start.
7950 */
7951 } else {
7952 /*
7953 * This entry has been re-assembled by a
7954 * vm_map_simplify_entry(). We need to
7955 * re-clip its start.
7956 */
7957 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7958 entry->map_aligned &&
7959 !VM_MAP_PAGE_ALIGNED(s,
7960 VM_MAP_PAGE_MASK(map))) {
7961 /*
7962 * The entry will no longer be map-aligned
7963 * after clipping and the caller said it's OK.
7964 */
7965 entry->map_aligned = FALSE;
7966 }
7967 if (kalloc_owned_map(map)) {
7968 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7969 "clipping %p at 0x%llx\n",
7970 map,
7971 (uint64_t)start,
7972 (uint64_t)end,
7973 entry,
7974 (uint64_t)s);
7975 }
7976 vm_map_clip_start(map, entry, s);
7977 }
7978 if (entry->vme_end <= end) {
7979 /*
7980 * This entry is going away completely, so no need
7981 * to clip and possibly cause an unnecessary unnesting.
7982 */
7983 } else {
7984 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7985 entry->map_aligned &&
7986 !VM_MAP_PAGE_ALIGNED(end,
7987 VM_MAP_PAGE_MASK(map))) {
7988 /*
7989 * The entry will no longer be map-aligned
7990 * after clipping and the caller said it's OK.
7991 */
7992 entry->map_aligned = FALSE;
7993 }
7994 if (kalloc_owned_map(map)) {
7995 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7996 "clipping %p at 0x%llx\n",
7997 map,
7998 (uint64_t)start,
7999 (uint64_t)end,
8000 entry,
8001 (uint64_t)end);
8002 }
8003 vm_map_clip_end(map, entry, end);
8004 }
8005
8006 if (entry->permanent) {
8007 if (map->pmap == kernel_pmap) {
8008 panic("%s(%p,0x%llx,0x%llx): "
8009 "attempt to remove permanent "
8010 "VM map entry "
8011 "%p [0x%llx:0x%llx]\n",
8012 __FUNCTION__,
8013 map,
8014 (uint64_t) start,
8015 (uint64_t) end,
8016 entry,
8017 (uint64_t) entry->vme_start,
8018 (uint64_t) entry->vme_end);
8019 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8020 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8021 entry->permanent = FALSE;
8022 } else {
8023 if (vm_map_executable_immutable_verbose) {
8024 printf("%d[%s] %s(0x%llx,0x%llx): "
8025 "permanent entry [0x%llx:0x%llx] "
8026 "prot 0x%x/0x%x\n",
8027 proc_selfpid(),
8028 (current_task()->bsd_info
8029 ? proc_name_address(current_task()->bsd_info)
8030 : "?"),
8031 __FUNCTION__,
8032 (uint64_t) start,
8033 (uint64_t) end,
8034 (uint64_t)entry->vme_start,
8035 (uint64_t)entry->vme_end,
8036 entry->protection,
8037 entry->max_protection);
8038 }
8039 /*
8040 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8041 */
8042 DTRACE_VM5(vm_map_delete_permanent,
8043 vm_map_offset_t, entry->vme_start,
8044 vm_map_offset_t, entry->vme_end,
8045 vm_prot_t, entry->protection,
8046 vm_prot_t, entry->max_protection,
8047 int, VME_ALIAS(entry));
8048 }
8049 }
8050
8051
8052 if (entry->in_transition) {
8053 wait_result_t wait_result;
8054
8055 /*
8056 * Another thread is wiring/unwiring this entry.
8057 * Let the other thread know we are waiting.
8058 */
8059 assert(s == entry->vme_start);
8060 entry->needs_wakeup = TRUE;
8061
8062 /*
8063 * wake up anybody waiting on entries that we have
8064 * already unwired/deleted.
8065 */
8066 if (need_wakeup) {
8067 vm_map_entry_wakeup(map);
8068 need_wakeup = FALSE;
8069 }
8070
8071 wait_result = vm_map_entry_wait(map, interruptible);
8072
8073 if (interruptible &&
8074 wait_result == THREAD_INTERRUPTED) {
8075 /*
8076 * We do not clear the needs_wakeup flag,
8077 * since we cannot tell if we were the only one.
8078 */
8079 return KERN_ABORTED;
8080 }
8081
8082 /*
8083 * The entry could have been clipped or it
8084 * may not exist anymore. Look it up again.
8085 */
8086 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8087 /*
8088 * User: use the next entry
8089 */
8090 if (gap_start == FIND_GAP) {
8091 gap_start = s;
8092 }
8093 entry = first_entry->vme_next;
8094 s = entry->vme_start;
8095 } else {
8096 entry = first_entry;
8097 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8098 }
8099 last_timestamp = map->timestamp;
8100 continue;
8101 } /* end in_transition */
8102
8103 if (entry->wired_count) {
8104 boolean_t user_wire;
8105
8106 user_wire = entry->user_wired_count > 0;
8107
8108 /*
8109 * Remove a kernel wiring if requested
8110 */
8111 if (flags & VM_MAP_REMOVE_KUNWIRE) {
8112 entry->wired_count--;
8113 }
8114
8115 /*
8116 * Remove all user wirings for proper accounting
8117 */
8118 if (entry->user_wired_count > 0) {
8119 while (entry->user_wired_count) {
8120 subtract_wire_counts(map, entry, user_wire);
8121 }
8122 }
8123
8124 if (entry->wired_count != 0) {
8125 assert(map != kernel_map);
8126 /*
8127 * Cannot continue. Typical case is when
8128 * a user thread has physical I/O pending
8129 * on this page. Either wait for the
8130 * kernel wiring to go away or return an
8131 * error.
8132 */
8133 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
8134 wait_result_t wait_result;
8135
8136 assert(s == entry->vme_start);
8137 entry->needs_wakeup = TRUE;
8138 wait_result = vm_map_entry_wait(map,
8139 interruptible);
8140
8141 if (interruptible &&
8142 wait_result == THREAD_INTERRUPTED) {
8143 /*
8144 * We do not clear the
8145 * needs_wakeup flag, since we
8146 * cannot tell if we were the
8147 * only one.
8148 */
8149 return KERN_ABORTED;
8150 }
8151
8152 /*
8153 * The entry could have been clipped or
8154 * it may not exist anymore. Look it
8155 * up again.
8156 */
8157 if (!vm_map_lookup_entry(map, s,
8158 &first_entry)) {
8159 assert(map != kernel_map);
8160 /*
8161 * User: use the next entry
8162 */
8163 if (gap_start == FIND_GAP) {
8164 gap_start = s;
8165 }
8166 entry = first_entry->vme_next;
8167 s = entry->vme_start;
8168 } else {
8169 entry = first_entry;
8170 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8171 }
8172 last_timestamp = map->timestamp;
8173 continue;
8174 } else {
8175 return KERN_FAILURE;
8176 }
8177 }
8178
8179 entry->in_transition = TRUE;
8180 /*
8181 * copy current entry. see comment in vm_map_wire()
8182 */
8183 tmp_entry = *entry;
8184 assert(s == entry->vme_start);
8185
8186 /*
8187 * We can unlock the map now. The in_transition
8188 * state guarantees existence of the entry.
8189 */
8190 vm_map_unlock(map);
8191
8192 if (tmp_entry.is_sub_map) {
8193 vm_map_t sub_map;
8194 vm_map_offset_t sub_start, sub_end;
8195 pmap_t pmap;
8196 vm_map_offset_t pmap_addr;
8197
8198
8199 sub_map = VME_SUBMAP(&tmp_entry);
8200 sub_start = VME_OFFSET(&tmp_entry);
8201 sub_end = sub_start + (tmp_entry.vme_end -
8202 tmp_entry.vme_start);
8203 if (tmp_entry.use_pmap) {
8204 pmap = sub_map->pmap;
8205 pmap_addr = tmp_entry.vme_start;
8206 } else {
8207 pmap = map->pmap;
8208 pmap_addr = tmp_entry.vme_start;
8209 }
8210 (void) vm_map_unwire_nested(sub_map,
8211 sub_start, sub_end,
8212 user_wire,
8213 pmap, pmap_addr);
8214 } else {
8215 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8216 pmap_protect_options(
8217 map->pmap,
8218 tmp_entry.vme_start,
8219 tmp_entry.vme_end,
8220 VM_PROT_NONE,
8221 PMAP_OPTIONS_REMOVE,
8222 NULL);
8223 }
8224 vm_fault_unwire(map, &tmp_entry,
8225 VME_OBJECT(&tmp_entry) == kernel_object,
8226 map->pmap, tmp_entry.vme_start);
8227 }
8228
8229 vm_map_lock(map);
8230
8231 if (last_timestamp + 1 != map->timestamp) {
8232 /*
8233 * Find the entry again. It could have
8234 * been clipped after we unlocked the map.
8235 */
8236 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8237 assert((map != kernel_map) &&
8238 (!entry->is_sub_map));
8239 if (gap_start == FIND_GAP) {
8240 gap_start = s;
8241 }
8242 first_entry = first_entry->vme_next;
8243 s = first_entry->vme_start;
8244 } else {
8245 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8246 }
8247 } else {
8248 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8249 first_entry = entry;
8250 }
8251
8252 last_timestamp = map->timestamp;
8253
8254 entry = first_entry;
8255 while ((entry != vm_map_to_entry(map)) &&
8256 (entry->vme_start < tmp_entry.vme_end)) {
8257 assert(entry->in_transition);
8258 entry->in_transition = FALSE;
8259 if (entry->needs_wakeup) {
8260 entry->needs_wakeup = FALSE;
8261 need_wakeup = TRUE;
8262 }
8263 entry = entry->vme_next;
8264 }
8265 /*
8266 * We have unwired the entry(s). Go back and
8267 * delete them.
8268 */
8269 entry = first_entry;
8270 continue;
8271 }
8272
8273 /* entry is unwired */
8274 assert(entry->wired_count == 0);
8275 assert(entry->user_wired_count == 0);
8276
8277 assert(s == entry->vme_start);
8278
8279 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8280 /*
8281 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8282 * vm_map_delete(), some map entries might have been
8283 * transferred to a "zap_map", which doesn't have a
8284 * pmap. The original pmap has already been flushed
8285 * in the vm_map_delete() call targeting the original
8286 * map, but when we get to destroying the "zap_map",
8287 * we don't have any pmap to flush, so let's just skip
8288 * all this.
8289 */
8290 } else if (entry->is_sub_map) {
8291 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8292 "map %p (%d) entry %p submap %p (%d)\n",
8293 map, VM_MAP_PAGE_SHIFT(map), entry,
8294 VME_SUBMAP(entry),
8295 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8296 if (entry->use_pmap) {
8297 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
8298 "map %p (%d) entry %p submap %p (%d)\n",
8299 map, VM_MAP_PAGE_SHIFT(map), entry,
8300 VME_SUBMAP(entry),
8301 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8302 #ifndef NO_NESTED_PMAP
8303 int pmap_flags;
8304
8305 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8306 /*
8307 * This is the final cleanup of the
8308 * address space being terminated.
8309 * No new mappings are expected and
8310 * we don't really need to unnest the
8311 * shared region (and lose the "global"
8312 * pmap mappings, if applicable).
8313 *
8314 * Tell the pmap layer that we're
8315 * "clean" wrt nesting.
8316 */
8317 pmap_flags = PMAP_UNNEST_CLEAN;
8318 } else {
8319 /*
8320 * We're unmapping part of the nested
8321 * shared region, so we can't keep the
8322 * nested pmap.
8323 */
8324 pmap_flags = 0;
8325 }
8326 pmap_unnest_options(
8327 map->pmap,
8328 (addr64_t)entry->vme_start,
8329 entry->vme_end - entry->vme_start,
8330 pmap_flags);
8331 #endif /* NO_NESTED_PMAP */
8332 if (map->mapped_in_other_pmaps &&
8333 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8334 /* clean up parent map/maps */
8335 vm_map_submap_pmap_clean(
8336 map, entry->vme_start,
8337 entry->vme_end,
8338 VME_SUBMAP(entry),
8339 VME_OFFSET(entry));
8340 }
8341 } else {
8342 vm_map_submap_pmap_clean(
8343 map, entry->vme_start, entry->vme_end,
8344 VME_SUBMAP(entry),
8345 VME_OFFSET(entry));
8346 }
8347 } else if (VME_OBJECT(entry) != kernel_object &&
8348 VME_OBJECT(entry) != compressor_object) {
8349 object = VME_OBJECT(entry);
8350 if (map->mapped_in_other_pmaps &&
8351 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8352 vm_object_pmap_protect_options(
8353 object, VME_OFFSET(entry),
8354 entry->vme_end - entry->vme_start,
8355 PMAP_NULL,
8356 PAGE_SIZE,
8357 entry->vme_start,
8358 VM_PROT_NONE,
8359 PMAP_OPTIONS_REMOVE);
8360 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8361 (map->pmap == kernel_pmap)) {
8362 /* Remove translations associated
8363 * with this range unless the entry
8364 * does not have an object, or
8365 * it's the kernel map or a descendant
8366 * since the platform could potentially
8367 * create "backdoor" mappings invisible
8368 * to the VM. It is expected that
8369 * objectless, non-kernel ranges
8370 * do not have such VM invisible
8371 * translations.
8372 */
8373 pmap_remove_options(map->pmap,
8374 (addr64_t)entry->vme_start,
8375 (addr64_t)entry->vme_end,
8376 PMAP_OPTIONS_REMOVE);
8377 }
8378 }
8379
8380 if (entry->iokit_acct) {
8381 /* alternate accounting */
8382 DTRACE_VM4(vm_map_iokit_unmapped_region,
8383 vm_map_t, map,
8384 vm_map_offset_t, entry->vme_start,
8385 vm_map_offset_t, entry->vme_end,
8386 int, VME_ALIAS(entry));
8387 vm_map_iokit_unmapped_region(map,
8388 (entry->vme_end -
8389 entry->vme_start));
8390 entry->iokit_acct = FALSE;
8391 entry->use_pmap = FALSE;
8392 }
8393
8394 /*
8395 * All pmap mappings for this map entry must have been
8396 * cleared by now.
8397 */
8398 #if DEBUG
8399 assert(pmap_is_empty(map->pmap,
8400 entry->vme_start,
8401 entry->vme_end));
8402 #endif /* DEBUG */
8403
8404 next = entry->vme_next;
8405
8406 if (map->pmap == kernel_pmap &&
8407 os_ref_get_count_raw(&map->map_refcnt) != 0) {
8408 if (entry->vme_end < end && (next == vm_map_to_entry(map) || next->vme_start != entry->vme_end)) {
8409 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8410 "hole after %p at 0x%llx\n",
8411 map,
8412 (uint64_t)start,
8413 (uint64_t)end,
8414 entry,
8415 (uint64_t)entry->vme_end);
8416 }
8417
8418 if (entry->vme_atomic && (entry->vme_start != start || entry->vme_end != end)) {
8419 /*
8420 * In the kernel map and its submaps, the removal of
8421 * an atomic entry is strict. An atomic entry is
8422 * processed only if it was specifically targeted. We
8423 * might have deleted non-atomic entries before it but
8424 * we won't remove this atomic entry OR anything after it.
8425 */
8426 #if DEVELOPMENT || DEBUG
8427 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8428 "request loosely encompasses atomic entry %p at (0x%llx,0x%llx)\n",
8429 map,
8430 (uint64_t)start,
8431 (uint64_t)end,
8432 entry,
8433 (uint64_t)entry->vme_start,
8434 (uint64_t)entry->vme_end);
8435 #endif /* DEVELOPMENT || DEBUG */
8436
8437 break;
8438 }
8439 }
8440
8441 /*
8442 * If the desired range didn't end with "entry", then there is a gap if
8443 * we wrapped around to the start of the map or if "entry" and "next"
8444 * aren't contiguous.
8445 *
8446 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8447 * For example, on devices with 4K hardware pages, map entry sizes are now all 16K.
8448 */
8449 if (gap_start == FIND_GAP &&
8450 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8451 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8452 gap_start = entry->vme_end;
8453 }
8454 s = next->vme_start;
8455 last_timestamp = map->timestamp;
8456
8457 if (entry->permanent) {
8458 /*
8459 * A permanent entry can not be removed, so leave it
8460 * in place but remove all access permissions.
8461 */
8462 entry->protection = VM_PROT_NONE;
8463 entry->max_protection = VM_PROT_NONE;
8464 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8465 zap_map != VM_MAP_NULL) {
8466 vm_map_size_t entry_size;
8467 /*
8468 * The caller wants to save the affected VM map entries
8469 * into the "zap_map". The caller will take care of
8470 * these entries.
8471 */
8472 /* unlink the entry from "map" ... */
8473 vm_map_store_entry_unlink(map, entry);
8474 /* ... and add it to the end of the "zap_map" */
8475 vm_map_store_entry_link(zap_map,
8476 vm_map_last_entry(zap_map),
8477 entry,
8478 VM_MAP_KERNEL_FLAGS_NONE);
8479 entry_size = entry->vme_end - entry->vme_start;
8480 map->size -= entry_size;
8481 zap_map->size += entry_size;
8482 /* we didn't unlock the map, so no timestamp increase */
8483 last_timestamp--;
8484 } else {
8485 vm_map_entry_delete(map, entry);
8486 /* vm_map_entry_delete unlocks the map */
8487 vm_map_lock(map);
8488 }
8489
8490 entry = next;
8491
8492 if (entry == vm_map_to_entry(map)) {
8493 break;
8494 }
8495 if (last_timestamp + 1 != map->timestamp) {
8496 /*
8497 * We are responsible for deleting everything
8498 * from the given space. If someone has interfered,
8499 * we pick up where we left off. Back fills should
8500 * be all right for anyone except map_delete, and
8501 * we have to assume that the task has been fully
8502 * disabled before we get here.
8503 */
8504 if (!vm_map_lookup_entry(map, s, &entry)) {
8505 entry = entry->vme_next;
8506
8507 /*
8508 * Nothing found for s. If we weren't already done, then there is a gap.
8509 */
8510 if (gap_start == FIND_GAP && s < end) {
8511 gap_start = s;
8512 }
8513 s = entry->vme_start;
8514 } else {
8515 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8516 }
8517 /*
8518 * Others can not only allocate behind us, entries can
8519 * also coalesce while we don't hold the map lock.
8520 */
8521 if (entry == vm_map_to_entry(map)) {
8522 break;
8523 }
8524 }
8525 last_timestamp = map->timestamp;
8526 }
8527
8528 if (map->wait_for_space) {
8529 thread_wakeup((event_t) map);
8530 }
8531 /*
8532 * wake up anybody waiting on entries that we have already deleted.
8533 */
8534 if (need_wakeup) {
8535 vm_map_entry_wakeup(map);
8536 }
8537
8538 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8539 DTRACE_VM3(kern_vm_deallocate_gap,
8540 vm_map_offset_t, gap_start,
8541 vm_map_offset_t, save_start,
8542 vm_map_offset_t, save_end);
8543 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8544 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8545 }
8546 }
8547
8548 return KERN_SUCCESS;
8549 }
8550
8551
8552 /*
8553 * vm_map_terminate:
8554 *
8555 * Clean out a task's map.
8556 */
8557 kern_return_t
8558 vm_map_terminate(
8559 vm_map_t map)
8560 {
8561 vm_map_lock(map);
8562 map->terminated = TRUE;
8563 vm_map_unlock(map);
8564
8565 return vm_map_remove(map,
8566 map->min_offset,
8567 map->max_offset,
8568 /*
8569 * Final cleanup:
8570 * + no unnesting
8571 * + remove immutable mappings
8572 * + allow gaps in range
8573 */
8574 (VM_MAP_REMOVE_NO_UNNESTING |
8575 VM_MAP_REMOVE_IMMUTABLE |
8576 VM_MAP_REMOVE_GAPS_OK));
8577 }
8578
8579 /*
8580 * vm_map_remove:
8581 *
8582 * Remove the given address range from the target map.
8583 * This is the exported form of vm_map_delete.
8584 */
8585 kern_return_t
8586 vm_map_remove(
8587 vm_map_t map,
8588 vm_map_offset_t start,
8589 vm_map_offset_t end,
8590 boolean_t flags)
8591 {
8592 kern_return_t result;
8593
8594 vm_map_lock(map);
8595 VM_MAP_RANGE_CHECK(map, start, end);
8596 /*
8597 * For the zone maps, the kernel controls the allocation/freeing of memory.
8598 * Any free to the zone maps should be within the bounds of the map and
8599 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8600 * free to the zone maps into a no-op, there is a problem and we should
8601 * panic.
8602 */
8603 if ((start == end) && zone_maps_owned(start, 1)) {
8604 panic("Nothing being freed to a zone map. start = end = %p", (void *)start);
8605 }
8606 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8607 vm_map_unlock(map);
8608
8609 return result;
8610 }
8611
8612 /*
8613 * vm_map_remove_locked:
8614 *
8615 * Remove the given address range from the target locked map.
8616 * This is the exported form of vm_map_delete.
8617 */
8618 kern_return_t
8619 vm_map_remove_locked(
8620 vm_map_t map,
8621 vm_map_offset_t start,
8622 vm_map_offset_t end,
8623 boolean_t flags)
8624 {
8625 kern_return_t result;
8626
8627 VM_MAP_RANGE_CHECK(map, start, end);
8628 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8629 return result;
8630 }
8631
8632
8633 /*
8634 * Routine: vm_map_copy_allocate
8635 *
8636 * Description:
8637 * Allocates and initializes a map copy object.
8638 */
8639 static vm_map_copy_t
8640 vm_map_copy_allocate(void)
8641 {
8642 vm_map_copy_t new_copy;
8643
8644 new_copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
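	/*
	 * Copy objects keep their entries on a plain doubly-linked list;
	 * mark the header so the map store code skips the red-black tree.
	 */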
8645 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8646 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8647 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8648 return new_copy;
8649 }
8650
8651 /*
8652 * Routine: vm_map_copy_discard
8653 *
8654 * Description:
8655 * Dispose of a map copy object (returned by
8656 * vm_map_copyin).
8657 */
8658 void
8659 vm_map_copy_discard(
8660 vm_map_copy_t copy)
8661 {
8662 if (copy == VM_MAP_COPY_NULL) {
8663 return;
8664 }
8665
8666 switch (copy->type) {
8667 case VM_MAP_COPY_ENTRY_LIST:
8668 while (vm_map_copy_first_entry(copy) !=
8669 vm_map_copy_to_entry(copy)) {
8670 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8671
8672 vm_map_copy_entry_unlink(copy, entry);
8673 if (entry->is_sub_map) {
8674 vm_map_deallocate(VME_SUBMAP(entry));
8675 } else {
8676 vm_object_deallocate(VME_OBJECT(entry));
8677 }
8678 vm_map_copy_entry_dispose(copy, entry);
8679 }
8680 break;
8681 case VM_MAP_COPY_OBJECT:
8682 vm_object_deallocate(copy->cpy_object);
8683 break;
8684 case VM_MAP_COPY_KERNEL_BUFFER:
8685
8686 /*
8687 * The vm_map_copy_t and possibly the data buffer were
8688 * allocated by a single call to kalloc_data(), i.e. the
8689 * vm_map_copy_t was not allocated out of the zone.
8690 */
8691 if (copy->size > msg_ool_size_small || copy->offset) {
8692 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8693 (long long)copy->size, (long long)copy->offset);
8694 }
8695 kfree_data(copy->cpy_kdata, copy->size);
8696 }
8697 zfree(vm_map_copy_zone, copy);
8698 }
8699
8700 /*
8701 * Routine: vm_map_copy_copy
8702 *
8703 * Description:
8704 * Move the information in a map copy object to
8705 * a new map copy object, leaving the old one
8706 * empty.
8707 *
8708 * This is used by kernel routines that need
8709 * to look at out-of-line data (in copyin form)
8710 * before deciding whether to return SUCCESS.
8711 * If the routine returns FAILURE, the original
8712 * copy object will be deallocated; therefore,
8713 * these routines must make a copy of the copy
8714 * object and leave the original empty so that
8715 * deallocation will not fail.
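 *
 * Illustrative sketch (the caller and its "inspect()" helper are
 * hypothetical):
 *
 *	vm_map_copy_t saved = vm_map_copy_copy(copy);
 *	kern_return_t kr = inspect(saved);
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(saved);
 *		return kr;	(the original "copy" is now empty,
 *				 so deallocating it later is harmless)
 *	}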
8716 */
8717 vm_map_copy_t
8718 vm_map_copy_copy(
8719 vm_map_copy_t copy)
8720 {
8721 vm_map_copy_t new_copy;
8722
8723 if (copy == VM_MAP_COPY_NULL) {
8724 return VM_MAP_COPY_NULL;
8725 }
8726
8727 /*
8728 * Allocate a new copy object, and copy the information
8729 * from the old one into it.
8730 */
8731
8732 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8733 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8734 #if __has_feature(ptrauth_calls)
8735 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8736 new_copy->cpy_kdata = copy->cpy_kdata;
8737 }
8738 #endif
8739
8740 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8741 /*
8742 * The links in the entry chain must be
8743 * changed to point to the new copy object.
8744 */
8745 vm_map_copy_first_entry(copy)->vme_prev
8746 = vm_map_copy_to_entry(new_copy);
8747 vm_map_copy_last_entry(copy)->vme_next
8748 = vm_map_copy_to_entry(new_copy);
8749 }
8750
8751 /*
8752 * Change the old copy object into one that contains
8753 * nothing to be deallocated.
8754 */
8755 copy->type = VM_MAP_COPY_OBJECT;
8756 copy->cpy_object = VM_OBJECT_NULL;
8757
8758 /*
8759 * Return the new object.
8760 */
8761 return new_copy;
8762 }
8763
8764 static boolean_t
8765 vm_map_entry_is_overwritable(
8766 vm_map_t dst_map __unused,
8767 vm_map_entry_t entry)
8768 {
8769 if (!(entry->protection & VM_PROT_WRITE)) {
8770 /* can't overwrite if not writable */
8771 return FALSE;
8772 }
8773 #if !__x86_64__
8774 if (entry->used_for_jit &&
8775 vm_map_cs_enforcement(dst_map) &&
8776 !dst_map->cs_debugged) {
8777 /*
8778 * Can't overwrite a JIT region while cs_enforced
8779 * and not cs_debugged.
8780 */
8781 return FALSE;
8782 }
8783 #endif /* !__x86_64__ */
8784 return TRUE;
8785 }
8786
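/*
 * vm_map_overwrite_submap_recurse:
 *
 * Verify that the destination range [dst_addr, dst_addr + dst_size) of
 * "dst_map" is fully mapped, contiguous and overwritable, recursing into
 * any submaps encountered, so that a copy-overwrite of that range can
 * safely proceed.
 */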
8787 static kern_return_t
8788 vm_map_overwrite_submap_recurse(
8789 vm_map_t dst_map,
8790 vm_map_offset_t dst_addr,
8791 vm_map_size_t dst_size)
8792 {
8793 vm_map_offset_t dst_end;
8794 vm_map_entry_t tmp_entry;
8795 vm_map_entry_t entry;
8796 kern_return_t result;
8797 boolean_t encountered_sub_map = FALSE;
8798
8799
8800
8801 /*
8802 * Verify that the destination is all writeable
8803 * initially. We have to trunc the destination
8804 * address and round the copy size or we'll end up
8805 * splitting entries in strange ways.
8806 */
8807
8808 dst_end = vm_map_round_page(dst_addr + dst_size,
8809 VM_MAP_PAGE_MASK(dst_map));
8810 vm_map_lock(dst_map);
8811
8812 start_pass_1:
8813 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8814 vm_map_unlock(dst_map);
8815 return KERN_INVALID_ADDRESS;
8816 }
8817
8818 vm_map_clip_start(dst_map,
8819 tmp_entry,
8820 vm_map_trunc_page(dst_addr,
8821 VM_MAP_PAGE_MASK(dst_map)));
8822 if (tmp_entry->is_sub_map) {
8823 /* clipping did unnest if needed */
8824 assert(!tmp_entry->use_pmap);
8825 }
8826
8827 for (entry = tmp_entry;;) {
8828 vm_map_entry_t next;
8829
8830 next = entry->vme_next;
8831 while (entry->is_sub_map) {
8832 vm_map_offset_t sub_start;
8833 vm_map_offset_t sub_end;
8834 vm_map_offset_t local_end;
8835
8836 if (entry->in_transition) {
8837 /*
8838 * Say that we are waiting, and wait for entry.
8839 */
8840 entry->needs_wakeup = TRUE;
8841 vm_map_entry_wait(dst_map, THREAD_UNINT);
8842
8843 goto start_pass_1;
8844 }
8845
8846 encountered_sub_map = TRUE;
8847 sub_start = VME_OFFSET(entry);
8848
8849 if (entry->vme_end < dst_end) {
8850 sub_end = entry->vme_end;
8851 } else {
8852 sub_end = dst_end;
8853 }
8854 sub_end -= entry->vme_start;
8855 sub_end += VME_OFFSET(entry);
8856 local_end = entry->vme_end;
8857 vm_map_unlock(dst_map);
8858
8859 result = vm_map_overwrite_submap_recurse(
8860 VME_SUBMAP(entry),
8861 sub_start,
8862 sub_end - sub_start);
8863
8864 if (result != KERN_SUCCESS) {
8865 return result;
8866 }
8867 if (dst_end <= entry->vme_end) {
8868 return KERN_SUCCESS;
8869 }
8870 vm_map_lock(dst_map);
8871 if (!vm_map_lookup_entry(dst_map, local_end,
8872 &tmp_entry)) {
8873 vm_map_unlock(dst_map);
8874 return KERN_INVALID_ADDRESS;
8875 }
8876 entry = tmp_entry;
8877 next = entry->vme_next;
8878 }
8879
8880 if (!(entry->protection & VM_PROT_WRITE)) {
8881 vm_map_unlock(dst_map);
8882 return KERN_PROTECTION_FAILURE;
8883 }
8884
8885 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
8886 vm_map_unlock(dst_map);
8887 return KERN_PROTECTION_FAILURE;
8888 }
8889
8890 /*
8891 * If the entry is in transition, we must wait
8892 * for it to exit that state. Anything could happen
8893 * when we unlock the map, so start over.
8894 */
8895 if (entry->in_transition) {
8896 /*
8897 * Say that we are waiting, and wait for entry.
8898 */
8899 entry->needs_wakeup = TRUE;
8900 vm_map_entry_wait(dst_map, THREAD_UNINT);
8901
8902 goto start_pass_1;
8903 }
8904
8905 /*
8906 * our range is contained completely within this map entry
8907 */
8908 if (dst_end <= entry->vme_end) {
8909 vm_map_unlock(dst_map);
8910 return KERN_SUCCESS;
8911 }
8912 /*
8913 * check that range specified is contiguous region
8914 */
8915 if ((next == vm_map_to_entry(dst_map)) ||
8916 (next->vme_start != entry->vme_end)) {
8917 vm_map_unlock(dst_map);
8918 return KERN_INVALID_ADDRESS;
8919 }
8920
8921 /*
8922 * Check for permanent objects in the destination.
8923 */
8924 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8925 ((!VME_OBJECT(entry)->internal) ||
8926 (VME_OBJECT(entry)->true_share))) {
8927 if (encountered_sub_map) {
8928 vm_map_unlock(dst_map);
8929 return KERN_FAILURE;
8930 }
8931 }
8932
8933
8934 entry = next;
8935 }/* for */
8936 vm_map_unlock(dst_map);
8937 return KERN_SUCCESS;
8938 }
8939
8940 /*
8941 * Routine: vm_map_copy_overwrite
8942 *
8943 * Description:
8944 * Copy the memory described by the map copy
8945 * object (copy; returned by vm_map_copyin) onto
8946 * the specified destination region (dst_map, dst_addr).
8947 * The destination must be writeable.
8948 *
8949 * Unlike vm_map_copyout, this routine actually
8950 * writes over previously-mapped memory. If the
8951 * previous mapping was to a permanent (user-supplied)
8952 * memory object, it is preserved.
8953 *
8954 * The attributes (protection and inheritance) of the
8955 * destination region are preserved.
8956 *
8957 * If successful, consumes the copy object.
8958 * Otherwise, the caller is responsible for it.
8959 *
8960 * Implementation notes:
8961 * To overwrite aligned temporary virtual memory, it is
8962 * sufficient to remove the previous mapping and insert
8963 * the new copy. This replacement is done either on
8964 * the whole region (if no permanent virtual memory
8965 * objects are embedded in the destination region) or
8966 * in individual map entries.
8967 *
8968 * To overwrite permanent virtual memory, it is necessary
8969 * to copy each page, as the external memory management
8970 * interface currently does not provide any optimizations.
8971 *
8972 * Unaligned memory also has to be copied. It is possible
8973 * to use 'vm_trickery' to copy the aligned data. This is
8974 * not done but not hard to implement.
8975 *
8976 * Once a page of permanent memory has been overwritten,
8977 * it is impossible to interrupt this function; otherwise,
8978 * the call would be neither atomic nor location-independent.
8979 * The kernel-state portion of a user thread must be
8980 * interruptible.
8981 *
8982 * It may be expensive to forward all requests that might
8983 * overwrite permanent memory (vm_write, vm_copy) to
8984 * uninterruptible kernel threads. This routine may be
8985 * called by interruptible threads; however, success is
8986 * not guaranteed -- if the request cannot be performed
8987 * atomically and interruptibly, an error indication is
8988 * returned.
8989 */
8990
8991 static kern_return_t
8992 vm_map_copy_overwrite_nested(
8993 vm_map_t dst_map,
8994 vm_map_address_t dst_addr,
8995 vm_map_copy_t copy,
8996 boolean_t interruptible,
8997 pmap_t pmap,
8998 boolean_t discard_on_success)
8999 {
9000 vm_map_offset_t dst_end;
9001 vm_map_entry_t tmp_entry;
9002 vm_map_entry_t entry;
9003 kern_return_t kr;
9004 boolean_t aligned = TRUE;
9005 boolean_t contains_permanent_objects = FALSE;
9006 boolean_t encountered_sub_map = FALSE;
9007 vm_map_offset_t base_addr;
9008 vm_map_size_t copy_size;
9009 vm_map_size_t total_size;
9010 uint16_t copy_page_shift;
9011
9012
9013 /*
9014 * Check for null copy object.
9015 */
9016
9017 if (copy == VM_MAP_COPY_NULL) {
9018 return KERN_SUCCESS;
9019 }
9020
9021 /*
9022 * Assert that the vm_map_copy is coming from the right
9023 * zone and hasn't been forged
9024 */
9025 vm_map_copy_require(copy);
9026
9027 /*
9028 * Check for special kernel buffer allocated
9029 * by new_ipc_kmsg_copyin.
9030 */
9031
9032 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9033 return vm_map_copyout_kernel_buffer(
9034 dst_map, &dst_addr,
9035 copy, copy->size, TRUE, discard_on_success);
9036 }
9037
9038 /*
9039 * Only works for entry lists at the moment. Will
9040 * support page lists later.
9041 */
9042
9043 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9044
9045 if (copy->size == 0) {
9046 if (discard_on_success) {
9047 vm_map_copy_discard(copy);
9048 }
9049 return KERN_SUCCESS;
9050 }
9051
9052 copy_page_shift = copy->cpy_hdr.page_shift;
9053
9054 /*
9055 * Verify that the destination is all writeable
9056 * initially. We have to trunc the destination
9057 * address and round the copy size or we'll end up
9058 * splitting entries in strange ways.
9059 */
9060
9061 if (!VM_MAP_PAGE_ALIGNED(copy->size,
9062 VM_MAP_PAGE_MASK(dst_map)) ||
9063 !VM_MAP_PAGE_ALIGNED(copy->offset,
9064 VM_MAP_PAGE_MASK(dst_map)) ||
9065 !VM_MAP_PAGE_ALIGNED(dst_addr,
9066 VM_MAP_PAGE_MASK(dst_map)) ||
9067 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9068 aligned = FALSE;
9069 dst_end = vm_map_round_page(dst_addr + copy->size,
9070 VM_MAP_PAGE_MASK(dst_map));
9071 } else {
9072 dst_end = dst_addr + copy->size;
9073 }
9074
9075 vm_map_lock(dst_map);
9076
9077 /* LP64todo - remove this check when vm_map_commpage64()
9078 * no longer has to stuff in a map_entry for the commpage
9079 * above the map's max_offset.
9080 */
9081 if (dst_addr >= dst_map->max_offset) {
9082 vm_map_unlock(dst_map);
9083 return KERN_INVALID_ADDRESS;
9084 }
9085
9086 start_pass_1:
9087 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9088 vm_map_unlock(dst_map);
9089 return KERN_INVALID_ADDRESS;
9090 }
9091 vm_map_clip_start(dst_map,
9092 tmp_entry,
9093 vm_map_trunc_page(dst_addr,
9094 VM_MAP_PAGE_MASK(dst_map)));
9095 for (entry = tmp_entry;;) {
9096 vm_map_entry_t next = entry->vme_next;
9097
9098 while (entry->is_sub_map) {
9099 vm_map_offset_t sub_start;
9100 vm_map_offset_t sub_end;
9101 vm_map_offset_t local_end;
9102
9103 if (entry->in_transition) {
9104 /*
9105 * Say that we are waiting, and wait for entry.
9106 */
9107 entry->needs_wakeup = TRUE;
9108 vm_map_entry_wait(dst_map, THREAD_UNINT);
9109
9110 goto start_pass_1;
9111 }
9112
9113 local_end = entry->vme_end;
9114 if (!(entry->needs_copy)) {
9115 /* if needs_copy we are a COW submap */
9116 /* in such a case we just replace, so */
9117 /* there is no need for the */
9118 /* following check. */
9119 encountered_sub_map = TRUE;
9120 sub_start = VME_OFFSET(entry);
9121
9122 if (entry->vme_end < dst_end) {
9123 sub_end = entry->vme_end;
9124 } else {
9125 sub_end = dst_end;
9126 }
9127 sub_end -= entry->vme_start;
9128 sub_end += VME_OFFSET(entry);
9129 vm_map_unlock(dst_map);
9130
9131 kr = vm_map_overwrite_submap_recurse(
9132 VME_SUBMAP(entry),
9133 sub_start,
9134 sub_end - sub_start);
9135 if (kr != KERN_SUCCESS) {
9136 return kr;
9137 }
9138 vm_map_lock(dst_map);
9139 }
9140
9141 if (dst_end <= entry->vme_end) {
9142 goto start_overwrite;
9143 }
9144 if (!vm_map_lookup_entry(dst_map, local_end,
9145 &entry)) {
9146 vm_map_unlock(dst_map);
9147 return KERN_INVALID_ADDRESS;
9148 }
9149 next = entry->vme_next;
9150 }
9151
9152 if (!(entry->protection & VM_PROT_WRITE)) {
9153 vm_map_unlock(dst_map);
9154 return KERN_PROTECTION_FAILURE;
9155 }
9156
9157 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
9158 vm_map_unlock(dst_map);
9159 return KERN_PROTECTION_FAILURE;
9160 }
9161
9162 /*
9163 * If the entry is in transition, we must wait
9164 * for it to exit that state. Anything could happen
9165 * when we unlock the map, so start over.
9166 */
9167 if (entry->in_transition) {
9168 /*
9169 * Say that we are waiting, and wait for entry.
9170 */
9171 entry->needs_wakeup = TRUE;
9172 vm_map_entry_wait(dst_map, THREAD_UNINT);
9173
9174 goto start_pass_1;
9175 }
9176
9177 /*
9178 * our range is contained completely within this map entry
9179 */
9180 if (dst_end <= entry->vme_end) {
9181 break;
9182 }
9183 /*
9184 * check that range specified is contiguous region
9185 */
9186 if ((next == vm_map_to_entry(dst_map)) ||
9187 (next->vme_start != entry->vme_end)) {
9188 vm_map_unlock(dst_map);
9189 return KERN_INVALID_ADDRESS;
9190 }
9191
9192
9193 /*
9194 * Check for permanent objects in the destination.
9195 */
9196 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9197 ((!VME_OBJECT(entry)->internal) ||
9198 (VME_OBJECT(entry)->true_share))) {
9199 contains_permanent_objects = TRUE;
9200 }
9201
9202 entry = next;
9203 }/* for */
9204
9205 start_overwrite:
9206 /*
9207 * If there are permanent objects in the destination, then
9208 * the copy cannot be interrupted.
9209 */
9210
9211 if (interruptible && contains_permanent_objects) {
9212 vm_map_unlock(dst_map);
9213 return KERN_FAILURE; /* XXX */
9214 }
9215
9216 /*
9217 *
9218 * Make a second pass, overwriting the data
9219 * At the beginning of each loop iteration,
9220 * the next entry to be overwritten is "tmp_entry"
9221 * (initially, the value returned from the lookup above),
9222 * and the starting address expected in that entry
9223 * is "start".
9224 */
9225
9226 total_size = copy->size;
9227 if (encountered_sub_map) {
9228 copy_size = 0;
9229 /* re-calculate tmp_entry since we've had the map */
9230 /* unlocked */
9231 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9232 vm_map_unlock(dst_map);
9233 return KERN_INVALID_ADDRESS;
9234 }
9235 } else {
9236 copy_size = copy->size;
9237 }
9238
9239 base_addr = dst_addr;
9240 while (TRUE) {
9241 /* deconstruct the copy object and do it in parts, */
9242 /* but only in the sub_map, interruptible case */
9243 vm_map_entry_t copy_entry;
9244 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9245 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9246 int nentries;
9247 int remaining_entries = 0;
9248 vm_map_offset_t new_offset = 0;
9249
9250 for (entry = tmp_entry; copy_size == 0;) {
9251 vm_map_entry_t next;
9252
9253 next = entry->vme_next;
9254
9255 /* tmp_entry and base address are moved along */
9256 /* each time we encounter a sub-map. Otherwise */
9257 /* entry can outpace tmp_entry, and the copy_size */
9258 /* may reflect the distance between them. */
9259 /* If the current entry is found to be in transition, */
9260 /* we will start over at the beginning or at the last */
9261 /* encounter of a submap, as dictated by base_addr, */
9262 /* and we will zero copy_size accordingly. */
9263 if (entry->in_transition) {
9264 /*
9265 * Say that we are waiting, and wait for entry.
9266 */
9267 entry->needs_wakeup = TRUE;
9268 vm_map_entry_wait(dst_map, THREAD_UNINT);
9269
9270 if (!vm_map_lookup_entry(dst_map, base_addr,
9271 &tmp_entry)) {
9272 vm_map_unlock(dst_map);
9273 return KERN_INVALID_ADDRESS;
9274 }
9275 copy_size = 0;
9276 entry = tmp_entry;
9277 continue;
9278 }
9279 if (entry->is_sub_map) {
9280 vm_map_offset_t sub_start;
9281 vm_map_offset_t sub_end;
9282 vm_map_offset_t local_end;
9283
9284 if (entry->needs_copy) {
9285 /* if this is a COW submap */
9286 /* just back the range with an */
9287 /* anonymous entry */
9288 if (entry->vme_end < dst_end) {
9289 sub_end = entry->vme_end;
9290 } else {
9291 sub_end = dst_end;
9292 }
9293 if (entry->vme_start < base_addr) {
9294 sub_start = base_addr;
9295 } else {
9296 sub_start = entry->vme_start;
9297 }
9298 vm_map_clip_end(
9299 dst_map, entry, sub_end);
9300 vm_map_clip_start(
9301 dst_map, entry, sub_start);
9302 assert(!entry->use_pmap);
9303 assert(!entry->iokit_acct);
9304 entry->use_pmap = TRUE;
9305 entry->is_sub_map = FALSE;
9306 vm_map_deallocate(
9307 VME_SUBMAP(entry));
9308 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9309 VME_OFFSET_SET(entry, 0);
9310 entry->is_shared = FALSE;
9311 entry->needs_copy = FALSE;
9312 entry->protection = VM_PROT_DEFAULT;
9313 entry->max_protection = VM_PROT_ALL;
9314 entry->wired_count = 0;
9315 entry->user_wired_count = 0;
9316 if (entry->inheritance
9317 == VM_INHERIT_SHARE) {
9318 entry->inheritance = VM_INHERIT_COPY;
9319 }
9320 continue;
9321 }
9322 /* first take care of any non-sub_map */
9323 /* entries to send */
9324 if (base_addr < entry->vme_start) {
9325 /* stuff to send */
9326 copy_size =
9327 entry->vme_start - base_addr;
9328 break;
9329 }
9330 sub_start = VME_OFFSET(entry);
9331
9332 if (entry->vme_end < dst_end) {
9333 sub_end = entry->vme_end;
9334 } else {
9335 sub_end = dst_end;
9336 }
9337 sub_end -= entry->vme_start;
9338 sub_end += VME_OFFSET(entry);
9339 local_end = entry->vme_end;
9340 vm_map_unlock(dst_map);
9341 copy_size = sub_end - sub_start;
9342
9343 /* adjust the copy object */
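				/*
				 * The recursive overwrite below must consume
				 * only "copy_size" bytes of the copy object:
				 * walk the copy's entry list, clip the entry
				 * straddling that boundary, and temporarily
				 * detach the remainder of the list (saved in
				 * "next_copy", "previous_prev" and
				 * "remaining_entries") so it can be put back
				 * once the submap has been processed.
				 */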
9344 if (total_size > copy_size) {
9345 vm_map_size_t local_size = 0;
9346 vm_map_size_t entry_size;
9347
9348 nentries = 1;
9349 new_offset = copy->offset;
9350 copy_entry = vm_map_copy_first_entry(copy);
9351 while (copy_entry !=
9352 vm_map_copy_to_entry(copy)) {
9353 entry_size = copy_entry->vme_end -
9354 copy_entry->vme_start;
9355 if ((local_size < copy_size) &&
9356 ((local_size + entry_size)
9357 >= copy_size)) {
9358 vm_map_copy_clip_end(copy,
9359 copy_entry,
9360 copy_entry->vme_start +
9361 (copy_size - local_size));
9362 entry_size = copy_entry->vme_end -
9363 copy_entry->vme_start;
9364 local_size += entry_size;
9365 new_offset += entry_size;
9366 }
9367 if (local_size >= copy_size) {
9368 next_copy = copy_entry->vme_next;
9369 copy_entry->vme_next =
9370 vm_map_copy_to_entry(copy);
9371 previous_prev =
9372 copy->cpy_hdr.links.prev;
9373 copy->cpy_hdr.links.prev = copy_entry;
9374 copy->size = copy_size;
9375 remaining_entries =
9376 copy->cpy_hdr.nentries;
9377 remaining_entries -= nentries;
9378 copy->cpy_hdr.nentries = nentries;
9379 break;
9380 } else {
9381 local_size += entry_size;
9382 new_offset += entry_size;
9383 nentries++;
9384 }
9385 copy_entry = copy_entry->vme_next;
9386 }
9387 }
9388
9389 if ((entry->use_pmap) && (pmap == NULL)) {
9390 kr = vm_map_copy_overwrite_nested(
9391 VME_SUBMAP(entry),
9392 sub_start,
9393 copy,
9394 interruptible,
9395 VME_SUBMAP(entry)->pmap,
9396 TRUE);
9397 } else if (pmap != NULL) {
9398 kr = vm_map_copy_overwrite_nested(
9399 VME_SUBMAP(entry),
9400 sub_start,
9401 copy,
9402 interruptible, pmap,
9403 TRUE);
9404 } else {
9405 kr = vm_map_copy_overwrite_nested(
9406 VME_SUBMAP(entry),
9407 sub_start,
9408 copy,
9409 interruptible,
9410 dst_map->pmap,
9411 TRUE);
9412 }
9413 if (kr != KERN_SUCCESS) {
9414 if (next_copy != NULL) {
9415 copy->cpy_hdr.nentries +=
9416 remaining_entries;
9417 copy->cpy_hdr.links.prev->vme_next =
9418 next_copy;
9419 copy->cpy_hdr.links.prev
9420 = previous_prev;
9421 copy->size = total_size;
9422 }
9423 return kr;
9424 }
9425 if (dst_end <= local_end) {
9426 return KERN_SUCCESS;
9427 }
9428 /* otherwise copy no longer exists, it was */
9429 /* destroyed after successful copy_overwrite */
9430 copy = vm_map_copy_allocate();
9431 copy->type = VM_MAP_COPY_ENTRY_LIST;
9432 copy->offset = new_offset;
9433 copy->cpy_hdr.page_shift = copy_page_shift;
9434
9435 /*
9436 * XXX FBDP
9437 * this does not seem to deal with
9438 * the VM map store (R&B tree)
9439 */
9440
9441 total_size -= copy_size;
9442 copy_size = 0;
9443 /* put back remainder of copy in container */
9444 if (next_copy != NULL) {
9445 copy->cpy_hdr.nentries = remaining_entries;
9446 copy->cpy_hdr.links.next = next_copy;
9447 copy->cpy_hdr.links.prev = previous_prev;
9448 copy->size = total_size;
9449 next_copy->vme_prev =
9450 vm_map_copy_to_entry(copy);
9451 next_copy = NULL;
9452 }
9453 base_addr = local_end;
9454 vm_map_lock(dst_map);
9455 if (!vm_map_lookup_entry(dst_map,
9456 local_end, &tmp_entry)) {
9457 vm_map_unlock(dst_map);
9458 return KERN_INVALID_ADDRESS;
9459 }
9460 entry = tmp_entry;
9461 continue;
9462 }
9463 if (dst_end <= entry->vme_end) {
9464 copy_size = dst_end - base_addr;
9465 break;
9466 }
9467
9468 if ((next == vm_map_to_entry(dst_map)) ||
9469 (next->vme_start != entry->vme_end)) {
9470 vm_map_unlock(dst_map);
9471 return KERN_INVALID_ADDRESS;
9472 }
9473
9474 entry = next;
9475 }/* for */
9476
9477 next_copy = NULL;
9478 nentries = 1;
9479
9480 /* adjust the copy object */
9481 if (total_size > copy_size) {
9482 vm_map_size_t local_size = 0;
9483 vm_map_size_t entry_size;
9484
9485 new_offset = copy->offset;
9486 copy_entry = vm_map_copy_first_entry(copy);
9487 while (copy_entry != vm_map_copy_to_entry(copy)) {
9488 entry_size = copy_entry->vme_end -
9489 copy_entry->vme_start;
9490 if ((local_size < copy_size) &&
9491 ((local_size + entry_size)
9492 >= copy_size)) {
9493 vm_map_copy_clip_end(copy, copy_entry,
9494 copy_entry->vme_start +
9495 (copy_size - local_size));
9496 entry_size = copy_entry->vme_end -
9497 copy_entry->vme_start;
9498 local_size += entry_size;
9499 new_offset += entry_size;
9500 }
9501 if (local_size >= copy_size) {
9502 next_copy = copy_entry->vme_next;
9503 copy_entry->vme_next =
9504 vm_map_copy_to_entry(copy);
9505 previous_prev =
9506 copy->cpy_hdr.links.prev;
9507 copy->cpy_hdr.links.prev = copy_entry;
9508 copy->size = copy_size;
9509 remaining_entries =
9510 copy->cpy_hdr.nentries;
9511 remaining_entries -= nentries;
9512 copy->cpy_hdr.nentries = nentries;
9513 break;
9514 } else {
9515 local_size += entry_size;
9516 new_offset += entry_size;
9517 nentries++;
9518 }
9519 copy_entry = copy_entry->vme_next;
9520 }
9521 }
9522
9523 if (aligned) {
9524 pmap_t local_pmap;
9525
9526 if (pmap) {
9527 local_pmap = pmap;
9528 } else {
9529 local_pmap = dst_map->pmap;
9530 }
9531
9532 if ((kr = vm_map_copy_overwrite_aligned(
9533 dst_map, tmp_entry, copy,
9534 base_addr, local_pmap)) != KERN_SUCCESS) {
9535 if (next_copy != NULL) {
9536 copy->cpy_hdr.nentries +=
9537 remaining_entries;
9538 copy->cpy_hdr.links.prev->vme_next =
9539 next_copy;
9540 copy->cpy_hdr.links.prev =
9541 previous_prev;
9542 copy->size += copy_size;
9543 }
9544 return kr;
9545 }
9546 vm_map_unlock(dst_map);
9547 } else {
9548 /*
9549 * Performance gain:
9550 *
9551 * if the copy and dst address are misaligned but the same
9552 * offset within the page we can copy_not_aligned the
9553 * misaligned parts and copy aligned the rest. If they are
9554 * aligned but len is unaligned we simply need to copy
9555 * the end bit unaligned. We'll need to split the misaligned
9556 * bits of the region in this case!
9557 */
9558 /* ALWAYS UNLOCKS THE dst_map MAP */
9559 kr = vm_map_copy_overwrite_unaligned(
9560 dst_map,
9561 tmp_entry,
9562 copy,
9563 base_addr,
9564 discard_on_success);
9565 if (kr != KERN_SUCCESS) {
9566 if (next_copy != NULL) {
9567 copy->cpy_hdr.nentries +=
9568 remaining_entries;
9569 copy->cpy_hdr.links.prev->vme_next =
9570 next_copy;
9571 copy->cpy_hdr.links.prev =
9572 previous_prev;
9573 copy->size += copy_size;
9574 }
9575 return kr;
9576 }
9577 }
9578 total_size -= copy_size;
9579 if (total_size == 0) {
9580 break;
9581 }
9582 base_addr += copy_size;
9583 copy_size = 0;
9584 copy->offset = new_offset;
9585 if (next_copy != NULL) {
9586 copy->cpy_hdr.nentries = remaining_entries;
9587 copy->cpy_hdr.links.next = next_copy;
9588 copy->cpy_hdr.links.prev = previous_prev;
9589 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9590 copy->size = total_size;
9591 }
9592 vm_map_lock(dst_map);
9593 while (TRUE) {
9594 if (!vm_map_lookup_entry(dst_map,
9595 base_addr, &tmp_entry)) {
9596 vm_map_unlock(dst_map);
9597 return KERN_INVALID_ADDRESS;
9598 }
9599 if (tmp_entry->in_transition) {
9600 entry->needs_wakeup = TRUE;
9601 vm_map_entry_wait(dst_map, THREAD_UNINT);
9602 } else {
9603 break;
9604 }
9605 }
9606 vm_map_clip_start(dst_map,
9607 tmp_entry,
9608 vm_map_trunc_page(base_addr,
9609 VM_MAP_PAGE_MASK(dst_map)));
9610
9611 entry = tmp_entry;
9612 } /* while */
9613
9614 /*
9615 * Throw away the vm_map_copy object
9616 */
9617 if (discard_on_success) {
9618 vm_map_copy_discard(copy);
9619 }
9620
9621 return KERN_SUCCESS;
9622 }/* vm_map_copy_overwrite */
9623
9624 kern_return_t
9625 vm_map_copy_overwrite(
9626 vm_map_t dst_map,
9627 vm_map_offset_t dst_addr,
9628 vm_map_copy_t copy,
9629 vm_map_size_t copy_size,
9630 boolean_t interruptible)
9631 {
9632 vm_map_size_t head_size, tail_size;
9633 vm_map_copy_t head_copy, tail_copy;
9634 vm_map_offset_t head_addr, tail_addr;
9635 vm_map_entry_t entry;
9636 kern_return_t kr;
9637 vm_map_offset_t effective_page_mask, effective_page_size;
9638 uint16_t copy_page_shift;
9639
9640 head_size = 0;
9641 tail_size = 0;
9642 head_copy = NULL;
9643 tail_copy = NULL;
9644 head_addr = 0;
9645 tail_addr = 0;
9646
9647 if (interruptible ||
9648 copy == VM_MAP_COPY_NULL ||
9649 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9650 /*
9651 * We can't split the "copy" map if we're interruptible
9652 * or if we don't have a "copy" map...
9653 */
9654 blunt_copy:
9655 return vm_map_copy_overwrite_nested(dst_map,
9656 dst_addr,
9657 copy,
9658 interruptible,
9659 (pmap_t) NULL,
9660 TRUE);
9661 }
9662
9663 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9664 if (copy_page_shift < PAGE_SHIFT ||
9665 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9666 goto blunt_copy;
9667 }
9668
9669 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9670 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9671 } else {
9672 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9673 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9674 effective_page_mask);
9675 }
9676 effective_page_size = effective_page_mask + 1;
9677
9678 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9679 /*
9680 * Too small to bother with optimizing...
9681 */
9682 goto blunt_copy;
9683 }
9684
9685 if ((dst_addr & effective_page_mask) !=
9686 (copy->offset & effective_page_mask)) {
9687 /*
9688 * Incompatible mis-alignment of source and destination...
9689 */
9690 goto blunt_copy;
9691 }
9692
9693 /*
9694 * Proper alignment or identical mis-alignment at the beginning.
9695 * Let's try and do a small unaligned copy first (if needed)
9696 * and then an aligned copy for the rest.
9697 */
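/*
 * Worked example of the split below (hypothetical numbers, 16K
 * effective pages): with dst_addr and copy->offset both 0x800 into
 * their page, head_size = 0x4000 - 0x800 = 0x3800 unaligned bytes
 * bring us to a page boundary; if copy->offset + copy_size ends
 * 0x1200 bytes into a page, tail_size = 0x1200 unaligned bytes are
 * copied at the end, and everything in between goes through the
 * aligned (virtual) copy path.
 */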
9698 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9699 head_addr = dst_addr;
9700 head_size = (effective_page_size -
9701 (copy->offset & effective_page_mask));
9702 head_size = MIN(head_size, copy_size);
9703 }
9704 if (!vm_map_page_aligned(copy->offset + copy_size,
9705 effective_page_mask)) {
9706 /*
9707 * Mis-alignment at the end.
9708 * Do an aligned copy up to the last page and
9709 * then an unaligned copy for the remaining bytes.
9710 */
9711 tail_size = ((copy->offset + copy_size) &
9712 effective_page_mask);
9713 tail_size = MIN(tail_size, copy_size);
9714 tail_addr = dst_addr + copy_size - tail_size;
9715 assert(tail_addr >= head_addr + head_size);
9716 }
9717 assert(head_size + tail_size <= copy_size);
9718
9719 if (head_size + tail_size == copy_size) {
9720 /*
9721 * It's all unaligned, no optimization possible...
9722 */
9723 goto blunt_copy;
9724 }
9725
9726 /*
9727 * Can't optimize if there are any submaps in the
9728 * destination due to the way we free the "copy" map
9729 * progressively in vm_map_copy_overwrite_nested()
9730 * in that case.
9731 */
9732 vm_map_lock_read(dst_map);
9733 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9734 vm_map_unlock_read(dst_map);
9735 goto blunt_copy;
9736 }
9737 for (;
9738 (entry != vm_map_copy_to_entry(copy) &&
9739 entry->vme_start < dst_addr + copy_size);
9740 entry = entry->vme_next) {
9741 if (entry->is_sub_map) {
9742 vm_map_unlock_read(dst_map);
9743 goto blunt_copy;
9744 }
9745 }
9746 vm_map_unlock_read(dst_map);
9747
9748 if (head_size) {
9749 /*
9750 * Unaligned copy of the first "head_size" bytes, to reach
9751 * a page boundary.
9752 */
9753
9754 /*
9755 * Extract "head_copy" out of "copy".
9756 */
9757 head_copy = vm_map_copy_allocate();
9758 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9759 head_copy->cpy_hdr.entries_pageable =
9760 copy->cpy_hdr.entries_pageable;
9761 vm_map_store_init(&head_copy->cpy_hdr);
9762 head_copy->cpy_hdr.page_shift = copy_page_shift;
9763
9764 entry = vm_map_copy_first_entry(copy);
9765 if (entry->vme_end < copy->offset + head_size) {
9766 head_size = entry->vme_end - copy->offset;
9767 }
9768
9769 head_copy->offset = copy->offset;
9770 head_copy->size = head_size;
9771 copy->offset += head_size;
9772 copy->size -= head_size;
9773 copy_size -= head_size;
9774 assert(copy_size > 0);
9775
9776 vm_map_copy_clip_end(copy, entry, copy->offset);
9777 vm_map_copy_entry_unlink(copy, entry);
9778 vm_map_copy_entry_link(head_copy,
9779 vm_map_copy_to_entry(head_copy),
9780 entry);
9781
9782 /*
9783 * Do the unaligned copy.
9784 */
9785 kr = vm_map_copy_overwrite_nested(dst_map,
9786 head_addr,
9787 head_copy,
9788 interruptible,
9789 (pmap_t) NULL,
9790 FALSE);
9791 if (kr != KERN_SUCCESS) {
9792 goto done;
9793 }
9794 }
9795
9796 if (tail_size) {
9797 /*
9798 * Extract "tail_copy" out of "copy".
9799 */
9800 tail_copy = vm_map_copy_allocate();
9801 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9802 tail_copy->cpy_hdr.entries_pageable =
9803 copy->cpy_hdr.entries_pageable;
9804 vm_map_store_init(&tail_copy->cpy_hdr);
9805 tail_copy->cpy_hdr.page_shift = copy_page_shift;
9806
9807 tail_copy->offset = copy->offset + copy_size - tail_size;
9808 tail_copy->size = tail_size;
9809
9810 copy->size -= tail_size;
9811 copy_size -= tail_size;
9812 assert(copy_size > 0);
9813
9814 entry = vm_map_copy_last_entry(copy);
9815 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9816 entry = vm_map_copy_last_entry(copy);
9817 vm_map_copy_entry_unlink(copy, entry);
9818 vm_map_copy_entry_link(tail_copy,
9819 vm_map_copy_last_entry(tail_copy),
9820 entry);
9821 }
9822
9823 /*
9824 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9825 * we want to avoid TOCTOU issues w.r.t copy->size but
9826 * we don't need to change vm_map_copy_overwrite_nested()
9827 * and all other vm_map_copy_overwrite variants.
9828 *
9829 * So we assign the original copy_size that was passed into
9830 * this routine back to copy.
9831 *
9832 * This use of the local 'copy_size' passed into this routine is
9833 * to try to protect against TOCTOU attacks where the kernel
9834 * has been exploited. We don't expect this to be an issue
9835 * during normal system operation.
9836 */
9837 assertf(copy->size == copy_size,
9838 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9839 copy->size = copy_size;
9840
9841 /*
9842 * Copy most (or possibly all) of the data.
9843 */
9844 kr = vm_map_copy_overwrite_nested(dst_map,
9845 dst_addr + head_size,
9846 copy,
9847 interruptible,
9848 (pmap_t) NULL,
9849 FALSE);
9850 if (kr != KERN_SUCCESS) {
9851 goto done;
9852 }
9853
9854 if (tail_size) {
9855 kr = vm_map_copy_overwrite_nested(dst_map,
9856 tail_addr,
9857 tail_copy,
9858 interruptible,
9859 (pmap_t) NULL,
9860 FALSE);
9861 }
9862
9863 done:
9864 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9865 if (kr == KERN_SUCCESS) {
9866 /*
9867 * Discard all the copy maps.
9868 */
9869 if (head_copy) {
9870 vm_map_copy_discard(head_copy);
9871 head_copy = NULL;
9872 }
9873 vm_map_copy_discard(copy);
9874 if (tail_copy) {
9875 vm_map_copy_discard(tail_copy);
9876 tail_copy = NULL;
9877 }
9878 } else {
9879 /*
9880 * Re-assemble the original copy map.
9881 */
9882 if (head_copy) {
9883 entry = vm_map_copy_first_entry(head_copy);
9884 vm_map_copy_entry_unlink(head_copy, entry);
9885 vm_map_copy_entry_link(copy,
9886 vm_map_copy_to_entry(copy),
9887 entry);
9888 copy->offset -= head_size;
9889 copy->size += head_size;
9890 vm_map_copy_discard(head_copy);
9891 head_copy = NULL;
9892 }
9893 if (tail_copy) {
9894 entry = vm_map_copy_last_entry(tail_copy);
9895 vm_map_copy_entry_unlink(tail_copy, entry);
9896 vm_map_copy_entry_link(copy,
9897 vm_map_copy_last_entry(copy),
9898 entry);
9899 copy->size += tail_size;
9900 vm_map_copy_discard(tail_copy);
9901 tail_copy = NULL;
9902 }
9903 }
9904 return kr;
9905 }
9906
9907
9908 /*
9909 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9910 *
9911 * Description:
9912 * Physically copy unaligned data
9913 *
9914 * Implementation:
9915 * Unaligned parts of pages have to be physically copied. We use
9916 * a modified form of vm_fault_copy (which understands non-aligned
9917 * page offsets and sizes) to do the copy. We attempt to copy as
9918 * much memory in one go as possible; however, vm_fault_copy copies
9919 * within one memory object, so we have to find the smallest of "amount left",
9920 * "source object data size" and "target object data size". With
9921 * unaligned data we don't need to split regions, therefore the source
9922 * (copy) object should be one map entry; the target range, however, may be
9923 * split over multiple map entries. In any event we are pessimistic
9924 * about these assumptions.
9925 *
9926 * Assumptions:
9927 * dst_map is locked on entry and is returned locked on success,
9928 * unlocked on error.
9929 */
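/*
 * Illustrative sketch of the per-iteration size choice described
 * above (hypothetical values): with 12K left to copy, 8K remaining in
 * the current source copy entry and 20K remaining in the current
 * destination entry, one vm_fault_copy() pass moves
 * MIN(12K, 8K, 20K) = 8K, after which the next source copy entry is
 * picked up and the loop continues.
 */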
9930
9931 static kern_return_t
9932 vm_map_copy_overwrite_unaligned(
9933 vm_map_t dst_map,
9934 vm_map_entry_t entry,
9935 vm_map_copy_t copy,
9936 vm_map_offset_t start,
9937 boolean_t discard_on_success)
9938 {
9939 vm_map_entry_t copy_entry;
9940 vm_map_entry_t copy_entry_next;
9941 vm_map_version_t version;
9942 vm_object_t dst_object;
9943 vm_object_offset_t dst_offset;
9944 vm_object_offset_t src_offset;
9945 vm_object_offset_t entry_offset;
9946 vm_map_offset_t entry_end;
9947 vm_map_size_t src_size,
9948 dst_size,
9949 copy_size,
9950 amount_left;
9951 kern_return_t kr = KERN_SUCCESS;
9952
9953
9954 copy_entry = vm_map_copy_first_entry(copy);
9955
9956 vm_map_lock_write_to_read(dst_map);
9957
9958 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
9959 amount_left = copy->size;
9960 /*
9961 * unaligned, so we never clipped this entry; we need the offset into
9962 * the vm_object, not just the data.
9963 */
9964 while (amount_left > 0) {
9965 if (entry == vm_map_to_entry(dst_map)) {
9966 vm_map_unlock_read(dst_map);
9967 return KERN_INVALID_ADDRESS;
9968 }
9969
9970 /* "start" must be within the current map entry */
9971 assert((start >= entry->vme_start) && (start < entry->vme_end));
9972
9973 dst_offset = start - entry->vme_start;
9974
9975 dst_size = entry->vme_end - start;
9976
9977 src_size = copy_entry->vme_end -
9978 (copy_entry->vme_start + src_offset);
9979
9980 if (dst_size < src_size) {
9981 /*
9982 * we can only copy dst_size bytes before
9983 * we have to get the next destination entry
9984 */
9985 copy_size = dst_size;
9986 } else {
9987 /*
9988 * we can only copy src_size bytes before
9989 * we have to get the next source copy entry
9990 */
9991 copy_size = src_size;
9992 }
9993
9994 if (copy_size > amount_left) {
9995 copy_size = amount_left;
9996 }
9997 /*
9998 * Entry needs copy: create a shadow object for the
9999 * copy-on-write region.
10000 */
10001 if (entry->needs_copy &&
10002 ((entry->protection & VM_PROT_WRITE) != 0)) {
10003 if (vm_map_lock_read_to_write(dst_map)) {
10004 vm_map_lock_read(dst_map);
10005 goto RetryLookup;
10006 }
10007 VME_OBJECT_SHADOW(entry,
10008 (vm_map_size_t)(entry->vme_end
10009 - entry->vme_start));
10010 entry->needs_copy = FALSE;
10011 vm_map_lock_write_to_read(dst_map);
10012 }
10013 dst_object = VME_OBJECT(entry);
10014 /*
10015 * unlike with the virtual (aligned) copy, we're going
10016 * to fault on it, therefore we need a target object.
10017 */
10018 if (dst_object == VM_OBJECT_NULL) {
10019 if (vm_map_lock_read_to_write(dst_map)) {
10020 vm_map_lock_read(dst_map);
10021 goto RetryLookup;
10022 }
10023 dst_object = vm_object_allocate((vm_map_size_t)
10024 entry->vme_end - entry->vme_start);
10025 VME_OBJECT_SET(entry, dst_object);
10026 VME_OFFSET_SET(entry, 0);
10027 assert(entry->use_pmap);
10028 vm_map_lock_write_to_read(dst_map);
10029 }
10030 /*
10031 * Take an object reference and unlock map. The "entry" may
10032 * disappear or change when the map is unlocked.
10033 */
10034 vm_object_reference(dst_object);
10035 version.main_timestamp = dst_map->timestamp;
10036 entry_offset = VME_OFFSET(entry);
10037 entry_end = entry->vme_end;
10038 vm_map_unlock_read(dst_map);
10039 /*
10040 * Copy as much as possible in one pass
10041 */
10042 kr = vm_fault_copy(
10043 VME_OBJECT(copy_entry),
10044 VME_OFFSET(copy_entry) + src_offset,
10045 &copy_size,
10046 dst_object,
10047 entry_offset + dst_offset,
10048 dst_map,
10049 &version,
10050 THREAD_UNINT );
10051
10052 start += copy_size;
10053 src_offset += copy_size;
10054 amount_left -= copy_size;
10055 /*
10056 * Release the object reference
10057 */
10058 vm_object_deallocate(dst_object);
10059 /*
10060 * If a hard error occurred, return it now
10061 */
10062 if (kr != KERN_SUCCESS) {
10063 return kr;
10064 }
10065
10066 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10067 || amount_left == 0) {
10068 /*
10069 * all done with this copy entry, dispose.
10070 */
10071 copy_entry_next = copy_entry->vme_next;
10072
10073 if (discard_on_success) {
10074 vm_map_copy_entry_unlink(copy, copy_entry);
10075 assert(!copy_entry->is_sub_map);
10076 vm_object_deallocate(VME_OBJECT(copy_entry));
10077 vm_map_copy_entry_dispose(copy, copy_entry);
10078 }
10079
10080 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10081 amount_left) {
10082 /*
10083 * not finished copying but ran out of source entries
10084 */
10085 return KERN_INVALID_ADDRESS;
10086 }
10087
10088 copy_entry = copy_entry_next;
10089
10090 src_offset = 0;
10091 }
10092
10093 if (amount_left == 0) {
10094 return KERN_SUCCESS;
10095 }
10096
10097 vm_map_lock_read(dst_map);
10098 if (version.main_timestamp == dst_map->timestamp) {
10099 if (start == entry_end) {
10100 /*
10101 * destination region is split. Use the version
10102 * information to avoid a lookup in the normal
10103 * case.
10104 */
10105 entry = entry->vme_next;
10106 /*
10107 * should be contiguous. Fail if we encounter
10108 * a hole in the destination.
10109 */
10110 if (start != entry->vme_start) {
10111 vm_map_unlock_read(dst_map);
10112 return KERN_INVALID_ADDRESS;
10113 }
10114 }
10115 } else {
10116 /*
10117 * Map version check failed.
10118 * we must lookup the entry because somebody
10119 * might have changed the map behind our backs.
10120 */
10121 RetryLookup:
10122 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10123 vm_map_unlock_read(dst_map);
10124 return KERN_INVALID_ADDRESS;
10125 }
10126 }
10127 }/* while */
10128
10129 return KERN_SUCCESS;
10130 }/* vm_map_copy_overwrite_unaligned */
10131
10132 /*
10133 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10134 *
10135 * Description:
10136 * Does all the vm_trickery possible for whole pages.
10137 *
10138 * Implementation:
10139 *
10140 * If there are no permanent objects in the destination,
10141 * and the source and destination map entry zones match,
10142 * and the destination map entry is not shared,
10143 * then the map entries can be deleted and replaced
10144 * with those from the copy. The following code is the
10145 * basic idea of what to do, but there are lots of annoying
10146 * little details about getting protection and inheritance
10147 * right. Should add protection, inheritance, and sharing checks
10148 * to the above pass and make sure that no wiring is involved.
10149 */
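/*
 * In the optimized path below, the "copy" effectively reduces to an
 * object substitution rather than a data copy (sketch only, see the
 * code for the full bookkeeping):
 *
 *      (tear down the destination's pmap mappings and drop the old
 *       VME_OBJECT(entry) reference)
 *      VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
 *      VME_OFFSET_SET(entry, VME_OFFSET(copy_entry));
 *
 * i.e. the destination map entry adopts the source copy entry's
 * anonymous object instead of touching any page contents.
 */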
10150
10151 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
10152 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
10153 int vm_map_copy_overwrite_aligned_src_large = 0;
10154
10155 static kern_return_t
10156 vm_map_copy_overwrite_aligned(
10157 vm_map_t dst_map,
10158 vm_map_entry_t tmp_entry,
10159 vm_map_copy_t copy,
10160 vm_map_offset_t start,
10161 __unused pmap_t pmap)
10162 {
10163 vm_object_t object;
10164 vm_map_entry_t copy_entry;
10165 vm_map_size_t copy_size;
10166 vm_map_size_t size;
10167 vm_map_entry_t entry;
10168
10169 while ((copy_entry = vm_map_copy_first_entry(copy))
10170 != vm_map_copy_to_entry(copy)) {
10171 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10172
10173 entry = tmp_entry;
10174 if (entry->is_sub_map) {
10175 /* unnested when clipped earlier */
10176 assert(!entry->use_pmap);
10177 }
10178 if (entry == vm_map_to_entry(dst_map)) {
10179 vm_map_unlock(dst_map);
10180 return KERN_INVALID_ADDRESS;
10181 }
10182 size = (entry->vme_end - entry->vme_start);
10183 /*
10184 * Make sure that no holes popped up in the
10185 * address map, and that the protection is
10186 * still valid, in case the map was unlocked
10187 * earlier.
10188 */
10189
10190 if ((entry->vme_start != start) || ((entry->is_sub_map)
10191 && !entry->needs_copy)) {
10192 vm_map_unlock(dst_map);
10193 return KERN_INVALID_ADDRESS;
10194 }
10195 assert(entry != vm_map_to_entry(dst_map));
10196
10197 /*
10198 * Check protection again
10199 */
10200
10201 if (!(entry->protection & VM_PROT_WRITE)) {
10202 vm_map_unlock(dst_map);
10203 return KERN_PROTECTION_FAILURE;
10204 }
10205
10206 if (!vm_map_entry_is_overwritable(dst_map, entry)) {
10207 vm_map_unlock(dst_map);
10208 return KERN_PROTECTION_FAILURE;
10209 }
10210
10211 /*
10212 * Adjust to source size first
10213 */
10214
10215 if (copy_size < size) {
10216 if (entry->map_aligned &&
10217 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10218 VM_MAP_PAGE_MASK(dst_map))) {
10219 /* no longer map-aligned */
10220 entry->map_aligned = FALSE;
10221 }
10222 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10223 size = copy_size;
10224 }
10225
10226 /*
10227 * Adjust to destination size
10228 */
10229
10230 if (size < copy_size) {
10231 vm_map_copy_clip_end(copy, copy_entry,
10232 copy_entry->vme_start + size);
10233 copy_size = size;
10234 }
10235
10236 assert((entry->vme_end - entry->vme_start) == size);
10237 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10238 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10239
10240 /*
10241 * If the destination contains temporary unshared memory,
10242 * we can perform the copy by throwing it away and
10243 * installing the source data.
10244 */
10245
10246 object = VME_OBJECT(entry);
10247 if ((!entry->is_shared &&
10248 ((object == VM_OBJECT_NULL) ||
10249 (object->internal && !object->true_share))) ||
10250 entry->needs_copy) {
10251 vm_object_t old_object = VME_OBJECT(entry);
10252 vm_object_offset_t old_offset = VME_OFFSET(entry);
10253 vm_object_offset_t offset;
10254
10255 /*
10256 * Ensure that the source and destination aren't
10257 * identical
10258 */
10259 if (old_object == VME_OBJECT(copy_entry) &&
10260 old_offset == VME_OFFSET(copy_entry)) {
10261 vm_map_copy_entry_unlink(copy, copy_entry);
10262 vm_map_copy_entry_dispose(copy, copy_entry);
10263
10264 if (old_object != VM_OBJECT_NULL) {
10265 vm_object_deallocate(old_object);
10266 }
10267
10268 start = tmp_entry->vme_end;
10269 tmp_entry = tmp_entry->vme_next;
10270 continue;
10271 }
10272
10273 #if XNU_TARGET_OS_OSX
10274 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10275 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10276 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10277 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10278 copy_size <= __TRADEOFF1_COPY_SIZE) {
10279 /*
10280 * Virtual vs. Physical copy tradeoff #1.
10281 *
10282 * Copying only a few pages out of a large
10283 * object: do a physical copy instead of
10284 * a virtual copy, to avoid possibly keeping
10285 * the entire large object alive because of
10286 * those few copy-on-write pages.
10287 */
10288 vm_map_copy_overwrite_aligned_src_large++;
10289 goto slow_copy;
10290 }
10291 #endif /* XNU_TARGET_OS_OSX */
10292
10293 if ((dst_map->pmap != kernel_pmap) &&
10294 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10295 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10296 vm_object_t new_object, new_shadow;
10297
10298 /*
10299 * We're about to map something over a mapping
10300 * established by malloc()...
10301 */
10302 new_object = VME_OBJECT(copy_entry);
10303 if (new_object != VM_OBJECT_NULL) {
10304 vm_object_lock_shared(new_object);
10305 }
10306 while (new_object != VM_OBJECT_NULL &&
10307 #if XNU_TARGET_OS_OSX
10308 !new_object->true_share &&
10309 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10310 #endif /* XNU_TARGET_OS_OSX */
10311 new_object->internal) {
10312 new_shadow = new_object->shadow;
10313 if (new_shadow == VM_OBJECT_NULL) {
10314 break;
10315 }
10316 vm_object_lock_shared(new_shadow);
10317 vm_object_unlock(new_object);
10318 new_object = new_shadow;
10319 }
10320 if (new_object != VM_OBJECT_NULL) {
10321 if (!new_object->internal) {
10322 /*
10323 * The new mapping is backed
10324 * by an external object. We
10325 * don't want malloc'ed memory
10326 * to be replaced with such a
10327 * non-anonymous mapping, so
10328 * let's go off the optimized
10329 * path...
10330 */
10331 vm_map_copy_overwrite_aligned_src_not_internal++;
10332 vm_object_unlock(new_object);
10333 goto slow_copy;
10334 }
10335 #if XNU_TARGET_OS_OSX
10336 if (new_object->true_share ||
10337 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10338 /*
10339 * Same if there's a "true_share"
10340 * object in the shadow chain, or
10341 * an object with a non-default
10342 * (SYMMETRIC) copy strategy.
10343 */
10344 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10345 vm_object_unlock(new_object);
10346 goto slow_copy;
10347 }
10348 #endif /* XNU_TARGET_OS_OSX */
10349 vm_object_unlock(new_object);
10350 }
10351 /*
10352 * The new mapping is still backed by
10353 * anonymous (internal) memory, so it's
10354 * OK to substitute it for the original
10355 * malloc() mapping.
10356 */
10357 }
10358
10359 if (old_object != VM_OBJECT_NULL) {
10360 if (entry->is_sub_map) {
10361 if (entry->use_pmap) {
10362 #ifndef NO_NESTED_PMAP
10363 pmap_unnest(dst_map->pmap,
10364 (addr64_t)entry->vme_start,
10365 entry->vme_end - entry->vme_start);
10366 #endif /* NO_NESTED_PMAP */
10367 if (dst_map->mapped_in_other_pmaps) {
10368 /* clean up parent */
10369 /* map/maps */
10370 vm_map_submap_pmap_clean(
10371 dst_map, entry->vme_start,
10372 entry->vme_end,
10373 VME_SUBMAP(entry),
10374 VME_OFFSET(entry));
10375 }
10376 } else {
10377 vm_map_submap_pmap_clean(
10378 dst_map, entry->vme_start,
10379 entry->vme_end,
10380 VME_SUBMAP(entry),
10381 VME_OFFSET(entry));
10382 }
10383 vm_map_deallocate(VME_SUBMAP(entry));
10384 } else {
10385 if (dst_map->mapped_in_other_pmaps) {
10386 vm_object_pmap_protect_options(
10387 VME_OBJECT(entry),
10388 VME_OFFSET(entry),
10389 entry->vme_end
10390 - entry->vme_start,
10391 PMAP_NULL,
10392 PAGE_SIZE,
10393 entry->vme_start,
10394 VM_PROT_NONE,
10395 PMAP_OPTIONS_REMOVE);
10396 } else {
10397 pmap_remove_options(
10398 dst_map->pmap,
10399 (addr64_t)(entry->vme_start),
10400 (addr64_t)(entry->vme_end),
10401 PMAP_OPTIONS_REMOVE);
10402 }
10403 vm_object_deallocate(old_object);
10404 }
10405 }
10406
10407 if (entry->iokit_acct) {
10408 /* keep using iokit accounting */
10409 entry->use_pmap = FALSE;
10410 } else {
10411 /* use pmap accounting */
10412 entry->use_pmap = TRUE;
10413 }
10414 entry->is_sub_map = FALSE;
10415 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10416 object = VME_OBJECT(entry);
10417 entry->needs_copy = copy_entry->needs_copy;
10418 entry->wired_count = 0;
10419 entry->user_wired_count = 0;
10420 offset = VME_OFFSET(copy_entry);
10421 VME_OFFSET_SET(entry, offset);
10422
10423 vm_map_copy_entry_unlink(copy, copy_entry);
10424 vm_map_copy_entry_dispose(copy, copy_entry);
10425
10426 /*
10427 * we could try to push pages into the pmap at this point, BUT
10428 * this optimization only saved on average 2 us per page if ALL
10429 * the pages in the source were currently mapped
10430 * and ALL the pages in the dest were touched. If fewer
10431 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
10432 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10433 */
10434
10435 /*
10436 * Set up for the next iteration. The map
10437 * has not been unlocked, so the next
10438 * address should be at the end of this
10439 * entry, and the next map entry should be
10440 * the one following it.
10441 */
10442
10443 start = tmp_entry->vme_end;
10444 tmp_entry = tmp_entry->vme_next;
10445 } else {
10446 vm_map_version_t version;
10447 vm_object_t dst_object;
10448 vm_object_offset_t dst_offset;
10449 kern_return_t r;
10450
10451 slow_copy:
10452 if (entry->needs_copy) {
10453 VME_OBJECT_SHADOW(entry,
10454 (entry->vme_end -
10455 entry->vme_start));
10456 entry->needs_copy = FALSE;
10457 }
10458
10459 dst_object = VME_OBJECT(entry);
10460 dst_offset = VME_OFFSET(entry);
10461
10462 /*
10463 * Take an object reference, and record
10464 * the map version information so that the
10465 * map can be safely unlocked.
10466 */
10467
10468 if (dst_object == VM_OBJECT_NULL) {
10469 /*
10470 * We would usually have just taken the
10471 * optimized path above if the destination
10472 * object has not been allocated yet. But we
10473 * now disable that optimization if the copy
10474 * entry's object is not backed by anonymous
10475 * memory to avoid replacing malloc'ed
10476 * (i.e. re-usable) anonymous memory with a
10477 * not-so-anonymous mapping.
10478 * So we have to handle this case here and
10479 * allocate a new VM object for this map entry.
10480 */
10481 dst_object = vm_object_allocate(
10482 entry->vme_end - entry->vme_start);
10483 dst_offset = 0;
10484 VME_OBJECT_SET(entry, dst_object);
10485 VME_OFFSET_SET(entry, dst_offset);
10486 assert(entry->use_pmap);
10487 }
10488
10489 vm_object_reference(dst_object);
10490
10491 /* account for unlock bumping up timestamp */
10492 version.main_timestamp = dst_map->timestamp + 1;
10493
10494 vm_map_unlock(dst_map);
10495
10496 /*
10497 * Copy as much as possible in one pass
10498 */
10499
10500 copy_size = size;
10501 r = vm_fault_copy(
10502 VME_OBJECT(copy_entry),
10503 VME_OFFSET(copy_entry),
10504 &copy_size,
10505 dst_object,
10506 dst_offset,
10507 dst_map,
10508 &version,
10509 THREAD_UNINT );
10510
10511 /*
10512 * Release the object reference
10513 */
10514
10515 vm_object_deallocate(dst_object);
10516
10517 /*
10518 * If a hard error occurred, return it now
10519 */
10520
10521 if (r != KERN_SUCCESS) {
10522 return r;
10523 }
10524
10525 if (copy_size != 0) {
10526 /*
10527 * Dispose of the copied region
10528 */
10529
10530 vm_map_copy_clip_end(copy, copy_entry,
10531 copy_entry->vme_start + copy_size);
10532 vm_map_copy_entry_unlink(copy, copy_entry);
10533 vm_object_deallocate(VME_OBJECT(copy_entry));
10534 vm_map_copy_entry_dispose(copy, copy_entry);
10535 }
10536
10537 /*
10538 * Pick up in the destination map where we left off.
10539 *
10540 * Use the version information to avoid a lookup
10541 * in the normal case.
10542 */
10543
10544 start += copy_size;
10545 vm_map_lock(dst_map);
10546 if (version.main_timestamp == dst_map->timestamp &&
10547 copy_size != 0) {
10548 /* We can safely use saved tmp_entry value */
10549
10550 if (tmp_entry->map_aligned &&
10551 !VM_MAP_PAGE_ALIGNED(
10552 start,
10553 VM_MAP_PAGE_MASK(dst_map))) {
10554 /* no longer map-aligned */
10555 tmp_entry->map_aligned = FALSE;
10556 }
10557 vm_map_clip_end(dst_map, tmp_entry, start);
10558 tmp_entry = tmp_entry->vme_next;
10559 } else {
10560 /* Must do lookup of tmp_entry */
10561
10562 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10563 vm_map_unlock(dst_map);
10564 return KERN_INVALID_ADDRESS;
10565 }
10566 if (tmp_entry->map_aligned &&
10567 !VM_MAP_PAGE_ALIGNED(
10568 start,
10569 VM_MAP_PAGE_MASK(dst_map))) {
10570 /* no longer map-aligned */
10571 tmp_entry->map_aligned = FALSE;
10572 }
10573 vm_map_clip_start(dst_map, tmp_entry, start);
10574 }
10575 }
10576 }/* while */
10577
10578 return KERN_SUCCESS;
10579 }/* vm_map_copy_overwrite_aligned */
10580
10581 /*
10582 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10583 *
10584 * Description:
10585 * Copy in data to a kernel buffer from space in the
10586 * source map. The original space may be optionally
10587 * deallocated.
10588 *
10589 * If successful, returns a new copy object.
10590 */
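/*
 * Minimal usage sketch (hypothetical caller and variables): for a
 * region no larger than msg_ool_size_small this produces a
 * VM_MAP_COPY_KERNEL_BUFFER copy that can later be handed to
 * vm_map_copyout() or released with vm_map_copy_discard().
 *
 *      vm_map_copy_t copy;
 *      kern_return_t kr;
 *
 *      kr = vm_map_copyin_kernel_buffer(src_map, src_addr, len,
 *          FALSE, &copy);
 */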
10591 static kern_return_t
10592 vm_map_copyin_kernel_buffer(
10593 vm_map_t src_map,
10594 vm_map_offset_t src_addr,
10595 vm_map_size_t len,
10596 boolean_t src_destroy,
10597 vm_map_copy_t *copy_result)
10598 {
10599 kern_return_t kr;
10600 vm_map_copy_t copy;
10601
10602 if (len > msg_ool_size_small) {
10603 return KERN_INVALID_ARGUMENT;
10604 }
10605
10606 copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
10607 copy->cpy_kdata = kalloc_data(len, Z_WAITOK);
10608 if (copy->cpy_kdata == NULL) {
10609 zfree(vm_map_copy_zone, copy);
10610 return KERN_RESOURCE_SHORTAGE;
10611 }
10612
10613 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10614 copy->size = len;
10615 copy->offset = 0;
10616
10617 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10618 if (kr != KERN_SUCCESS) {
10619 kfree_data(copy->cpy_kdata, len);
10620 zfree(vm_map_copy_zone, copy);
10621 return kr;
10622 }
10623 if (src_destroy) {
10624 (void) vm_map_remove(
10625 src_map,
10626 vm_map_trunc_page(src_addr,
10627 VM_MAP_PAGE_MASK(src_map)),
10628 vm_map_round_page(src_addr + len,
10629 VM_MAP_PAGE_MASK(src_map)),
10630 (VM_MAP_REMOVE_INTERRUPTIBLE |
10631 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10632 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10633 }
10634 *copy_result = copy;
10635 return KERN_SUCCESS;
10636 }
10637
10638 /*
10639 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10640 *
10641 * Description:
10642 * Copy out data from a kernel buffer into space in the
10643 * destination map. The space may be optionally dynamically
10644 * allocated.
10645 *
10646 * If successful, consumes the copy object.
10647 * Otherwise, the caller is responsible for it.
10648 */
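/*
 * Sketch of the non-overwrite use (hypothetical variables), which is
 * how vm_map_copyout_internal() below consumes VM_MAP_COPY_KERNEL_BUFFER
 * copies: the routine allocates the destination range itself and
 * copyout()s the kernel buffer into it.
 *
 *      vm_map_address_t addr = 0;
 *
 *      kr = vm_map_copyout_kernel_buffer(dst_map, &addr, copy,
 *          copy->size, FALSE, TRUE);
 */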
10649 static int vm_map_copyout_kernel_buffer_failures = 0;
10650 static kern_return_t
10651 vm_map_copyout_kernel_buffer(
10652 vm_map_t map,
10653 vm_map_address_t *addr, /* IN/OUT */
10654 vm_map_copy_t copy,
10655 vm_map_size_t copy_size,
10656 boolean_t overwrite,
10657 boolean_t consume_on_success)
10658 {
10659 kern_return_t kr = KERN_SUCCESS;
10660 thread_t thread = current_thread();
10661
10662 assert(copy->size == copy_size);
10663
10664 /*
10665 * check for corrupted vm_map_copy structure
10666 */
10667 if (copy_size > msg_ool_size_small || copy->offset) {
10668 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10669 (long long)copy->size, (long long)copy->offset);
10670 }
10671
10672 if (!overwrite) {
10673 /*
10674 * Allocate space in the target map for the data
10675 */
10676 *addr = 0;
10677 kr = vm_map_enter(map,
10678 addr,
10679 vm_map_round_page(copy_size,
10680 VM_MAP_PAGE_MASK(map)),
10681 (vm_map_offset_t) 0,
10682 VM_FLAGS_ANYWHERE,
10683 VM_MAP_KERNEL_FLAGS_NONE,
10684 VM_KERN_MEMORY_NONE,
10685 VM_OBJECT_NULL,
10686 (vm_object_offset_t) 0,
10687 FALSE,
10688 VM_PROT_DEFAULT,
10689 VM_PROT_ALL,
10690 VM_INHERIT_DEFAULT);
10691 if (kr != KERN_SUCCESS) {
10692 return kr;
10693 }
10694 #if KASAN
10695 if (map->pmap == kernel_pmap) {
10696 kasan_notify_address(*addr, copy->size);
10697 }
10698 #endif
10699 }
10700
10701 /*
10702 * Copyout the data from the kernel buffer to the target map.
10703 */
10704 if (thread->map == map) {
10705 /*
10706 * If the target map is the current map, just do
10707 * the copy.
10708 */
10709 assert((vm_size_t)copy_size == copy_size);
10710 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10711 kr = KERN_INVALID_ADDRESS;
10712 }
10713 } else {
10714 vm_map_t oldmap;
10715
10716 /*
10717 * If the target map is another map, assume the
10718 * target's address space identity for the duration
10719 * of the copy.
10720 */
10721 vm_map_reference(map);
10722 oldmap = vm_map_switch(map);
10723
10724 assert((vm_size_t)copy_size == copy_size);
10725 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10726 vm_map_copyout_kernel_buffer_failures++;
10727 kr = KERN_INVALID_ADDRESS;
10728 }
10729
10730 (void) vm_map_switch(oldmap);
10731 vm_map_deallocate(map);
10732 }
10733
10734 if (kr != KERN_SUCCESS) {
10735 /* the copy failed, clean up */
10736 if (!overwrite) {
10737 /*
10738 * Deallocate the space we allocated in the target map.
10739 */
10740 (void) vm_map_remove(
10741 map,
10742 vm_map_trunc_page(*addr,
10743 VM_MAP_PAGE_MASK(map)),
10744 vm_map_round_page((*addr +
10745 vm_map_round_page(copy_size,
10746 VM_MAP_PAGE_MASK(map))),
10747 VM_MAP_PAGE_MASK(map)),
10748 VM_MAP_REMOVE_NO_FLAGS);
10749 *addr = 0;
10750 }
10751 } else {
10752 /* copy was successful, discard the copy structure */
10753 if (consume_on_success) {
10754 kfree_data(copy->cpy_kdata, copy_size);
10755 zfree(vm_map_copy_zone, copy);
10756 }
10757 }
10758
10759 return kr;
10760 }
10761
10762 /*
10763 * Routine: vm_map_copy_insert [internal use only]
10764 *
10765 * Description:
10766 * Link a copy chain ("copy") into a map at the
10767 * specified location (after "where").
10768 * Side effects:
10769 * The copy chain is destroyed.
10770 */
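/*
 * Note that on return the vm_map_copy_t itself has been freed; the
 * consume_on_success path of vm_map_copyout_internal() below, for
 * example, does
 *
 *      vm_map_copy_insert(dst_map, last, copy);
 *
 * and cannot dereference "copy" afterwards.
 */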
10771 static void
10772 vm_map_copy_insert(
10773 vm_map_t map,
10774 vm_map_entry_t after_where,
10775 vm_map_copy_t copy)
10776 {
10777 vm_map_entry_t entry;
10778
10779 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10780 entry = vm_map_copy_first_entry(copy);
10781 vm_map_copy_entry_unlink(copy, entry);
10782 vm_map_store_entry_link(map, after_where, entry,
10783 VM_MAP_KERNEL_FLAGS_NONE);
10784 after_where = entry;
10785 }
10786 zfree(vm_map_copy_zone, copy);
10787 }
10788
10789 void
10790 vm_map_copy_remap(
10791 vm_map_t map,
10792 vm_map_entry_t where,
10793 vm_map_copy_t copy,
10794 vm_map_offset_t adjustment,
10795 vm_prot_t cur_prot,
10796 vm_prot_t max_prot,
10797 vm_inherit_t inheritance)
10798 {
10799 vm_map_entry_t copy_entry, new_entry;
10800
10801 for (copy_entry = vm_map_copy_first_entry(copy);
10802 copy_entry != vm_map_copy_to_entry(copy);
10803 copy_entry = copy_entry->vme_next) {
10804 /* get a new VM map entry for the map */
10805 new_entry = vm_map_entry_create(map,
10806 !map->hdr.entries_pageable);
10807 /* copy the "copy entry" to the new entry */
10808 vm_map_entry_copy(map, new_entry, copy_entry);
10809 /* adjust "start" and "end" */
10810 new_entry->vme_start += adjustment;
10811 new_entry->vme_end += adjustment;
10812 /* clear some attributes */
10813 new_entry->inheritance = inheritance;
10814 new_entry->protection = cur_prot;
10815 new_entry->max_protection = max_prot;
10816 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10817 /* take an extra reference on the entry's "object" */
10818 if (new_entry->is_sub_map) {
10819 assert(!new_entry->use_pmap); /* not nested */
10820 vm_map_lock(VME_SUBMAP(new_entry));
10821 vm_map_reference(VME_SUBMAP(new_entry));
10822 vm_map_unlock(VME_SUBMAP(new_entry));
10823 } else {
10824 vm_object_reference(VME_OBJECT(new_entry));
10825 }
10826 /* insert the new entry in the map */
10827 vm_map_store_entry_link(map, where, new_entry,
10828 VM_MAP_KERNEL_FLAGS_NONE);
10829 /* continue inserting the "copy entries" after the new entry */
10830 where = new_entry;
10831 }
10832 }
10833
10834
10835 /*
10836 * Returns true if *size matches (or is in the range of) copy->size.
10837 * Upon returning true, the *size field is updated with the actual size of the
10838 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10839 */
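/*
 * Typical use (hypothetical caller and variables): validate a
 * caller-supplied size against the copy object before consuming it,
 * then pass the (possibly updated) size to vm_map_copyout_size():
 *
 *      vm_map_size_t size = requested_size;
 *
 *      if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
 *          return KERN_FAILURE;
 *      }
 *      kr = vm_map_copyout_size(dst_map, &dst_addr, copy, size);
 */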
10840 boolean_t
10841 vm_map_copy_validate_size(
10842 vm_map_t dst_map,
10843 vm_map_copy_t copy,
10844 vm_map_size_t *size)
10845 {
10846 if (copy == VM_MAP_COPY_NULL) {
10847 return FALSE;
10848 }
10849 vm_map_size_t copy_sz = copy->size;
10850 vm_map_size_t sz = *size;
10851 switch (copy->type) {
10852 case VM_MAP_COPY_OBJECT:
10853 case VM_MAP_COPY_KERNEL_BUFFER:
10854 if (sz == copy_sz) {
10855 return TRUE;
10856 }
10857 break;
10858 case VM_MAP_COPY_ENTRY_LIST:
10859 /*
10860 * potential page-size rounding prevents us from exactly
10861 * validating this flavor of vm_map_copy, but we can at least
10862 * assert that it's within a range.
10863 */
10864 if (copy_sz >= sz &&
10865 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10866 *size = copy_sz;
10867 return TRUE;
10868 }
10869 break;
10870 default:
10871 break;
10872 }
10873 return FALSE;
10874 }
10875
10876 /*
10877 * Routine: vm_map_copyout_size
10878 *
10879 * Description:
10880 * Copy out a copy chain ("copy") into newly-allocated
10881 * space in the destination map. Uses a prevalidated
10882 * size for the copy object (vm_map_copy_validate_size).
10883 *
10884 * If successful, consumes the copy object.
10885 * Otherwise, the caller is responsible for it.
10886 */
10887 kern_return_t
10888 vm_map_copyout_size(
10889 vm_map_t dst_map,
10890 vm_map_address_t *dst_addr, /* OUT */
10891 vm_map_copy_t copy,
10892 vm_map_size_t copy_size)
10893 {
10894 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10895 TRUE, /* consume_on_success */
10896 VM_PROT_DEFAULT,
10897 VM_PROT_ALL,
10898 VM_INHERIT_DEFAULT);
10899 }
10900
10901 /*
10902 * Routine: vm_map_copyout
10903 *
10904 * Description:
10905 * Copy out a copy chain ("copy") into newly-allocated
10906 * space in the destination map.
10907 *
10908 * If successful, consumes the copy object.
10909 * Otherwise, the caller is responsible for it.
10910 */
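/*
 * Minimal sketch (hypothetical maps, addresses and variables): moving
 * a region from one map into freshly allocated space in another.
 *
 *      vm_map_copy_t    copy;
 *      vm_map_address_t dst_addr;
 *      kern_return_t    kr;
 *
 *      kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *      if (kr != KERN_SUCCESS) {
 *          return kr;
 *      }
 *      kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *      if (kr != KERN_SUCCESS) {
 *          vm_map_copy_discard(copy);
 *      }
 */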
10911 kern_return_t
10912 vm_map_copyout(
10913 vm_map_t dst_map,
10914 vm_map_address_t *dst_addr, /* OUT */
10915 vm_map_copy_t copy)
10916 {
10917 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10918 TRUE, /* consume_on_success */
10919 VM_PROT_DEFAULT,
10920 VM_PROT_ALL,
10921 VM_INHERIT_DEFAULT);
10922 }
10923
10924 kern_return_t
10925 vm_map_copyout_internal(
10926 vm_map_t dst_map,
10927 vm_map_address_t *dst_addr, /* OUT */
10928 vm_map_copy_t copy,
10929 vm_map_size_t copy_size,
10930 boolean_t consume_on_success,
10931 vm_prot_t cur_protection,
10932 vm_prot_t max_protection,
10933 vm_inherit_t inheritance)
10934 {
10935 vm_map_size_t size;
10936 vm_map_size_t adjustment;
10937 vm_map_offset_t start;
10938 vm_object_offset_t vm_copy_start;
10939 vm_map_entry_t last;
10940 vm_map_entry_t entry;
10941 vm_map_entry_t hole_entry;
10942 vm_map_copy_t original_copy;
10943
10944 /*
10945 * Check for null copy object.
10946 */
10947
10948 if (copy == VM_MAP_COPY_NULL) {
10949 *dst_addr = 0;
10950 return KERN_SUCCESS;
10951 }
10952
10953 /*
10954 * Assert that the vm_map_copy is coming from the right
10955 * zone and hasn't been forged
10956 */
10957 vm_map_copy_require(copy);
10958
10959 if (copy->size != copy_size) {
10960 *dst_addr = 0;
10961 return KERN_FAILURE;
10962 }
10963
10964 /*
10965 * Check for special copy object, created
10966 * by vm_map_copyin_object.
10967 */
10968
10969 if (copy->type == VM_MAP_COPY_OBJECT) {
10970 vm_object_t object = copy->cpy_object;
10971 kern_return_t kr;
10972 vm_object_offset_t offset;
10973
10974 offset = vm_object_trunc_page(copy->offset);
10975 size = vm_map_round_page((copy_size +
10976 (vm_map_size_t)(copy->offset -
10977 offset)),
10978 VM_MAP_PAGE_MASK(dst_map));
10979 *dst_addr = 0;
10980 kr = vm_map_enter(dst_map, dst_addr, size,
10981 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10982 VM_MAP_KERNEL_FLAGS_NONE,
10983 VM_KERN_MEMORY_NONE,
10984 object, offset, FALSE,
10985 VM_PROT_DEFAULT, VM_PROT_ALL,
10986 VM_INHERIT_DEFAULT);
10987 if (kr != KERN_SUCCESS) {
10988 return kr;
10989 }
10990 /* Account for non-pagealigned copy object */
10991 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10992 if (consume_on_success) {
10993 zfree(vm_map_copy_zone, copy);
10994 }
10995 return KERN_SUCCESS;
10996 }
10997
10998 /*
10999 * Check for special kernel buffer allocated
11000 * by new_ipc_kmsg_copyin.
11001 */
11002
11003 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
11004 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
11005 copy, copy_size, FALSE,
11006 consume_on_success);
11007 }
11008
11009 original_copy = copy;
11010 if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
11011 kern_return_t kr;
11012 vm_map_copy_t target_copy;
11013 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
11014
11015 target_copy = VM_MAP_COPY_NULL;
11016 DEBUG4K_ADJUST("adjusting...\n");
11017 kr = vm_map_copy_adjust_to_target(
11018 copy,
11019 0, /* offset */
11020 copy->size, /* size */
11021 dst_map,
11022 TRUE, /* copy */
11023 &target_copy,
11024 &overmap_start,
11025 &overmap_end,
11026 &trimmed_start);
11027 if (kr != KERN_SUCCESS) {
11028 DEBUG4K_COPY("adjust failed 0x%x\n", kr);
11029 return kr;
11030 }
11031 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
11032 if (target_copy != copy) {
11033 copy = target_copy;
11034 }
11035 copy_size = copy->size;
11036 }
11037
11038 /*
11039 * Find space for the data
11040 */
11041
11042 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
11043 VM_MAP_COPY_PAGE_MASK(copy));
11044 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
11045 VM_MAP_COPY_PAGE_MASK(copy))
11046 - vm_copy_start;
11047
11048
11049 StartAgain:;
11050
11051 vm_map_lock(dst_map);
11052 if (dst_map->disable_vmentry_reuse == TRUE) {
11053 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
11054 last = entry;
11055 } else {
11056 if (dst_map->holelistenabled) {
11057 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
11058
11059 if (hole_entry == NULL) {
11060 /*
11061 * No more space in the map?
11062 */
11063 vm_map_unlock(dst_map);
11064 return KERN_NO_SPACE;
11065 }
11066
11067 last = hole_entry;
11068 start = last->vme_start;
11069 } else {
11070 assert(first_free_is_valid(dst_map));
11071 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
11072 vm_map_min(dst_map) : last->vme_end;
11073 }
11074 start = vm_map_round_page(start,
11075 VM_MAP_PAGE_MASK(dst_map));
11076 }
11077
11078 while (TRUE) {
11079 vm_map_entry_t next = last->vme_next;
11080 vm_map_offset_t end = start + size;
11081
11082 if ((end > dst_map->max_offset) || (end < start)) {
11083 if (dst_map->wait_for_space) {
11084 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
11085 assert_wait((event_t) dst_map,
11086 THREAD_INTERRUPTIBLE);
11087 vm_map_unlock(dst_map);
11088 thread_block(THREAD_CONTINUE_NULL);
11089 goto StartAgain;
11090 }
11091 }
11092 vm_map_unlock(dst_map);
11093 return KERN_NO_SPACE;
11094 }
11095
11096 if (dst_map->holelistenabled) {
11097 if (last->vme_end >= end) {
11098 break;
11099 }
11100 } else {
11101 /*
11102 * If there are no more entries, we must win.
11103 *
11104 * OR
11105 *
11106 * If there is another entry, it must be
11107 * after the end of the potential new region.
11108 */
11109
11110 if (next == vm_map_to_entry(dst_map)) {
11111 break;
11112 }
11113
11114 if (next->vme_start >= end) {
11115 break;
11116 }
11117 }
11118
11119 last = next;
11120
11121 if (dst_map->holelistenabled) {
11122 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
11123 /*
11124 * Wrapped around
11125 */
11126 vm_map_unlock(dst_map);
11127 return KERN_NO_SPACE;
11128 }
11129 start = last->vme_start;
11130 } else {
11131 start = last->vme_end;
11132 }
11133 start = vm_map_round_page(start,
11134 VM_MAP_PAGE_MASK(dst_map));
11135 }
11136
11137 if (dst_map->holelistenabled) {
11138 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
11139 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", last, (unsigned long long)last->vme_start);
11140 }
11141 }
11142
11143
11144 adjustment = start - vm_copy_start;
11145 if (!consume_on_success) {
11146 /*
11147 * We're not allowed to consume "copy", so we'll have to
11148 * copy its map entries into the destination map below.
11149 * No need to re-allocate map entries from the correct
11150 * (pageable or not) zone, since we'll get new map entries
11151 * during the transfer.
11152 * We'll also adjust the map entries's "start" and "end"
11153 * during the transfer, to keep "copy"'s entries consistent
11154 * with its "offset".
11155 */
11156 goto after_adjustments;
11157 }
11158
11159 /*
11160 * Since we're going to just drop the map
11161 * entries from the copy into the destination
11162 * map, they must come from the same pool.
11163 */
11164
11165 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
11166 /*
11167 * Mismatches occur when dealing with the default
11168 * pager.
11169 */
11170 vm_map_entry_t next, new;
11171
11172 /*
11173 * Find the zone that the copies were allocated from
11174 */
11175
11176 entry = vm_map_copy_first_entry(copy);
11177
11178 /*
11179 * Reinitialize the copy so that vm_map_copy_entry_link
11180 * will work.
11181 */
11182 vm_map_store_copy_reset(copy, entry);
11183 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
11184
11185 /*
11186 * Copy each entry.
11187 */
11188 while (entry != vm_map_copy_to_entry(copy)) {
11189 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11190 vm_map_entry_copy_full(new, entry);
11191 new->vme_no_copy_on_read = FALSE;
11192 assert(!new->iokit_acct);
11193 if (new->is_sub_map) {
11194 /* clr address space specifics */
11195 new->use_pmap = FALSE;
11196 }
11197 vm_map_copy_entry_link(copy,
11198 vm_map_copy_last_entry(copy),
11199 new);
11200 next = entry->vme_next;
11201 _vm_map_entry_dispose(NULL, entry);
11202 entry = next;
11203 }
11204 }
11205
11206 /*
11207 * Adjust the addresses in the copy chain, and
11208 * reset the region attributes.
11209 */
11210
11211 for (entry = vm_map_copy_first_entry(copy);
11212 entry != vm_map_copy_to_entry(copy);
11213 entry = entry->vme_next) {
11214 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
11215 /*
11216 * We're injecting this copy entry into a map that
11217 * has the standard page alignment, so clear
11218 * "map_aligned" (which might have been inherited
11219 * from the original map entry).
11220 */
11221 entry->map_aligned = FALSE;
11222 }
11223
11224 entry->vme_start += adjustment;
11225 entry->vme_end += adjustment;
11226
11227 if (entry->map_aligned) {
11228 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
11229 VM_MAP_PAGE_MASK(dst_map)));
11230 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
11231 VM_MAP_PAGE_MASK(dst_map)));
11232 }
11233
11234 entry->inheritance = VM_INHERIT_DEFAULT;
11235 entry->protection = VM_PROT_DEFAULT;
11236 entry->max_protection = VM_PROT_ALL;
11237 entry->behavior = VM_BEHAVIOR_DEFAULT;
11238
11239 /*
11240 * If the entry is now wired,
11241 * map the pages into the destination map.
11242 */
11243 if (entry->wired_count != 0) {
11244 vm_map_offset_t va;
11245 vm_object_offset_t offset;
11246 vm_object_t object;
11247 vm_prot_t prot;
11248 int type_of_fault;
11249
11250 /* TODO4K would need to use actual page size */
11251 assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
11252
11253 object = VME_OBJECT(entry);
11254 offset = VME_OFFSET(entry);
11255 va = entry->vme_start;
11256
11257 pmap_pageable(dst_map->pmap,
11258 entry->vme_start,
11259 entry->vme_end,
11260 TRUE);
11261
11262 while (va < entry->vme_end) {
11263 vm_page_t m;
11264 struct vm_object_fault_info fault_info = {};
11265
11266 /*
11267 * Look up the page in the object.
11268 * Assert that the page will be found in the
11269 * top object:
11270 * either
11271 * the object was newly created by
11272 * vm_object_copy_slowly, and has
11273 * copies of all of the pages from
11274 * the source object
11275 * or
11276 * the object was moved from the old
11277 * map entry; because the old map
11278 * entry was wired, all of the pages
11279 * were in the top-level object.
11280 * (XXX not true if we wire pages for
11281 * reading)
11282 */
11283 vm_object_lock(object);
11284
11285 m = vm_page_lookup(object, offset);
11286 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
11287 m->vmp_absent) {
11288 panic("vm_map_copyout: wiring %p", m);
11289 }
11290
11291 prot = entry->protection;
11292
11293 if (override_nx(dst_map, VME_ALIAS(entry)) &&
11294 prot) {
11295 prot |= VM_PROT_EXECUTE;
11296 }
11297
11298 type_of_fault = DBG_CACHE_HIT_FAULT;
11299
11300 fault_info.user_tag = VME_ALIAS(entry);
11301 fault_info.pmap_options = 0;
11302 if (entry->iokit_acct ||
11303 (!entry->is_sub_map && !entry->use_pmap)) {
11304 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11305 }
11306
11307 vm_fault_enter(m,
11308 dst_map->pmap,
11309 va,
11310 PAGE_SIZE, 0,
11311 prot,
11312 prot,
11313 VM_PAGE_WIRED(m),
11314 FALSE, /* change_wiring */
11315 VM_KERN_MEMORY_NONE, /* tag - not wiring */
11316 &fault_info,
11317 NULL, /* need_retry */
11318 &type_of_fault);
11319
11320 vm_object_unlock(object);
11321
11322 offset += PAGE_SIZE_64;
11323 va += PAGE_SIZE;
11324 }
11325 }
11326 }
11327
11328 after_adjustments:
11329
11330 /*
11331 * Correct the page alignment for the result
11332 */
11333
11334 *dst_addr = start + (copy->offset - vm_copy_start);
11335
11336 #if KASAN
11337 kasan_notify_address(*dst_addr, size);
11338 #endif
11339
11340 /*
11341 * Update the hints and the map size
11342 */
11343
11344 if (consume_on_success) {
11345 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
11346 } else {
11347 SAVE_HINT_MAP_WRITE(dst_map, last);
11348 }
11349
11350 dst_map->size += size;
11351
11352 /*
11353 * Link in the copy
11354 */
11355
11356 if (consume_on_success) {
11357 vm_map_copy_insert(dst_map, last, copy);
11358 if (copy != original_copy) {
11359 vm_map_copy_discard(original_copy);
11360 original_copy = VM_MAP_COPY_NULL;
11361 }
11362 } else {
11363 vm_map_copy_remap(dst_map, last, copy, adjustment,
11364 cur_protection, max_protection,
11365 inheritance);
11366 if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
11367 vm_map_copy_discard(copy);
11368 copy = original_copy;
11369 }
11370 }
11371
11372
11373 vm_map_unlock(dst_map);
11374
11375 /*
11376 * XXX If wiring_required, call vm_map_pageable
11377 */
11378
11379 return KERN_SUCCESS;
11380 }
11381
11382 /*
11383 * Routine: vm_map_copyin
11384 *
11385 * Description:
11386 * see vm_map_copyin_common. Exported via Unsupported.exports.
11387 *
11388 */
11389
11390 #undef vm_map_copyin
11391
11392 kern_return_t
11393 vm_map_copyin(
11394 vm_map_t src_map,
11395 vm_map_address_t src_addr,
11396 vm_map_size_t len,
11397 boolean_t src_destroy,
11398 vm_map_copy_t *copy_result) /* OUT */
11399 {
11400 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11401 FALSE, copy_result, FALSE);
11402 }
11403
11404 /*
11405 * Routine: vm_map_copyin_common
11406 *
11407 * Description:
11408 * Copy the specified region (src_addr, len) from the
11409 * source address space (src_map), possibly removing
11410 * the region from the source address space (src_destroy).
11411 *
11412 * Returns:
11413 * A vm_map_copy_t object (copy_result), suitable for
11414 * insertion into another address space (using vm_map_copyout),
11415 * copying over another address space region (using
11416 * vm_map_copy_overwrite). If the copy is unused, it
11417 * should be destroyed (using vm_map_copy_discard).
11418 *
11419 * In/out conditions:
11420 * The source map should not be locked on entry.
11421 */
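/*
 * Sketch of the flag translation performed below (hypothetical call):
 * a request with src_destroy and use_maxprot set, e.g.
 *
 *      vm_map_copyin_common(map, addr, len, TRUE, FALSE, &copy, TRUE);
 *
 * is forwarded as
 *
 *      vm_map_copyin_internal(map, addr, len,
 *          VM_MAP_COPYIN_SRC_DESTROY | VM_MAP_COPYIN_USE_MAXPROT,
 *          &copy);
 */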
11422
11423 typedef struct submap_map {
11424 vm_map_t parent_map;
11425 vm_map_offset_t base_start;
11426 vm_map_offset_t base_end;
11427 vm_map_size_t base_len;
11428 struct submap_map *next;
11429 } submap_map_t;
11430
11431 kern_return_t
11432 vm_map_copyin_common(
11433 vm_map_t src_map,
11434 vm_map_address_t src_addr,
11435 vm_map_size_t len,
11436 boolean_t src_destroy,
11437 __unused boolean_t src_volatile,
11438 vm_map_copy_t *copy_result, /* OUT */
11439 boolean_t use_maxprot)
11440 {
11441 int flags;
11442
11443 flags = 0;
11444 if (src_destroy) {
11445 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11446 }
11447 if (use_maxprot) {
11448 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11449 }
11450 return vm_map_copyin_internal(src_map,
11451 src_addr,
11452 len,
11453 flags,
11454 copy_result);
11455 }
11456 kern_return_t
11457 vm_map_copyin_internal(
11458 vm_map_t src_map,
11459 vm_map_address_t src_addr,
11460 vm_map_size_t len,
11461 int flags,
11462 vm_map_copy_t *copy_result) /* OUT */
11463 {
11464 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11465 * in multi-level lookup, this
11466 * entry contains the actual
11467 * vm_object/offset.
11468 */
11469 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11470
11471 vm_map_offset_t src_start; /* Start of current entry --
11472 * where copy is taking place now
11473 */
11474 vm_map_offset_t src_end; /* End of entire region to be
11475 * copied */
11476 vm_map_offset_t src_base;
11477 vm_map_t base_map = src_map;
11478 boolean_t map_share = FALSE;
11479 submap_map_t *parent_maps = NULL;
11480
11481 vm_map_copy_t copy; /* Resulting copy */
11482 vm_map_address_t copy_addr;
11483 vm_map_size_t copy_size;
11484 boolean_t src_destroy;
11485 boolean_t use_maxprot;
11486 boolean_t preserve_purgeable;
11487 boolean_t entry_was_shared;
11488 vm_map_entry_t saved_src_entry;
11489
11490 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11491 return KERN_INVALID_ARGUMENT;
11492 }
11493
11494 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11495 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11496 preserve_purgeable =
11497 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11498
11499 /*
11500 * Check for copies of zero bytes.
11501 */
11502
11503 if (len == 0) {
11504 *copy_result = VM_MAP_COPY_NULL;
11505 return KERN_SUCCESS;
11506 }
11507
11508 /*
11509 * Check that the end address doesn't overflow
11510 */
11511 src_end = src_addr + len;
11512 if (src_end < src_addr) {
11513 return KERN_INVALID_ADDRESS;
11514 }
11515
11516 /*
11517 * Compute (page aligned) start and end of region
11518 */
11519 src_start = vm_map_trunc_page(src_addr,
11520 VM_MAP_PAGE_MASK(src_map));
11521 src_end = vm_map_round_page(src_end,
11522 VM_MAP_PAGE_MASK(src_map));
11523
11524 /*
11525 * If the copy is sufficiently small, use a kernel buffer instead
11526 * of making a virtual copy. The theory being that the cost of
11527 * setting up VM (and taking C-O-W faults) dominates the copy costs
11528 * for small regions.
11529 */
11530 if ((len < msg_ool_size_small) &&
11531 !use_maxprot &&
11532 !preserve_purgeable &&
11533 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11534 /*
11535 * Since the "msg_ool_size_small" threshold was increased and
11536 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11537 * address space limits, we revert to doing a virtual copy if the
11538 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11539 * of the commpage would now fail when it used to work.
11540 */
11541 (src_start >= vm_map_min(src_map) &&
11542 src_start < vm_map_max(src_map) &&
11543 src_end >= vm_map_min(src_map) &&
11544 src_end < vm_map_max(src_map))) {
11545 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11546 src_destroy, copy_result);
11547 }
11548
11549 /*
11550 * Allocate a header element for the list.
11551 *
11552 * Use the start and end in the header to
11553 * remember the endpoints prior to rounding.
11554 */
11555
11556 copy = vm_map_copy_allocate();
11557 copy->type = VM_MAP_COPY_ENTRY_LIST;
11558 copy->cpy_hdr.entries_pageable = TRUE;
11559 copy->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(src_map);
11560
11561 vm_map_store_init( &(copy->cpy_hdr));
11562
11563 copy->offset = src_addr;
11564 copy->size = len;
11565
11566 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11567
11568 #define RETURN(x) \
11569 MACRO_BEGIN \
11570 vm_map_unlock(src_map); \
11571 if(src_map != base_map) \
11572 vm_map_deallocate(src_map); \
11573 if (new_entry != VM_MAP_ENTRY_NULL) \
11574 vm_map_copy_entry_dispose(copy,new_entry); \
11575 vm_map_copy_discard(copy); \
11576 { \
11577 submap_map_t *_ptr; \
11578 \
11579 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11580 parent_maps=parent_maps->next; \
11581 if (_ptr->parent_map != base_map) \
11582 vm_map_deallocate(_ptr->parent_map); \
11583 kfree_type(submap_map_t, _ptr); \
11584 } \
11585 } \
11586 MACRO_RETURN(x); \
11587 MACRO_END
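/*
 * Note: RETURN() above is the single error-exit path for this routine.
 * It unlocks "src_map", drops the extra reference taken on any submap
 * we descended into, disposes of a not-yet-linked "new_entry", discards
 * the partially built "copy", and frees the chain of submap_map_t
 * records before returning its argument.
 */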
11588
11589 /*
11590 * Find the beginning of the region.
11591 */
11592
11593 vm_map_lock(src_map);
11594
11595 /*
11596 * Lookup the original "src_addr" rather than the truncated
11597 * "src_start", in case "src_start" falls in a non-map-aligned
11598 * map entry *before* the map entry that contains "src_addr"...
11599 */
11600 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11601 RETURN(KERN_INVALID_ADDRESS);
11602 }
11603 if (!tmp_entry->is_sub_map) {
11604 /*
11605 * ... but clip to the map-rounded "src_start" rather than
11606 * "src_addr" to preserve map-alignment. We'll adjust the
11607 * first copy entry at the end, if needed.
11608 */
11609 vm_map_clip_start(src_map, tmp_entry, src_start);
11610 }
11611 if (src_start < tmp_entry->vme_start) {
11612 /*
11613 * Move "src_start" up to the start of the
11614 * first map entry to copy.
11615 */
11616 src_start = tmp_entry->vme_start;
11617 }
11618 /* set for later submap fix-up */
11619 copy_addr = src_start;
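/*
 * "copy_addr" records where the first copy entry should start; the
 * fix-up pass at the end of this routine rewrites each copy entry's
 * start/end relative to this running cursor, so the entries form one
 * contiguous range even when they were collected from several submaps.
 */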
11620
11621 /*
11622 * Go through entries until we get to the end.
11623 */
11624
11625 while (TRUE) {
11626 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11627 vm_map_size_t src_size; /* Size of source
11628 * map entry (in both
11629 * maps)
11630 */
11631
11632 vm_object_t src_object; /* Object to copy */
11633 vm_object_offset_t src_offset;
11634
11635 vm_object_t new_copy_object;/* vm_object_copy_* result */
11636
11637 boolean_t src_needs_copy; /* Should source map
11638 * be made read-only
11639 * for copy-on-write?
11640 */
11641
11642 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11643
11644 boolean_t was_wired; /* Was source wired? */
11645 boolean_t saved_used_for_jit; /* Saved used_for_jit. */
11646 vm_map_version_t version; /* Version before locks
11647 * dropped to make copy
11648 */
11649 kern_return_t result; /* Return value from
11650 * copy_strategically.
11651 */
11652 while (tmp_entry->is_sub_map) {
11653 vm_map_size_t submap_len;
11654 submap_map_t *ptr;
11655
11656 ptr = kalloc_type(submap_map_t, Z_WAITOK);
11657 ptr->next = parent_maps;
11658 parent_maps = ptr;
11659 ptr->parent_map = src_map;
11660 ptr->base_start = src_start;
11661 ptr->base_end = src_end;
11662 submap_len = tmp_entry->vme_end - src_start;
11663 if (submap_len > (src_end - src_start)) {
11664 submap_len = src_end - src_start;
11665 }
11666 ptr->base_len = submap_len;
11667
11668 src_start -= tmp_entry->vme_start;
11669 src_start += VME_OFFSET(tmp_entry);
11670 src_end = src_start + submap_len;
11671 src_map = VME_SUBMAP(tmp_entry);
11672 vm_map_lock(src_map);
11673 /* keep an outstanding reference for all maps in */
11674 /* the chain of parent maps except the base map */
11675 vm_map_reference(src_map);
11676 vm_map_unlock(ptr->parent_map);
11677 if (!vm_map_lookup_entry(
11678 src_map, src_start, &tmp_entry)) {
11679 RETURN(KERN_INVALID_ADDRESS);
11680 }
11681 map_share = TRUE;
11682 if (!tmp_entry->is_sub_map) {
11683 vm_map_clip_start(src_map, tmp_entry, src_start);
11684 }
11685 src_entry = tmp_entry;
11686 }
11687 /* we are now in the lowest level submap... */
11688
11689 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11690 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11691 /* This is not supported for now. In the future */
11692 /* we will need to detect the phys_contig */
11693 /* condition and then upgrade copy_slowly */
11694 /* to do a physical copy from the device-memory- */
11695 /* based object. We can piggy-back off of */
11696 /* the was_wired boolean to set up the */
11697 /* proper handling. */
11698 RETURN(KERN_PROTECTION_FAILURE);
11699 }
11700 /*
11701 * Create a new address map entry to hold the result.
11702 * Fill in the fields from the appropriate source entries.
11703 * We must unlock the source map to do this if we need
11704 * to allocate a map entry.
11705 */
11706 if (new_entry == VM_MAP_ENTRY_NULL) {
11707 version.main_timestamp = src_map->timestamp;
11708 vm_map_unlock(src_map);
11709
11710 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11711
11712 vm_map_lock(src_map);
11713 if ((version.main_timestamp + 1) != src_map->timestamp) {
11714 if (!vm_map_lookup_entry(src_map, src_start,
11715 &tmp_entry)) {
11716 RETURN(KERN_INVALID_ADDRESS);
11717 }
11718 if (!tmp_entry->is_sub_map) {
11719 vm_map_clip_start(src_map, tmp_entry, src_start);
11720 }
11721 continue; /* restart w/ new tmp_entry */
11722 }
11723 }
11724
11725 /*
11726 * Verify that the region can be read.
11727 */
11728 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11729 !use_maxprot) ||
11730 (src_entry->max_protection & VM_PROT_READ) == 0) {
11731 RETURN(KERN_PROTECTION_FAILURE);
11732 }
11733
11734 /*
11735 * Clip against the endpoints of the entire region.
11736 */
11737
11738 vm_map_clip_end(src_map, src_entry, src_end);
11739
11740 src_size = src_entry->vme_end - src_start;
11741 src_object = VME_OBJECT(src_entry);
11742 src_offset = VME_OFFSET(src_entry);
11743 was_wired = (src_entry->wired_count != 0);
11744
11745 vm_map_entry_copy(src_map, new_entry, src_entry);
11746 if (new_entry->is_sub_map) {
11747 /* clear address space specifics */
11748 new_entry->use_pmap = FALSE;
11749 } else {
11750 /*
11751 * We're dealing with a copy-on-write operation,
11752 * so the resulting mapping should not inherit the
11753 * original mapping's accounting settings.
11754 * "iokit_acct" should have been cleared in
11755 * vm_map_entry_copy().
11756 * "use_pmap" should be reset to its default (TRUE)
11757 * so that the new mapping gets accounted for in
11758 * the task's memory footprint.
11759 */
11760 assert(!new_entry->iokit_acct);
11761 new_entry->use_pmap = TRUE;
11762 }
11763
11764 /*
11765 * Attempt non-blocking copy-on-write optimizations.
11766 */
11767
11768 /*
11769 * If we are destroying the source, and the object
11770 * is internal, we could move the object reference
11771 * from the source to the copy. The copy is
11772 * copy-on-write only if the source is.
11773 * We make another reference to the object, because
11774 * destroying the source entry will deallocate it.
11775 *
11776 * This memory transfer has to be atomic, (to prevent
11777 * the VM object from being shared or copied while
11778 * it's being moved here), so we could only do this
11779 * if we won't have to unlock the VM map until the
11780 * original mapping has been fully removed.
11781 */
11782
11783 RestartCopy:
11784 if ((src_object == VM_OBJECT_NULL ||
11785 (!was_wired && !map_share && !tmp_entry->is_shared
11786 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11787 vm_object_copy_quickly(
11788 VME_OBJECT(new_entry),
11789 src_offset,
11790 src_size,
11791 &src_needs_copy,
11792 &new_entry_needs_copy)) {
11793 new_entry->needs_copy = new_entry_needs_copy;
11794
11795 /*
11796 * Handle copy-on-write obligations
11797 */
11798
11799 if (src_needs_copy && !tmp_entry->needs_copy) {
11800 vm_prot_t prot;
11801
11802 prot = src_entry->protection & ~VM_PROT_WRITE;
11803
11804 if (override_nx(src_map, VME_ALIAS(src_entry))
11805 && prot) {
11806 prot |= VM_PROT_EXECUTE;
11807 }
11808
11809 vm_object_pmap_protect(
11810 src_object,
11811 src_offset,
11812 src_size,
11813 (src_entry->is_shared ?
11814 PMAP_NULL
11815 : src_map->pmap),
11816 VM_MAP_PAGE_SIZE(src_map),
11817 src_entry->vme_start,
11818 prot);
11819
11820 assert(tmp_entry->wired_count == 0);
11821 tmp_entry->needs_copy = TRUE;
11822 }
11823
11824 /*
11825 * The map has never been unlocked, so it's safe
11826 * to move to the next entry rather than doing
11827 * another lookup.
11828 */
11829
11830 goto CopySuccessful;
11831 }
11832
11833 entry_was_shared = tmp_entry->is_shared;
11834
11835 /*
11836 * Take an object reference, so that we may
11837 * release the map lock(s).
11838 */
11839
11840 assert(src_object != VM_OBJECT_NULL);
11841 vm_object_reference(src_object);
11842
11843 /*
11844 * Record the timestamp for later verification.
11845 * Unlock the map.
11846 */
11847
11848 version.main_timestamp = src_map->timestamp;
11849 vm_map_unlock(src_map); /* Increments timestamp once! */
11850 saved_src_entry = src_entry;
11851 tmp_entry = VM_MAP_ENTRY_NULL;
11852 src_entry = VM_MAP_ENTRY_NULL;
11853
11854 /*
11855 * Perform the copy
11856 */
11857
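/*
 * Three copy strategies are used below:
 *  - vm_object_copy_slowly(): an immediate physical copy, used when the
 *    source is wired (or when COW copyin is disabled for sub-PAGE_SIZE maps);
 *  - vm_object_copy_delayed(): an asymmetric copy-on-write copy, used when
 *    the source object is symmetric and shared;
 *  - vm_object_copy_strategically(): lets the object's own copy strategy
 *    decide, for everything else.
 */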
11858 if (was_wired ||
11859 (debug4k_no_cow_copyin &&
11860 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11861 CopySlowly:
11862 vm_object_lock(src_object);
11863 result = vm_object_copy_slowly(
11864 src_object,
11865 src_offset,
11866 src_size,
11867 THREAD_UNINT,
11868 &new_copy_object);
11869 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
11870 saved_used_for_jit = new_entry->used_for_jit;
11871 VME_OBJECT_SET(new_entry, new_copy_object);
11872 new_entry->used_for_jit = saved_used_for_jit;
11873 VME_OFFSET_SET(new_entry,
11874 src_offset - vm_object_trunc_page(src_offset));
11875 new_entry->needs_copy = FALSE;
11876 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11877 (entry_was_shared || map_share)) {
11878 vm_object_t new_object;
11879
11880 vm_object_lock_shared(src_object);
11881 new_object = vm_object_copy_delayed(
11882 src_object,
11883 src_offset,
11884 src_size,
11885 TRUE);
11886 if (new_object == VM_OBJECT_NULL) {
11887 goto CopySlowly;
11888 }
11889
11890 VME_OBJECT_SET(new_entry, new_object);
11891 assert(new_entry->wired_count == 0);
11892 new_entry->needs_copy = TRUE;
11893 assert(!new_entry->iokit_acct);
11894 assert(new_object->purgable == VM_PURGABLE_DENY);
11895 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11896 result = KERN_SUCCESS;
11897 } else {
11898 vm_object_offset_t new_offset;
11899 new_offset = VME_OFFSET(new_entry);
11900 result = vm_object_copy_strategically(src_object,
11901 src_offset,
11902 src_size,
11903 &new_copy_object,
11904 &new_offset,
11905 &new_entry_needs_copy);
11906 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
11907 saved_used_for_jit = new_entry->used_for_jit;
11908 VME_OBJECT_SET(new_entry, new_copy_object);
11909 new_entry->used_for_jit = saved_used_for_jit;
11910 if (new_offset != VME_OFFSET(new_entry)) {
11911 VME_OFFSET_SET(new_entry, new_offset);
11912 }
11913
11914 new_entry->needs_copy = new_entry_needs_copy;
11915 }
11916
11917 if (result == KERN_SUCCESS &&
11918 ((preserve_purgeable &&
11919 src_object->purgable != VM_PURGABLE_DENY) ||
11920 new_entry->used_for_jit)) {
11921 /*
11922 * Purgeable objects should be COPY_NONE, true share;
11923 * this should be propagated to the copy.
11924 *
11925 * Also force mappings the pmap specially protects to
11926 * be COPY_NONE; trying to COW these mappings would
11927 * change the effective protections, which could have
11928 * side effects if the pmap layer relies on the
11929 * specified protections.
11930 */
11931
11932 vm_object_t new_object;
11933
11934 new_object = VME_OBJECT(new_entry);
11935 assert(new_object != src_object);
11936 vm_object_lock(new_object);
11937 assert(new_object->ref_count == 1);
11938 assert(new_object->shadow == VM_OBJECT_NULL);
11939 assert(new_object->copy == VM_OBJECT_NULL);
11940 assert(new_object->vo_owner == NULL);
11941
11942 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11943
11944 if (preserve_purgeable &&
11945 src_object->purgable != VM_PURGABLE_DENY) {
11946 new_object->true_share = TRUE;
11947
11948 /* start as non-volatile with no owner... */
11949 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11950 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11951 /* ... and move to src_object's purgeable state */
11952 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11953 int state;
11954 state = src_object->purgable;
11955 vm_object_purgable_control(
11956 new_object,
11957 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11958 &state);
11959 }
11960 /* no pmap accounting for purgeable objects */
11961 new_entry->use_pmap = FALSE;
11962 }
11963
11964 vm_object_unlock(new_object);
11965 new_object = VM_OBJECT_NULL;
11966 }
11967
11968 if (result != KERN_SUCCESS &&
11969 result != KERN_MEMORY_RESTART_COPY) {
11970 vm_map_lock(src_map);
11971 RETURN(result);
11972 }
11973
11974 /*
11975 * Throw away the extra reference
11976 */
11977
11978 vm_object_deallocate(src_object);
11979
11980 /*
11981 * Verify that the map has not substantially
11982 * changed while the copy was being made.
11983 */
11984
11985 vm_map_lock(src_map);
11986
11987 if ((version.main_timestamp + 1) == src_map->timestamp) {
11988 /* src_map hasn't changed: src_entry is still valid */
11989 src_entry = saved_src_entry;
11990 goto VerificationSuccessful;
11991 }
11992
11993 /*
11994 * Simple version comparison failed.
11995 *
11996 * Retry the lookup and verify that the
11997 * same object/offset are still present.
11998 *
11999 * [Note: a memory manager that colludes with
12000 * the calling task can detect that we have
12001 * cheated. While the map was unlocked, the
12002 * mapping could have been changed and restored.]
12003 */
12004
12005 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12006 if (result != KERN_MEMORY_RESTART_COPY) {
12007 vm_object_deallocate(VME_OBJECT(new_entry));
12008 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
12009 /* reset accounting state */
12010 new_entry->iokit_acct = FALSE;
12011 new_entry->use_pmap = TRUE;
12012 }
12013 RETURN(KERN_INVALID_ADDRESS);
12014 }
12015
12016 src_entry = tmp_entry;
12017 vm_map_clip_start(src_map, src_entry, src_start);
12018
12019 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12020 !use_maxprot) ||
12021 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12022 goto VerificationFailed;
12023 }
12024
12025 if (src_entry->vme_end < new_entry->vme_end) {
12026 /*
12027 * This entry might have been shortened
12028 * (vm_map_clip_end) or been replaced with
12029 * an entry that ends closer to "src_start"
12030 * than before.
12031 * Adjust "new_entry" accordingly; copying
12032 * less memory would be correct but we also
12033 * redo the copy (see below) if the new entry
12034 * no longer points at the same object/offset.
12035 */
12036 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12037 VM_MAP_COPY_PAGE_MASK(copy)));
12038 new_entry->vme_end = src_entry->vme_end;
12039 src_size = new_entry->vme_end - src_start;
12040 } else if (src_entry->vme_end > new_entry->vme_end) {
12041 /*
12042 * This entry might have been extended
12043 * (vm_map_entry_simplify() or coalesce)
12044 * or been replaced with an entry that ends farther
12045 * from "src_start" than before.
12046 *
12047 * We've called vm_object_copy_*() only on
12048 * the previous <start:end> range, so we can't
12049 * just extend new_entry. We have to re-do
12050 * the copy based on the new entry as if it was
12051 * pointing at a different object/offset (see
12052 * "Verification failed" below).
12053 */
12054 }
12055
12056 if ((VME_OBJECT(src_entry) != src_object) ||
12057 (VME_OFFSET(src_entry) != src_offset) ||
12058 (src_entry->vme_end > new_entry->vme_end)) {
12059 /*
12060 * Verification failed.
12061 *
12062 * Start over with this top-level entry.
12063 */
12064
12065 VerificationFailed: ;
12066
12067 vm_object_deallocate(VME_OBJECT(new_entry));
12068 tmp_entry = src_entry;
12069 continue;
12070 }
12071
12072 /*
12073 * Verification succeeded.
12074 */
12075
12076 VerificationSuccessful:;
12077
12078 if (result == KERN_MEMORY_RESTART_COPY) {
12079 goto RestartCopy;
12080 }
12081
12082 /*
12083 * Copy succeeded.
12084 */
12085
12086 CopySuccessful: ;
12087
12088 /*
12089 * Link in the new copy entry.
12090 */
12091
12092 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12093 new_entry);
12094
12095 /*
12096 * Determine whether the entire region
12097 * has been copied.
12098 */
12099 src_base = src_start;
12100 src_start = new_entry->vme_end;
12101 new_entry = VM_MAP_ENTRY_NULL;
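/*
 * If we've consumed the current (sub)range, pop back up the chain of
 * parent maps recorded in "parent_maps": undo the clipping damage done
 * in the submap, drop our reference on it, relock the parent, and
 * resume the scan just past the portion of the parent entry we
 * traversed.
 */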
12102 while ((src_start >= src_end) && (src_end != 0)) {
12103 submap_map_t *ptr;
12104
12105 if (src_map == base_map) {
12106 /* back to the top */
12107 break;
12108 }
12109
12110 ptr = parent_maps;
12111 assert(ptr != NULL);
12112 parent_maps = parent_maps->next;
12113
12114 /* fix up the damage we did in that submap */
12115 vm_map_simplify_range(src_map,
12116 src_base,
12117 src_end);
12118
12119 vm_map_unlock(src_map);
12120 vm_map_deallocate(src_map);
12121 vm_map_lock(ptr->parent_map);
12122 src_map = ptr->parent_map;
12123 src_base = ptr->base_start;
12124 src_start = ptr->base_start + ptr->base_len;
12125 src_end = ptr->base_end;
12126 if (!vm_map_lookup_entry(src_map,
12127 src_start,
12128 &tmp_entry) &&
12129 (src_end > src_start)) {
12130 RETURN(KERN_INVALID_ADDRESS);
12131 }
12132 kfree_type(submap_map_t, ptr);
12133 if (parent_maps == NULL) {
12134 map_share = FALSE;
12135 }
12136 src_entry = tmp_entry->vme_prev;
12137 }
12138
12139 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12140 (src_start >= src_addr + len) &&
12141 (src_addr + len != 0)) {
12142 /*
12143 * Stop copying now, even though we haven't reached
12144 * "src_end". We'll adjust the end of the last copy
12145 * entry at the end, if needed.
12146 *
12147 * If src_map's alignment is different from the
12148 * system's page-alignment, there could be
12149 * extra non-map-aligned map entries between
12150 * the original (non-rounded) "src_addr + len"
12151 * and the rounded "src_end".
12152 * We do not want to copy those map entries since
12153 * they're not part of the copied range.
12154 */
12155 break;
12156 }
12157
12158 if ((src_start >= src_end) && (src_end != 0)) {
12159 break;
12160 }
12161
12162 /*
12163 * Verify that there are no gaps in the region
12164 */
12165
12166 tmp_entry = src_entry->vme_next;
12167 if ((tmp_entry->vme_start != src_start) ||
12168 (tmp_entry == vm_map_to_entry(src_map))) {
12169 RETURN(KERN_INVALID_ADDRESS);
12170 }
12171 }
12172
12173 /*
12174 * If the source should be destroyed, do it now, since the
12175 * copy was successful.
12176 */
12177 if (src_destroy) {
12178 (void) vm_map_delete(
12179 src_map,
12180 vm_map_trunc_page(src_addr,
12181 VM_MAP_PAGE_MASK(src_map)),
12182 src_end,
12183 ((src_map == kernel_map) ?
12184 VM_MAP_REMOVE_KUNWIRE :
12185 VM_MAP_REMOVE_NO_FLAGS),
12186 VM_MAP_NULL);
12187 } else {
12188 /* fix up the damage we did in the base map */
12189 vm_map_simplify_range(
12190 src_map,
12191 vm_map_trunc_page(src_addr,
12192 VM_MAP_PAGE_MASK(src_map)),
12193 vm_map_round_page(src_end,
12194 VM_MAP_PAGE_MASK(src_map)));
12195 }
12196
12197 vm_map_unlock(src_map);
12198 tmp_entry = VM_MAP_ENTRY_NULL;
12199
12200 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12201 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12202 vm_map_offset_t original_start, original_offset, original_end;
12203
12204 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12205
12206 /* adjust alignment of first copy_entry's "vme_start" */
12207 tmp_entry = vm_map_copy_first_entry(copy);
12208 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12209 vm_map_offset_t adjustment;
12210
12211 original_start = tmp_entry->vme_start;
12212 original_offset = VME_OFFSET(tmp_entry);
12213
12214 /* map-align the start of the first copy entry... */
12215 adjustment = (tmp_entry->vme_start -
12216 vm_map_trunc_page(
12217 tmp_entry->vme_start,
12218 VM_MAP_PAGE_MASK(src_map)));
12219 tmp_entry->vme_start -= adjustment;
12220 VME_OFFSET_SET(tmp_entry,
12221 VME_OFFSET(tmp_entry) - adjustment);
12222 copy_addr -= adjustment;
12223 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12224 /* ... adjust for mis-aligned start of copy range */
12225 adjustment =
12226 (vm_map_trunc_page(copy->offset,
12227 PAGE_MASK) -
12228 vm_map_trunc_page(copy->offset,
12229 VM_MAP_PAGE_MASK(src_map)));
12230 if (adjustment) {
12231 assert(page_aligned(adjustment));
12232 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12233 tmp_entry->vme_start += adjustment;
12234 VME_OFFSET_SET(tmp_entry,
12235 (VME_OFFSET(tmp_entry) +
12236 adjustment));
12237 copy_addr += adjustment;
12238 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12239 }
12240
12241 /*
12242 * Assert that the adjustments haven't exposed
12243 * more than was originally copied...
12244 */
12245 assert(tmp_entry->vme_start >= original_start);
12246 assert(VME_OFFSET(tmp_entry) >= original_offset);
12247 /*
12248 * ... and that it did not adjust outside of
12249 * a single 16K page.
12250 */
12251 assert(vm_map_trunc_page(tmp_entry->vme_start,
12252 VM_MAP_PAGE_MASK(src_map)) ==
12253 vm_map_trunc_page(original_start,
12254 VM_MAP_PAGE_MASK(src_map)));
12255 }
12256
12257 /* adjust alignment of last copy_entry's "vme_end" */
12258 tmp_entry = vm_map_copy_last_entry(copy);
12259 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12260 vm_map_offset_t adjustment;
12261
12262 original_end = tmp_entry->vme_end;
12263
12264 /* map-align the end of the last copy entry... */
12265 tmp_entry->vme_end =
12266 vm_map_round_page(tmp_entry->vme_end,
12267 VM_MAP_PAGE_MASK(src_map));
12268 /* ... adjust for mis-aligned end of copy range */
12269 adjustment =
12270 (vm_map_round_page((copy->offset +
12271 copy->size),
12272 VM_MAP_PAGE_MASK(src_map)) -
12273 vm_map_round_page((copy->offset +
12274 copy->size),
12275 PAGE_MASK));
12276 if (adjustment) {
12277 assert(page_aligned(adjustment));
12278 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12279 tmp_entry->vme_end -= adjustment;
12280 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12281 }
12282
12283 /*
12284 * Assert that the adjustments haven't exposed
12285 * more than was originally copied...
12286 */
12287 assert(tmp_entry->vme_end <= original_end);
12288 /*
12289 * ... and that it did not adjust outside of
12290 * a single 16K page.
12291 */
12292 assert(vm_map_round_page(tmp_entry->vme_end,
12293 VM_MAP_PAGE_MASK(src_map)) ==
12294 vm_map_round_page(original_end,
12295 VM_MAP_PAGE_MASK(src_map)));
12296 }
12297 }
12298
12299 /* Fix-up start and end points in copy. This is necessary */
12300 /* when the various entries in the copy object were picked */
12301 /* up from different sub-maps */
12302
12303 tmp_entry = vm_map_copy_first_entry(copy);
12304 copy_size = 0; /* compute actual size */
12305 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12306 assert(VM_MAP_PAGE_ALIGNED(
12307 copy_addr + (tmp_entry->vme_end -
12308 tmp_entry->vme_start),
12309 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12310 assert(VM_MAP_PAGE_ALIGNED(
12311 copy_addr,
12312 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12313
12314 /*
12315 * The copy_entries will be injected directly into the
12316 * destination map and might not be "map aligned" there...
12317 */
12318 tmp_entry->map_aligned = FALSE;
12319
12320 tmp_entry->vme_end = copy_addr +
12321 (tmp_entry->vme_end - tmp_entry->vme_start);
12322 tmp_entry->vme_start = copy_addr;
12323 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12324 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12325 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12326 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12327 }
12328
12329 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12330 copy_size < copy->size) {
12331 /*
12332 * The actual size of the VM map copy is smaller than what
12333 * was requested by the caller. This must be because some
12334 * PAGE_SIZE-sized pages are missing at the end of the last
12335 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12336 * The caller might not have been aware of those missing
12337 * pages and might not want to be aware of them, which is
12338 * fine as long as they don't try to access (and crash on)
12339 * those missing pages.
12340 * Let's adjust the size of the "copy", to avoid failing
12341 * in vm_map_copyout() or vm_map_copy_overwrite().
12342 */
12343 assert(vm_map_round_page(copy_size,
12344 VM_MAP_PAGE_MASK(src_map)) ==
12345 vm_map_round_page(copy->size,
12346 VM_MAP_PAGE_MASK(src_map)));
12347 copy->size = copy_size;
12348 }
12349
12350 *copy_result = copy;
12351 return KERN_SUCCESS;
12352
12353 #undef RETURN
12354 }
12355
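/*
 * vm_map_copy_extract:
 *
 * Build a VM_MAP_COPY_ENTRY_LIST copy describing [src_addr, src_addr+len)
 * in "src_map" via vm_map_remap_extract(); "do_copy" is passed through as
 * that routine's "copy" argument.  "cur_prot" and "max_prot" are IN/OUT:
 * on entry they hold the protections the caller requires, on return they
 * report what the extracted range actually allows.
 */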
12356 kern_return_t
12357 vm_map_copy_extract(
12358 vm_map_t src_map,
12359 vm_map_address_t src_addr,
12360 vm_map_size_t len,
12361 boolean_t do_copy,
12362 vm_map_copy_t *copy_result, /* OUT */
12363 vm_prot_t *cur_prot, /* IN/OUT */
12364 vm_prot_t *max_prot, /* IN/OUT */
12365 vm_inherit_t inheritance,
12366 vm_map_kernel_flags_t vmk_flags)
12367 {
12368 vm_map_copy_t copy;
12369 kern_return_t kr;
12370 vm_prot_t required_cur_prot, required_max_prot;
12371
12372 /*
12373 * Check for copies of zero bytes.
12374 */
12375
12376 if (len == 0) {
12377 *copy_result = VM_MAP_COPY_NULL;
12378 return KERN_SUCCESS;
12379 }
12380
12381 /*
12382 * Check that the end address doesn't overflow
12383 */
12384 if (src_addr + len < src_addr) {
12385 return KERN_INVALID_ADDRESS;
12386 }
12387
12388 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12389 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12390 }
12391
12392 required_cur_prot = *cur_prot;
12393 required_max_prot = *max_prot;
12394
12395 /*
12396 * Allocate a header element for the list.
12397 *
12398 * Use the start and end in the header to
12399 * remember the endpoints prior to rounding.
12400 */
12401
12402 copy = vm_map_copy_allocate();
12403 copy->type = VM_MAP_COPY_ENTRY_LIST;
12404 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12405
12406 vm_map_store_init(&copy->cpy_hdr);
12407
12408 copy->offset = 0;
12409 copy->size = len;
12410
12411 kr = vm_map_remap_extract(src_map,
12412 src_addr,
12413 len,
12414 do_copy, /* copy */
12415 &copy->cpy_hdr,
12416 cur_prot, /* IN/OUT */
12417 max_prot, /* IN/OUT */
12418 inheritance,
12419 vmk_flags);
12420 if (kr != KERN_SUCCESS) {
12421 vm_map_copy_discard(copy);
12422 return kr;
12423 }
12424 if (required_cur_prot != VM_PROT_NONE) {
12425 assert((*cur_prot & required_cur_prot) == required_cur_prot);
12426 assert((*max_prot & required_max_prot) == required_max_prot);
12427 }
12428
12429 *copy_result = copy;
12430 return KERN_SUCCESS;
12431 }
12432
12433 /*
12434 * vm_map_copyin_object:
12435 *
12436 * Create a copy object from an object.
12437 * Our caller donates an object reference.
12438 */
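/*
 * Illustrative sketch (hypothetical caller) of the reference-donation
 * contract: the caller takes an object reference and hands it to the copy.
 *
 *	vm_map_copy_t copy;
 *	kern_return_t kr;
 *
 *	vm_object_reference(object);	// this reference is donated below
 *	kr = vm_map_copyin_object(object, 0, size, &copy);
 *	// on success, "copy" owns the donated reference; discarding the
 *	// copy later releases it.
 */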
12439
12440 kern_return_t
12441 vm_map_copyin_object(
12442 vm_object_t object,
12443 vm_object_offset_t offset, /* offset of region in object */
12444 vm_object_size_t size, /* size of region in object */
12445 vm_map_copy_t *copy_result) /* OUT */
12446 {
12447 vm_map_copy_t copy; /* Resulting copy */
12448
12449 /*
12450 * We drop the object into a special copy object
12451 * that contains the object directly.
12452 */
12453
12454 copy = vm_map_copy_allocate();
12455 copy->type = VM_MAP_COPY_OBJECT;
12456 copy->cpy_object = object;
12457 copy->offset = offset;
12458 copy->size = size;
12459
12460 *copy_result = copy;
12461 return KERN_SUCCESS;
12462 }
12463
12464 static void
12465 vm_map_fork_share(
12466 vm_map_t old_map,
12467 vm_map_entry_t old_entry,
12468 vm_map_t new_map)
12469 {
12470 vm_object_t object;
12471 vm_map_entry_t new_entry;
12472
12473 /*
12474 * New sharing code. New map entry
12475 * references original object. Internal
12476 * objects use asynchronous copy algorithm for
12477 * future copies. First make sure we have
12478 * the right object. If we need a shadow,
12479 * or someone else already has one, then
12480 * make a new shadow and share it.
12481 */
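/*
 * The object-selection cases below, in order:
 *  - submap entry: nest the submap's pmap into the new pmap (if use_pmap);
 *  - no object yet: allocate a fresh object covering the entry;
 *  - object already has an asymmetric copy strategy: nothing to fix up;
 *  - otherwise: create a shadow object and share that instead (the three
 *    reasons for this are spelled out in the long comment further down).
 */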
12482
12483 object = VME_OBJECT(old_entry);
12484 if (old_entry->is_sub_map) {
12485 assert(old_entry->wired_count == 0);
12486 #ifndef NO_NESTED_PMAP
12487 if (old_entry->use_pmap) {
12488 kern_return_t result;
12489
12490 result = pmap_nest(new_map->pmap,
12491 (VME_SUBMAP(old_entry))->pmap,
12492 (addr64_t)old_entry->vme_start,
12493 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12494 if (result) {
12495 panic("vm_map_fork_share: pmap_nest failed!");
12496 }
12497 }
12498 #endif /* NO_NESTED_PMAP */
12499 } else if (object == VM_OBJECT_NULL) {
12500 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12501 old_entry->vme_start));
12502 VME_OFFSET_SET(old_entry, 0);
12503 VME_OBJECT_SET(old_entry, object);
12504 old_entry->use_pmap = TRUE;
12505 // assert(!old_entry->needs_copy);
12506 } else if (object->copy_strategy !=
12507 MEMORY_OBJECT_COPY_SYMMETRIC) {
12508 /*
12509 * We are already using an asymmetric
12510 * copy, and therefore we already have
12511 * the right object.
12512 */
12513
12514 assert(!old_entry->needs_copy);
12515 } else if (old_entry->needs_copy || /* case 1 */
12516 object->shadowed || /* case 2 */
12517 (!object->true_share && /* case 3 */
12518 !old_entry->is_shared &&
12519 (object->vo_size >
12520 (vm_map_size_t)(old_entry->vme_end -
12521 old_entry->vme_start)))) {
12522 /*
12523 * We need to create a shadow.
12524 * There are three cases here.
12525 * In the first case, we need to
12526 * complete a deferred symmetrical
12527 * copy that we participated in.
12528 * In the second and third cases,
12529 * we need to create the shadow so
12530 * that changes that we make to the
12531 * object do not interfere with
12532 * any symmetrical copies which
12533 * have occurred (case 2) or which
12534 * might occur (case 3).
12535 *
12536 * The first case is when we had
12537 * deferred shadow object creation
12538 * via the entry->needs_copy mechanism.
12539 * This mechanism only works when
12540 * only one entry points to the source
12541 * object, and we are about to create
12542 * a second entry pointing to the
12543 * same object. The problem is that
12544 * there is no way of mapping from
12545 * an object to the entries pointing
12546 * to it. (Deferred shadow creation
12547 * works with one entry because it occurs
12548 * at fault time, and we walk from the
12549 * entry to the object when handling
12550 * the fault.)
12551 *
12552 * The second case is when the object
12553 * to be shared has already been copied
12554 * with a symmetric copy, but we point
12555 * directly to the object without
12556 * needs_copy set in our entry. (This
12557 * can happen because different ranges
12558 * of an object can be pointed to by
12559 * different entries. In particular,
12560 * a single entry pointing to an object
12561 * can be split by a call to vm_inherit,
12562 * which, combined with task_create, can
12563 * result in the different entries
12564 * having different needs_copy values.)
12565 * The shadowed flag in the object allows
12566 * us to detect this case. The problem
12567 * with this case is that if this object
12568 * has or will have shadows, then we
12569 * must not perform an asymmetric copy
12570 * of this object, since such a copy
12571 * allows the object to be changed, which
12572 * will break the previous symmetrical
12573 * copies (which rely upon the object
12574 * not changing). In a sense, the shadowed
12575 * flag says "don't change this object".
12576 * We fix this by creating a shadow
12577 * object for this object, and sharing
12578 * that. This works because we are free
12579 * to change the shadow object (and thus
12580 * to use an asymmetric copy strategy);
12581 * this is also semantically correct,
12582 * since this object is temporary, and
12583 * therefore a copy of the object is
12584 * as good as the object itself. (This
12585 * is not true for permanent objects,
12586 * since the pager needs to see changes,
12587 * which won't happen if the changes
12588 * are made to a copy.)
12589 *
12590 * The third case is when the object
12591 * to be shared has parts sticking
12592 * outside of the entry we're working
12593 * with, and thus may in the future
12594 * be subject to a symmetrical copy.
12595 * (This is a preemptive version of
12596 * case 2.)
12597 */
12598 VME_OBJECT_SHADOW(old_entry,
12599 (vm_map_size_t) (old_entry->vme_end -
12600 old_entry->vme_start));
12601
12602 /*
12603 * If we're making a shadow for other than
12604 * copy on write reasons, then we have
12605 * to remove write permission.
12606 */
12607
12608 if (!old_entry->needs_copy &&
12609 (old_entry->protection & VM_PROT_WRITE)) {
12610 vm_prot_t prot;
12611
12612 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
12613
12614 prot = old_entry->protection & ~VM_PROT_WRITE;
12615
12616 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
12617
12618 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12619 prot |= VM_PROT_EXECUTE;
12620 }
12621
12622
12623 if (old_map->mapped_in_other_pmaps) {
12624 vm_object_pmap_protect(
12625 VME_OBJECT(old_entry),
12626 VME_OFFSET(old_entry),
12627 (old_entry->vme_end -
12628 old_entry->vme_start),
12629 PMAP_NULL,
12630 PAGE_SIZE,
12631 old_entry->vme_start,
12632 prot);
12633 } else {
12634 pmap_protect(old_map->pmap,
12635 old_entry->vme_start,
12636 old_entry->vme_end,
12637 prot);
12638 }
12639 }
12640
12641 old_entry->needs_copy = FALSE;
12642 object = VME_OBJECT(old_entry);
12643 }
12644
12645
12646 /*
12647 * If object was using a symmetric copy strategy,
12648 * change its copy strategy to the default
12649 * asymmetric copy strategy, which is copy_delay
12650 * in the non-norma case and copy_call in the
12651 * norma case. Bump the reference count for the
12652 * new entry.
12653 */
12654
12655 if (old_entry->is_sub_map) {
12656 vm_map_lock(VME_SUBMAP(old_entry));
12657 vm_map_reference(VME_SUBMAP(old_entry));
12658 vm_map_unlock(VME_SUBMAP(old_entry));
12659 } else {
12660 vm_object_lock(object);
12661 vm_object_reference_locked(object);
12662 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12663 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12664 }
12665 vm_object_unlock(object);
12666 }
12667
12668 /*
12669 * Clone the entry, using object ref from above.
12670 * Mark both entries as shared.
12671 */
12672
12673 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12674 * map or descendants */
12675 vm_map_entry_copy(old_map, new_entry, old_entry);
12676 old_entry->is_shared = TRUE;
12677 new_entry->is_shared = TRUE;
12678
12679 /*
12680 * We're dealing with a shared mapping, so the resulting mapping
12681 * should inherit some of the original mapping's accounting settings.
12682 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12683 * "use_pmap" should stay the same as before (if it hasn't been reset
12684 * to TRUE when we cleared "iokit_acct").
12685 */
12686 assert(!new_entry->iokit_acct);
12687
12688 /*
12689 * If the old entry's inheritance is VM_INHERIT_NONE,
12690 * the new entry is for a corpse fork; remove the
12691 * write permission from the new entry.
12692 */
12693 if (old_entry->inheritance == VM_INHERIT_NONE) {
12694 new_entry->protection &= ~VM_PROT_WRITE;
12695 new_entry->max_protection &= ~VM_PROT_WRITE;
12696 }
12697
12698 /*
12699 * Insert the entry into the new map -- we
12700 * know we're inserting at the end of the new
12701 * map.
12702 */
12703
12704 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12705 VM_MAP_KERNEL_FLAGS_NONE);
12706
12707 /*
12708 * Update the physical map
12709 */
12710
12711 if (old_entry->is_sub_map) {
12712 /* Bill Angell pmap support goes here */
12713 } else {
12714 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12715 old_entry->vme_end - old_entry->vme_start,
12716 old_entry->vme_start);
12717 }
12718 }
12719
12720 static boolean_t
12721 vm_map_fork_copy(
12722 vm_map_t old_map,
12723 vm_map_entry_t *old_entry_p,
12724 vm_map_t new_map,
12725 int vm_map_copyin_flags)
12726 {
12727 vm_map_entry_t old_entry = *old_entry_p;
12728 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12729 vm_map_offset_t start = old_entry->vme_start;
12730 vm_map_copy_t copy;
12731 vm_map_entry_t last = vm_map_last_entry(new_map);
12732
12733 vm_map_unlock(old_map);
12734 /*
12735 * Use maxprot version of copyin because we
12736 * care about whether this memory can ever
12737 * be accessed, not just whether it's accessible
12738 * right now.
12739 */
12740 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12741 if (vm_map_copyin_internal(old_map, start, entry_size,
12742 vm_map_copyin_flags, &copy)
12743 != KERN_SUCCESS) {
12744 /*
12745 * The map might have changed while it
12746 * was unlocked; check it again. Skip
12747 * any blank space or permanently
12748 * unreadable region.
12749 */
12750 vm_map_lock(old_map);
12751 if (!vm_map_lookup_entry(old_map, start, &last) ||
12752 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12753 last = last->vme_next;
12754 }
12755 *old_entry_p = last;
12756
12757 /*
12758 * XXX For some error returns, want to
12759 * XXX skip to the next element. Note
12760 * that INVALID_ADDRESS and
12761 * PROTECTION_FAILURE are handled above.
12762 */
12763
12764 return FALSE;
12765 }
12766
12767 /*
12768 * Assert that the vm_map_copy is coming from the right
12769 * zone and hasn't been forged
12770 */
12771 vm_map_copy_require(copy);
12772
12773 /*
12774 * Insert the copy into the new map
12775 */
12776 vm_map_copy_insert(new_map, last, copy);
12777
12778 /*
12779 * Pick up the traversal at the end of
12780 * the copied region.
12781 */
12782
12783 vm_map_lock(old_map);
12784 start += entry_size;
12785 if (!vm_map_lookup_entry(old_map, start, &last)) {
12786 last = last->vme_next;
12787 } else {
12788 if (last->vme_start == start) {
12789 /*
12790 * No need to clip here and we don't
12791 * want to cause any unnecessary
12792 * unnesting...
12793 */
12794 } else {
12795 vm_map_clip_start(old_map, last, start);
12796 }
12797 }
12798 *old_entry_p = last;
12799
12800 return TRUE;
12801 }
12802
12803 /*
12804 * vm_map_fork:
12805 *
12806 * Create and return a new map based on the old
12807 * map, according to the inheritance values on the
12808 * regions in that map and the options.
12809 *
12810 * The source map must not be locked.
12811 */
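/*
 * Options accepted here (anything else is rejected below):
 *	VM_MAP_FORK_SHARE_IF_INHERIT_NONE - treat readable VM_INHERIT_NONE
 *		entries (unless device-pager backed) as VM_INHERIT_SHARE;
 *	VM_MAP_FORK_PRESERVE_PURGEABLE - preserve purgeable state when the
 *		slow copyin path is taken;
 *	VM_MAP_FORK_CORPSE_FOOTPRINT - collect footprint data into the new
 *		map for later autopsy.
 * Each entry is then handled according to its (possibly overridden)
 * inheritance: NONE is skipped, SHARE goes through vm_map_fork_share(),
 * and COPY tries the vm_object_copy_quickly() path before falling back
 * to vm_map_fork_copy().
 */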
12812 vm_map_t
12813 vm_map_fork(
12814 ledger_t ledger,
12815 vm_map_t old_map,
12816 int options)
12817 {
12818 pmap_t new_pmap;
12819 vm_map_t new_map;
12820 vm_map_entry_t old_entry;
12821 vm_map_size_t new_size = 0, entry_size;
12822 vm_map_entry_t new_entry;
12823 boolean_t src_needs_copy;
12824 boolean_t new_entry_needs_copy;
12825 boolean_t pmap_is64bit;
12826 int vm_map_copyin_flags;
12827 vm_inherit_t old_entry_inheritance;
12828 int map_create_options;
12829 kern_return_t footprint_collect_kr;
12830
12831 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12832 VM_MAP_FORK_PRESERVE_PURGEABLE |
12833 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12834 /* unsupported option */
12835 return VM_MAP_NULL;
12836 }
12837
12838 pmap_is64bit =
12839 #if defined(__i386__) || defined(__x86_64__)
12840 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12841 #elif defined(__arm64__)
12842 old_map->pmap->is_64bit;
12843 #elif defined(__arm__)
12844 FALSE;
12845 #else
12846 #error Unknown architecture.
12847 #endif
12848
12849 unsigned int pmap_flags = 0;
12850 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12851 #if defined(HAS_APPLE_PAC)
12852 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12853 #endif
12854 #if PMAP_CREATE_FORCE_4K_PAGES
12855 if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
12856 PAGE_SIZE != FOURK_PAGE_SIZE) {
12857 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
12858 }
12859 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12860 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12861 if (new_pmap == NULL) {
12862 return VM_MAP_NULL;
12863 }
12864
12865 vm_map_reference(old_map);
12866 vm_map_lock(old_map);
12867
12868 map_create_options = 0;
12869 if (old_map->hdr.entries_pageable) {
12870 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12871 }
12872 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12873 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12874 footprint_collect_kr = KERN_SUCCESS;
12875 }
12876 new_map = vm_map_create_options(new_pmap,
12877 old_map->min_offset,
12878 old_map->max_offset,
12879 map_create_options);
12880 /* inherit cs_enforcement */
12881 vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
12882 vm_map_lock(new_map);
12883 vm_commit_pagezero_status(new_map);
12884 /* inherit the parent map's page size */
12885 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12886
12887 /* ensure PMAP_CS structures are prepared for the fork */
12888 pmap_cs_fork_prepare(old_map->pmap, new_pmap);
12889
12890 for (old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map);) {
12891 /*
12892 * Abort any corpse collection if the system is shutting down.
12893 */
12894 if ((options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12895 get_system_inshutdown()) {
12896 vm_map_corpse_footprint_collect_done(new_map);
12897 vm_map_unlock(new_map);
12898 vm_map_unlock(old_map);
12899 vm_map_deallocate(new_map);
12900 vm_map_deallocate(old_map);
12901 printf("Aborting corpse map due to system shutdown\n");
12902 return VM_MAP_NULL;
12903 }
12904
12905 entry_size = old_entry->vme_end - old_entry->vme_start;
12906
12907 old_entry_inheritance = old_entry->inheritance;
12908 /*
12909 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12910 * share VM_INHERIT_NONE entries that are not backed by a
12911 * device pager.
12912 */
12913 if (old_entry_inheritance == VM_INHERIT_NONE &&
12914 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12915 (old_entry->protection & VM_PROT_READ) &&
12916 !(!old_entry->is_sub_map &&
12917 VME_OBJECT(old_entry) != NULL &&
12918 VME_OBJECT(old_entry)->pager != NULL &&
12919 is_device_pager_ops(
12920 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12921 old_entry_inheritance = VM_INHERIT_SHARE;
12922 }
12923
12924 if (old_entry_inheritance != VM_INHERIT_NONE &&
12925 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12926 footprint_collect_kr == KERN_SUCCESS) {
12927 /*
12928 * The corpse won't have old_map->pmap to query
12929 * footprint information, so collect that data now
12930 * and store it in new_map->vmmap_corpse_footprint
12931 * for later autopsy.
12932 */
12933 footprint_collect_kr =
12934 vm_map_corpse_footprint_collect(old_map,
12935 old_entry,
12936 new_map);
12937 }
12938
12939 switch (old_entry_inheritance) {
12940 case VM_INHERIT_NONE:
12941 break;
12942
12943 case VM_INHERIT_SHARE:
12944 vm_map_fork_share(old_map, old_entry, new_map);
12945 new_size += entry_size;
12946 break;
12947
12948 case VM_INHERIT_COPY:
12949
12950 /*
12951 * Inline the copy_quickly case;
12952 * upon failure, fall back on call
12953 * to vm_map_fork_copy.
12954 */
12955
12956 if (old_entry->is_sub_map) {
12957 break;
12958 }
12959 if ((old_entry->wired_count != 0) ||
12960 ((VME_OBJECT(old_entry) != NULL) &&
12961 (VME_OBJECT(old_entry)->true_share))) {
12962 goto slow_vm_map_fork_copy;
12963 }
12964
12965 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12966 vm_map_entry_copy(old_map, new_entry, old_entry);
12967 if (old_entry->permanent) {
12968 /* inherit "permanent" on fork() */
12969 new_entry->permanent = TRUE;
12970 }
12971
12972 if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
12973 new_map->jit_entry_exists = TRUE;
12974 }
12975
12976 if (new_entry->is_sub_map) {
12977 /* clear address space specifics */
12978 new_entry->use_pmap = FALSE;
12979 } else {
12980 /*
12981 * We're dealing with a copy-on-write operation,
12982 * so the resulting mapping should not inherit
12983 * the original mapping's accounting settings.
12984 * "iokit_acct" should have been cleared in
12985 * vm_map_entry_copy().
12986 * "use_pmap" should be reset to its default
12987 * (TRUE) so that the new mapping gets
12988 * accounted for in the task's memory footprint.
12989 */
12990 assert(!new_entry->iokit_acct);
12991 new_entry->use_pmap = TRUE;
12992 }
12993
12994 if (!vm_object_copy_quickly(
12995 VME_OBJECT(new_entry),
12996 VME_OFFSET(old_entry),
12997 (old_entry->vme_end -
12998 old_entry->vme_start),
12999 &src_needs_copy,
13000 &new_entry_needs_copy)) {
13001 vm_map_entry_dispose(new_map, new_entry);
13002 goto slow_vm_map_fork_copy;
13003 }
13004
13005 /*
13006 * Handle copy-on-write obligations
13007 */
13008
13009 if (src_needs_copy && !old_entry->needs_copy) {
13010 vm_prot_t prot;
13011
13012 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
13013
13014 prot = old_entry->protection & ~VM_PROT_WRITE;
13015
13016 if (override_nx(old_map, VME_ALIAS(old_entry))
13017 && prot) {
13018 prot |= VM_PROT_EXECUTE;
13019 }
13020
13021 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
13022
13023 vm_object_pmap_protect(
13024 VME_OBJECT(old_entry),
13025 VME_OFFSET(old_entry),
13026 (old_entry->vme_end -
13027 old_entry->vme_start),
13028 ((old_entry->is_shared
13029 || old_map->mapped_in_other_pmaps)
13030 ? PMAP_NULL :
13031 old_map->pmap),
13032 VM_MAP_PAGE_SIZE(old_map),
13033 old_entry->vme_start,
13034 prot);
13035
13036 assert(old_entry->wired_count == 0);
13037 old_entry->needs_copy = TRUE;
13038 }
13039 new_entry->needs_copy = new_entry_needs_copy;
13040
13041 /*
13042 * Insert the entry at the end
13043 * of the map.
13044 */
13045
13046 vm_map_store_entry_link(new_map,
13047 vm_map_last_entry(new_map),
13048 new_entry,
13049 VM_MAP_KERNEL_FLAGS_NONE);
13050 new_size += entry_size;
13051 break;
13052
13053 slow_vm_map_fork_copy:
13054 vm_map_copyin_flags = 0;
13055 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
13056 vm_map_copyin_flags |=
13057 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
13058 }
13059 if (vm_map_fork_copy(old_map,
13060 &old_entry,
13061 new_map,
13062 vm_map_copyin_flags)) {
13063 new_size += entry_size;
13064 }
13065 continue;
13066 }
13067 old_entry = old_entry->vme_next;
13068 }
13069
13070 #if defined(__arm64__)
13071 pmap_insert_sharedpage(new_map->pmap);
13072 #endif /* __arm64__ */
13073
13074 new_map->size = new_size;
13075
13076 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13077 vm_map_corpse_footprint_collect_done(new_map);
13078 }
13079
13080 /* Propagate JIT entitlement for the pmap layer. */
13081 if (pmap_get_jit_entitled(old_map->pmap)) {
13082 /* Tell the pmap that it supports JIT. */
13083 pmap_set_jit_entitled(new_map->pmap);
13084 }
13085
13086 vm_map_unlock(new_map);
13087 vm_map_unlock(old_map);
13088 vm_map_deallocate(old_map);
13089
13090 return new_map;
13091 }
13092
13093 /*
13094 * vm_map_exec:
13095 *
13096 * Setup the "new_map" with the proper execution environment according
13097 * to the type of executable (platform, 64bit, chroot environment).
13098 * Map the comm page and shared region, etc...
13099 */
13100 kern_return_t
13101 vm_map_exec(
13102 vm_map_t new_map,
13103 task_t task,
13104 boolean_t is64bit,
13105 void *fsroot,
13106 cpu_type_t cpu,
13107 cpu_subtype_t cpu_subtype,
13108 boolean_t reslide,
13109 boolean_t is_driverkit)
13110 {
13111 SHARED_REGION_TRACE_DEBUG(
13112 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13113 (void *)VM_KERNEL_ADDRPERM(current_task()),
13114 (void *)VM_KERNEL_ADDRPERM(new_map),
13115 (void *)VM_KERNEL_ADDRPERM(task),
13116 (void *)VM_KERNEL_ADDRPERM(fsroot),
13117 cpu,
13118 cpu_subtype));
13119 (void) vm_commpage_enter(new_map, task, is64bit);
13120
13121 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide, is_driverkit);
13122
13123 SHARED_REGION_TRACE_DEBUG(
13124 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13125 (void *)VM_KERNEL_ADDRPERM(current_task()),
13126 (void *)VM_KERNEL_ADDRPERM(new_map),
13127 (void *)VM_KERNEL_ADDRPERM(task),
13128 (void *)VM_KERNEL_ADDRPERM(fsroot),
13129 cpu,
13130 cpu_subtype));
13131
13132 /*
13133 * Some devices have region(s) of memory that shouldn't get allocated by
13134 * user processes. The following code creates dummy vm_map_entry_t's for each
13135 * of the regions that need to be reserved to prevent any allocations in
13136 * those regions.
13137 */
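/*
 * Each reserved region is entered as a permanent, zero-protection
 * mapping (vmkf_permanent and vmkf_beyond_max, with VM_PROT_NONE for
 * both the current and maximum protection), so user allocations can
 * never land in, and faults can never succeed on, those address ranges.
 */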
13138 kern_return_t kr = KERN_FAILURE;
13139 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13140 vmk_flags.vmkf_permanent = TRUE;
13141 vmk_flags.vmkf_beyond_max = TRUE;
13142
13143 struct vm_reserved_region *regions = NULL;
13144 size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
13145 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13146
13147 for (size_t i = 0; i < num_regions; ++i) {
13148 kr = vm_map_enter(
13149 new_map,
13150 &regions[i].vmrr_addr,
13151 regions[i].vmrr_size,
13152 (vm_map_offset_t)0,
13153 VM_FLAGS_FIXED,
13154 vmk_flags,
13155 VM_KERN_MEMORY_NONE,
13156 VM_OBJECT_NULL,
13157 (vm_object_offset_t)0,
13158 FALSE,
13159 VM_PROT_NONE,
13160 VM_PROT_NONE,
13161 VM_INHERIT_COPY);
13162
13163 if (kr != KERN_SUCCESS) {
13164 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13165 }
13166 }
13167
13168 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13169
13170 return KERN_SUCCESS;
13171 }
13172
13173 uint64_t vm_map_lookup_locked_copy_slowly_count = 0;
13174 uint64_t vm_map_lookup_locked_copy_slowly_size = 0;
13175 uint64_t vm_map_lookup_locked_copy_slowly_max = 0;
13176 uint64_t vm_map_lookup_locked_copy_slowly_restart = 0;
13177 uint64_t vm_map_lookup_locked_copy_slowly_error = 0;
13178 uint64_t vm_map_lookup_locked_copy_strategically_count = 0;
13179 uint64_t vm_map_lookup_locked_copy_strategically_size = 0;
13180 uint64_t vm_map_lookup_locked_copy_strategically_max = 0;
13181 uint64_t vm_map_lookup_locked_copy_strategically_restart = 0;
13182 uint64_t vm_map_lookup_locked_copy_strategically_error = 0;
13183 uint64_t vm_map_lookup_locked_copy_shadow_count = 0;
13184 uint64_t vm_map_lookup_locked_copy_shadow_size = 0;
13185 uint64_t vm_map_lookup_locked_copy_shadow_max = 0;
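/*
 * Diagnostic counters: judging by their names, these record how often
 * (and for how many bytes) the slow-copy, strategic-copy and shadow
 * paths in vm_map_lookup_locked() below are exercised.
 */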
13186 /*
13187 * vm_map_lookup_locked:
13188 *
13189 * Finds the VM object, offset, and
13190 * protection for a given virtual address in the
13191 * specified map, assuming a page fault of the
13192 * type specified.
13193 *
13194 * Returns the (object, offset, protection) for
13195 * this address, whether it is wired down, and whether
13196 * this map has the only reference to the data in question.
13197 * In order to later verify this lookup, a "version"
13198 * is returned.
13199 * If contended != NULL, *contended will be set to
13200 * true iff the thread had to spin or block to acquire
13201 * an exclusive lock.
13202 *
13203 * The map MUST be locked by the caller and WILL be
13204 * locked on exit. In order to guarantee the
13205 * existence of the returned object, it is returned
13206 * locked.
13207 *
13208 * If a lookup is requested with "write protection"
13209 * specified, the map may be changed to perform virtual
13210 * copying operations, although the data referenced will
13211 * remain the same.
13212 */
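/*
 * Rough control flow: try the map hint first, fall back to a full
 * lookup, then recurse through any submap entries (performing a
 * copy-on-write of the submap's backing object when a write fault or
 * an explicit copy requires it) until a terminal object/offset is
 * found and returned locked.
 */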
13213 kern_return_t
13214 vm_map_lookup_locked(
13215 vm_map_t *var_map, /* IN/OUT */
13216 vm_map_offset_t vaddr,
13217 vm_prot_t fault_type,
13218 int object_lock_type,
13219 vm_map_version_t *out_version, /* OUT */
13220 vm_object_t *object, /* OUT */
13221 vm_object_offset_t *offset, /* OUT */
13222 vm_prot_t *out_prot, /* OUT */
13223 boolean_t *wired, /* OUT */
13224 vm_object_fault_info_t fault_info, /* OUT */
13225 vm_map_t *real_map, /* OUT */
13226 bool *contended) /* OUT */
13227 {
13228 vm_map_entry_t entry;
13229 vm_map_t map = *var_map;
13230 vm_map_t old_map = *var_map;
13231 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13232 vm_map_offset_t cow_parent_vaddr = 0;
13233 vm_map_offset_t old_start = 0;
13234 vm_map_offset_t old_end = 0;
13235 vm_prot_t prot;
13236 boolean_t mask_protections;
13237 boolean_t force_copy;
13238 boolean_t no_force_copy_if_executable;
13239 boolean_t submap_needed_copy;
13240 vm_prot_t original_fault_type;
13241 vm_map_size_t fault_page_mask;
13242
13243 /*
13244 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13245 * as a mask against the mapping's actual protections, not as an
13246 * absolute value.
13247 */
13248 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13249 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13250 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13251 fault_type &= VM_PROT_ALL;
13252 original_fault_type = fault_type;
13253 if (contended) {
13254 *contended = false;
13255 }
13256
13257 *real_map = map;
13258
13259 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13260 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13261
13262 RetryLookup:
13263 fault_type = original_fault_type;
13264
13265 /*
13266 * If the map has an interesting hint, try it before calling
13267 * full blown lookup routine.
13268 */
13269 entry = map->hint;
13270
13271 if ((entry == vm_map_to_entry(map)) ||
13272 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13273 vm_map_entry_t tmp_entry;
13274
13275 /*
13276 * Entry was either not a valid hint, or the vaddr
13277 * was not contained in the entry, so do a full lookup.
13278 */
13279 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13280 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13281 vm_map_unlock(cow_sub_map_parent);
13282 }
13283 if ((*real_map != map)
13284 && (*real_map != cow_sub_map_parent)) {
13285 vm_map_unlock(*real_map);
13286 }
13287 return KERN_INVALID_ADDRESS;
13288 }
13289
13290 entry = tmp_entry;
13291 }
13292 if (map == old_map) {
13293 old_start = entry->vme_start;
13294 old_end = entry->vme_end;
13295 }
13296
13297 /*
13298 * Handle submaps. Drop lock on upper map, submap is
13299 * returned locked.
13300 */
13301
13302 submap_needed_copy = FALSE;
13303 submap_recurse:
13304 if (entry->is_sub_map) {
13305 vm_map_offset_t local_vaddr;
13306 vm_map_offset_t end_delta;
13307 vm_map_offset_t start_delta;
13308 vm_map_entry_t submap_entry, saved_submap_entry;
13309 vm_object_offset_t submap_entry_offset;
13310 vm_object_size_t submap_entry_size;
13311 vm_prot_t subentry_protection;
13312 vm_prot_t subentry_max_protection;
13313 boolean_t subentry_no_copy_on_read;
13314 boolean_t mapped_needs_copy = FALSE;
13315 vm_map_version_t version;
13316
13317 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13318 "map %p (%d) entry %p submap %p (%d)\n",
13319 map, VM_MAP_PAGE_SHIFT(map), entry,
13320 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13321
13322 local_vaddr = vaddr;
13323
13324 if ((entry->use_pmap &&
13325 !((fault_type & VM_PROT_WRITE) ||
13326 force_copy))) {
13327 /* if real_map equals map we unlock below */
13328 if ((*real_map != map) &&
13329 (*real_map != cow_sub_map_parent)) {
13330 vm_map_unlock(*real_map);
13331 }
13332 *real_map = VME_SUBMAP(entry);
13333 }
13334
13335 if (entry->needs_copy &&
13336 ((fault_type & VM_PROT_WRITE) ||
13337 force_copy)) {
13338 if (!mapped_needs_copy) {
13339 if (vm_map_lock_read_to_write(map)) {
13340 vm_map_lock_read(map);
13341 *real_map = map;
13342 goto RetryLookup;
13343 }
13344 vm_map_lock_read(VME_SUBMAP(entry));
13345 *var_map = VME_SUBMAP(entry);
13346 cow_sub_map_parent = map;
13347 /* reset base to map before cow object */
13348 /* this is the map which will accept */
13349 /* the new cow object */
13350 old_start = entry->vme_start;
13351 old_end = entry->vme_end;
13352 cow_parent_vaddr = vaddr;
13353 mapped_needs_copy = TRUE;
13354 } else {
13355 vm_map_lock_read(VME_SUBMAP(entry));
13356 *var_map = VME_SUBMAP(entry);
13357 if ((cow_sub_map_parent != map) &&
13358 (*real_map != map)) {
13359 vm_map_unlock(map);
13360 }
13361 }
13362 } else {
13363 if (entry->needs_copy) {
13364 submap_needed_copy = TRUE;
13365 }
13366 vm_map_lock_read(VME_SUBMAP(entry));
13367 *var_map = VME_SUBMAP(entry);
13368 /* leave the map locked if it is a target */
13369 /* cow sub_map above; otherwise, just */
13370 /* follow the maps down to the object. */
13371 /* Here we unlock, knowing we are not */
13372 /* revisiting the map. */
13373 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13374 vm_map_unlock_read(map);
13375 }
13376 }
13377
13378 map = *var_map;
13379
13380 /* calculate the offset in the submap for vaddr */
13381 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13382 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13383 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13384 (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13385
13386 RetrySubMap:
13387 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13388 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13389 vm_map_unlock(cow_sub_map_parent);
13390 }
13391 if ((*real_map != map)
13392 && (*real_map != cow_sub_map_parent)) {
13393 vm_map_unlock(*real_map);
13394 }
13395 *real_map = map;
13396 return KERN_INVALID_ADDRESS;
13397 }
13398
13399 /* find the attenuated shadow of the underlying object */
13400 /* on our target map */
13401
13402 /* In English: the submap object may extend beyond the */
13403 /* region mapped by the entry, or it may only fill a */
13404 /* portion of it. For our purposes, we only care about the */
13405 /* case where the object doesn't fill the entry. In that */
13406 /* case, the area that will ultimately be clipped in the */
13407 /* top map only needs to be as big as the portion of the */
13408 /* underlying entry that is actually mapped (see the worked example below). */
13409 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13410 submap_entry->vme_start - VME_OFFSET(entry) : 0;
13411
13412 end_delta =
13413 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13414 submap_entry->vme_end ?
13415 0 : (VME_OFFSET(entry) +
13416 (old_end - old_start))
13417 - submap_entry->vme_end;
13418
13419 old_start += start_delta;
13420 old_end -= end_delta;
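/*
 * Worked example (illustrative numbers only): suppose this entry maps
 * submap offsets [0x0, 0x8000) at parent addresses [0x1000, 0x9000)
 * (old_start .. old_end), but the submap entry we found only covers
 * submap offsets [0x2000, 0x6000).  Then start_delta = 0x2000 and
 * end_delta = 0x2000, so old_start/old_end are trimmed to
 * [0x3000, 0x7000): exactly the slice of the parent range that is
 * actually backed by this submap entry.
 */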
13421
13422 if (submap_entry->is_sub_map) {
13423 entry = submap_entry;
13424 vaddr = local_vaddr;
13425 goto submap_recurse;
13426 }
13427
13428 if (((fault_type & VM_PROT_WRITE) ||
13429 force_copy)
13430 && cow_sub_map_parent) {
13431 vm_object_t sub_object, copy_object;
13432 vm_object_offset_t copy_offset;
13433 vm_map_offset_t local_start;
13434 vm_map_offset_t local_end;
13435 boolean_t object_copied = FALSE;
13436 vm_object_offset_t object_copied_offset = 0;
13437 boolean_t object_copied_needs_copy = FALSE;
13438 kern_return_t kr = KERN_SUCCESS;
13439
13440 if (vm_map_lock_read_to_write(map)) {
13441 vm_map_lock_read(map);
13442 old_start -= start_delta;
13443 old_end += end_delta;
13444 goto RetrySubMap;
13445 }
13446
13447
13448 sub_object = VME_OBJECT(submap_entry);
13449 if (sub_object == VM_OBJECT_NULL) {
13450 sub_object =
13451 vm_object_allocate(
13452 (vm_map_size_t)
13453 (submap_entry->vme_end -
13454 submap_entry->vme_start));
13455 VME_OBJECT_SET(submap_entry, sub_object);
13456 VME_OFFSET_SET(submap_entry, 0);
13457 assert(!submap_entry->is_sub_map);
13458 assert(submap_entry->use_pmap);
13459 }
13460 local_start = local_vaddr -
13461 (cow_parent_vaddr - old_start);
13462 local_end = local_vaddr +
13463 (old_end - cow_parent_vaddr);
13464 vm_map_clip_start(map, submap_entry, local_start);
13465 vm_map_clip_end(map, submap_entry, local_end);
13466 if (submap_entry->is_sub_map) {
13467 /* unnesting was done when clipping */
13468 assert(!submap_entry->use_pmap);
13469 }
13470
13471 /* This is the COW case: let's connect */
13472 /* an entry in our space to the underlying */
13473 /* object in the submap, bypassing the */
13474 /* submap. */
13475 submap_entry_offset = VME_OFFSET(submap_entry);
13476 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13477
13478 if ((submap_entry->wired_count != 0 ||
13479 sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
13480 (submap_entry->protection & VM_PROT_EXECUTE) &&
13481 no_force_copy_if_executable) {
13482 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13483 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13484 vm_map_unlock(cow_sub_map_parent);
13485 }
13486 if ((*real_map != map)
13487 && (*real_map != cow_sub_map_parent)) {
13488 vm_map_unlock(*real_map);
13489 }
13490 *real_map = map;
13491 kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_NO_COW_ON_EXECUTABLE), 0 /* arg */);
13492 vm_map_lock_write_to_read(map);
13493 kr = KERN_PROTECTION_FAILURE;
13494 DTRACE_VM4(submap_no_copy_executable,
13495 vm_map_t, map,
13496 vm_object_offset_t, submap_entry_offset,
13497 vm_object_size_t, submap_entry_size,
13498 int, kr);
13499 return kr;
13500 }
13501
13502 if (submap_entry->wired_count != 0) {
13503 vm_object_reference(sub_object);
13504
13505 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13506 "submap_entry %p offset 0x%llx\n",
13507 submap_entry, VME_OFFSET(submap_entry));
13508
13509 DTRACE_VM6(submap_copy_slowly,
13510 vm_map_t, cow_sub_map_parent,
13511 vm_map_offset_t, vaddr,
13512 vm_map_t, map,
13513 vm_object_size_t, submap_entry_size,
13514 int, submap_entry->wired_count,
13515 int, sub_object->copy_strategy);
13516
13517 saved_submap_entry = submap_entry;
13518 version.main_timestamp = map->timestamp;
13519 vm_map_unlock(map); /* Increments timestamp by 1 */
13520 submap_entry = VM_MAP_ENTRY_NULL;
13521
13522 vm_object_lock(sub_object);
13523 kr = vm_object_copy_slowly(sub_object,
13524 submap_entry_offset,
13525 submap_entry_size,
13526 FALSE,
13527 &copy_object);
13528 object_copied = TRUE;
13529 object_copied_offset = 0;
13530 /* 4k: account for extra offset in physical page */
13531 object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13532 object_copied_needs_copy = FALSE;
13533 vm_object_deallocate(sub_object);
13534
13535 vm_map_lock(map);
13536
13537 if (kr != KERN_SUCCESS &&
13538 kr != KERN_MEMORY_RESTART_COPY) {
13539 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13540 vm_map_unlock(cow_sub_map_parent);
13541 }
13542 if ((*real_map != map)
13543 && (*real_map != cow_sub_map_parent)) {
13544 vm_map_unlock(*real_map);
13545 }
13546 *real_map = map;
13547 vm_object_deallocate(copy_object);
13548 copy_object = VM_OBJECT_NULL;
13549 kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_SLOWLY_FAILED), 0 /* arg */);
13550 vm_map_lock_write_to_read(map);
13551 DTRACE_VM4(submap_copy_error_slowly,
13552 vm_object_t, sub_object,
13553 vm_object_offset_t, submap_entry_offset,
13554 vm_object_size_t, submap_entry_size,
13555 int, kr);
13556 vm_map_lookup_locked_copy_slowly_error++;
13557 return kr;
13558 }
13559
13560 if ((kr == KERN_SUCCESS) &&
13561 (version.main_timestamp + 1) == map->timestamp) {
13562 submap_entry = saved_submap_entry;
13563 } else {
13564 saved_submap_entry = NULL;
13565 old_start -= start_delta;
13566 old_end += end_delta;
13567 vm_object_deallocate(copy_object);
13568 copy_object = VM_OBJECT_NULL;
13569 vm_map_lock_write_to_read(map);
13570 vm_map_lookup_locked_copy_slowly_restart++;
13571 goto RetrySubMap;
13572 }
13573 vm_map_lookup_locked_copy_slowly_count++;
13574 vm_map_lookup_locked_copy_slowly_size += submap_entry_size;
13575 if (submap_entry_size > vm_map_lookup_locked_copy_slowly_max) {
13576 vm_map_lookup_locked_copy_slowly_max = submap_entry_size;
13577 }
13578 } else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13579 submap_entry_offset = VME_OFFSET(submap_entry);
13580 copy_object = VM_OBJECT_NULL;
13581 object_copied_offset = submap_entry_offset;
13582 object_copied_needs_copy = FALSE;
13583 DTRACE_VM6(submap_copy_strategically,
13584 vm_map_t, cow_sub_map_parent,
13585 vm_map_offset_t, vaddr,
13586 vm_map_t, map,
13587 vm_object_size_t, submap_entry_size,
13588 int, submap_entry->wired_count,
13589 int, sub_object->copy_strategy);
13590 kr = vm_object_copy_strategically(
13591 sub_object,
13592 submap_entry_offset,
13593 submap_entry->vme_end - submap_entry->vme_start,
13594 &copy_object,
13595 &object_copied_offset,
13596 &object_copied_needs_copy);
13597 if (kr == KERN_MEMORY_RESTART_COPY) {
13598 old_start -= start_delta;
13599 old_end += end_delta;
13600 vm_object_deallocate(copy_object);
13601 copy_object = VM_OBJECT_NULL;
13602 vm_map_lock_write_to_read(map);
13603 vm_map_lookup_locked_copy_strategically_restart++;
13604 goto RetrySubMap;
13605 }
13606 if (kr != KERN_SUCCESS) {
13607 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13608 vm_map_unlock(cow_sub_map_parent);
13609 }
13610 if ((*real_map != map)
13611 && (*real_map != cow_sub_map_parent)) {
13612 vm_map_unlock(*real_map);
13613 }
13614 *real_map = map;
13615 vm_object_deallocate(copy_object);
13616 copy_object = VM_OBJECT_NULL;
13617 kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_STRAT_FAILED), 0 /* arg */);
13618 vm_map_lock_write_to_read(map);
13619 DTRACE_VM4(submap_copy_error_strategically,
13620 vm_object_t, sub_object,
13621 vm_object_offset_t, submap_entry_offset,
13622 vm_object_size_t, submap_entry_size,
13623 int, kr);
13624 vm_map_lookup_locked_copy_strategically_error++;
13625 return kr;
13626 }
13627 assert(copy_object != VM_OBJECT_NULL);
13628 assert(copy_object != sub_object);
13629 object_copied = TRUE;
13630 vm_map_lookup_locked_copy_strategically_count++;
13631 vm_map_lookup_locked_copy_strategically_size += submap_entry_size;
13632 if (submap_entry_size > vm_map_lookup_locked_copy_strategically_max) {
13633 vm_map_lookup_locked_copy_strategically_max = submap_entry_size;
13634 }
13635 } else {
13636 /* set up shadow object */
13637 object_copied = FALSE;
13638 copy_object = sub_object;
13639 vm_object_lock(sub_object);
13640 vm_object_reference_locked(sub_object);
13641 sub_object->shadowed = TRUE;
13642 vm_object_unlock(sub_object);
13643
13644 assert(submap_entry->wired_count == 0);
13645 submap_entry->needs_copy = TRUE;
13646
13647 prot = submap_entry->protection;
13648 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13649 prot = prot & ~VM_PROT_WRITE;
13650 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13651
13652 if (override_nx(old_map,
13653 VME_ALIAS(submap_entry))
13654 && prot) {
13655 prot |= VM_PROT_EXECUTE;
13656 }
13657
13658 vm_object_pmap_protect(
13659 sub_object,
13660 VME_OFFSET(submap_entry),
13661 submap_entry->vme_end -
13662 submap_entry->vme_start,
13663 (submap_entry->is_shared
13664 || map->mapped_in_other_pmaps) ?
13665 PMAP_NULL : map->pmap,
13666 VM_MAP_PAGE_SIZE(map),
13667 submap_entry->vme_start,
13668 prot);
13669 vm_map_lookup_locked_copy_shadow_count++;
13670 vm_map_lookup_locked_copy_shadow_size += submap_entry_size;
13671 if (submap_entry_size > vm_map_lookup_locked_copy_shadow_max) {
13672 vm_map_lookup_locked_copy_shadow_max = submap_entry_size;
13673 }
13674 }
13675
13676 /*
13677 * Adjust the fault offset to the submap entry.
13678 */
13679 copy_offset = (local_vaddr -
13680 submap_entry->vme_start +
13681 VME_OFFSET(submap_entry));
13682
13683 /* This works differently from the */
13684 /* normal submap case. We go back */
13685 /* to the parent of the cow map and */
13686 /* clip out the target portion of */
13687 /* the sub_map, substituting the */
13688 /* new copy object. */
13689
13690 subentry_protection = submap_entry->protection;
13691 subentry_max_protection = submap_entry->max_protection;
13692 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13693 vm_map_unlock(map);
13694 submap_entry = NULL; /* not valid after map unlock */
13695
13696 local_start = old_start;
13697 local_end = old_end;
13698 map = cow_sub_map_parent;
13699 *var_map = cow_sub_map_parent;
13700 vaddr = cow_parent_vaddr;
13701 cow_sub_map_parent = NULL;
13702
13703 if (!vm_map_lookup_entry(map,
13704 vaddr, &entry)) {
13705 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13706 vm_map_unlock(cow_sub_map_parent);
13707 }
13708 if ((*real_map != map)
13709 && (*real_map != cow_sub_map_parent)) {
13710 vm_map_unlock(*real_map);
13711 }
13712 *real_map = map;
13713 vm_object_deallocate(
13714 copy_object);
13715 copy_object = VM_OBJECT_NULL;
13716 vm_map_lock_write_to_read(map);
13717 DTRACE_VM4(submap_lookup_post_unlock,
13718 uint64_t, (uint64_t)entry->vme_start,
13719 uint64_t, (uint64_t)entry->vme_end,
13720 vm_map_offset_t, vaddr,
13721 int, object_copied);
13722 return KERN_INVALID_ADDRESS;
13723 }
13724
13725 /* clip out the portion of space */
13726 /* mapped by the sub map which */
13727 /* corresponds to the underlying */
13728 /* object */
13729
13730 /*
13731 * Clip (and unnest) the smallest nested chunk
13732 * possible around the faulting address...
13733 */
13734 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
13735 local_end = local_start + pmap_shared_region_size_min(map->pmap);
13736 /*
13737 * ... but don't go beyond the "old_start" to "old_end"
13738 * range, to avoid spanning over another VM region
13739 * with a possibly different VM object and/or offset.
13740 */
13741 if (local_start < old_start) {
13742 local_start = old_start;
13743 }
13744 if (local_end > old_end) {
13745 local_end = old_end;
13746 }
13747 /*
13748 * Adjust copy_offset to the start of the range.
13749 */
13750 copy_offset -= (vaddr - local_start);
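/*
 * Worked example (the granule size is assumed here purely for
 * illustration): with pmap_shared_region_size_min() == 0x2000000 and
 * vaddr == 0x7fff80123000, local_start rounds down to 0x7fff80000000
 * and local_end becomes 0x7fff82000000.  If the COW range computed
 * earlier starts at old_start == 0x7fff80100000, local_start is raised
 * to that value, and copy_offset is moved back by
 * (vaddr - local_start) == 0x23000 so that it now corresponds to the
 * start of the clipped entry rather than to the faulting address.
 */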
13751
13752 vm_map_clip_start(map, entry, local_start);
13753 vm_map_clip_end(map, entry, local_end);
13754 if (entry->is_sub_map) {
13755 /* unnesting was done when clipping */
13756 assert(!entry->use_pmap);
13757 }
13758
13759 /* substitute copy object for */
13760 /* shared map entry */
13761 vm_map_deallocate(VME_SUBMAP(entry));
13762 assert(!entry->iokit_acct);
13763 entry->is_sub_map = FALSE;
13764 entry->use_pmap = TRUE;
13765 VME_OBJECT_SET(entry, copy_object);
13766
13767 /* propagate the submap entry's protections */
13768 if (entry->protection != VM_PROT_READ) {
13769 /*
13770 * Someone has already altered the top entry's
13771 * protections via vm_protect(VM_PROT_COPY).
13772 * Respect these new values and ignore the
13773 * submap entry's protections.
13774 */
13775 } else {
13776 /*
13777 * Regular copy-on-write: propagate the submap
13778 * entry's protections to the top map entry.
13779 */
13780 entry->protection |= subentry_protection;
13781 }
13782 entry->max_protection |= subentry_max_protection;
13783 /* propagate no_copy_on_read */
13784 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13785
13786 if ((entry->protection & VM_PROT_WRITE) &&
13787 (entry->protection & VM_PROT_EXECUTE) &&
13788 #if XNU_TARGET_OS_OSX
13789 map->pmap != kernel_pmap &&
13790 (vm_map_cs_enforcement(map)
13791 #if __arm64__
13792 || !VM_MAP_IS_EXOTIC(map)
13793 #endif /* __arm64__ */
13794 ) &&
13795 #endif /* XNU_TARGET_OS_OSX */
13796 !(entry->used_for_jit) &&
13797 VM_MAP_POLICY_WX_STRIP_X(map)) {
13798 DTRACE_VM3(cs_wx,
13799 uint64_t, (uint64_t)entry->vme_start,
13800 uint64_t, (uint64_t)entry->vme_end,
13801 vm_prot_t, entry->protection);
13802 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13803 proc_selfpid(),
13804 (current_task()->bsd_info
13805 ? proc_name_address(current_task()->bsd_info)
13806 : "?"),
13807 __FUNCTION__);
13808 entry->protection &= ~VM_PROT_EXECUTE;
13809 }
13810
13811 if (object_copied) {
13812 VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
13813 entry->needs_copy = object_copied_needs_copy;
13814 entry->is_shared = FALSE;
13815 } else {
13816 assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
13817 assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
13818 assert(entry->wired_count == 0);
13819 VME_OFFSET_SET(entry, copy_offset);
13820 entry->needs_copy = TRUE;
13821 if (map != old_map) {
13822 entry->is_shared = TRUE;
13823 }
13824 }
13825 if (entry->inheritance == VM_INHERIT_SHARE) {
13826 entry->inheritance = VM_INHERIT_COPY;
13827 }
13828
13829 vm_map_lock_write_to_read(map);
13830 } else {
13831 if ((cow_sub_map_parent)
13832 && (cow_sub_map_parent != *real_map)
13833 && (cow_sub_map_parent != map)) {
13834 vm_map_unlock(cow_sub_map_parent);
13835 }
13836 entry = submap_entry;
13837 vaddr = local_vaddr;
13838 }
13839 }
13840
13841 /*
13842 * Check whether this task is allowed to have
13843 * this page.
13844 */
13845
13846 prot = entry->protection;
13847
13848 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13849 /*
13850 * HACK -- if not a stack, then allow execution
13851 */
13852 prot |= VM_PROT_EXECUTE;
13853 }
13854
13855 if (mask_protections) {
13856 fault_type &= prot;
13857 if (fault_type == VM_PROT_NONE) {
13858 goto protection_failure;
13859 }
13860 }
13861 if (((fault_type & prot) != fault_type)
13862 #if __arm64__
13863 /* prefetch abort in execute-only page */
13864 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13865 #elif defined(__x86_64__)
13866 /* Consider the UEXEC bit when handling an EXECUTE fault */
13867 && !((fault_type & VM_PROT_EXECUTE) && !(prot & VM_PROT_EXECUTE) && (prot & VM_PROT_UEXEC))
13868 #endif
13869 ) {
13870 protection_failure:
13871 if (*real_map != map) {
13872 vm_map_unlock(*real_map);
13873 }
13874 *real_map = map;
13875
13876 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13877 log_stack_execution_failure((addr64_t)vaddr, prot);
13878 }
13879
13880 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13881 DTRACE_VM3(prot_fault_detailed, vm_prot_t, fault_type, vm_prot_t, prot, void *, vaddr);
13882 /*
13883 * Noisy (esp. internally) and can be inferred from CrashReports. So OFF for now.
13884 *
13885 * kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_PROTECTION_FAILURE), 0);
13886 */
13887 return KERN_PROTECTION_FAILURE;
13888 }
13889
13890 /*
13891 * If this page is not pageable, we have to get
13892 * it for all possible accesses.
13893 */
13894
13895 *wired = (entry->wired_count != 0);
13896 if (*wired) {
13897 fault_type = prot;
13898 }
13899
13900 /*
13901 * If the entry was copy-on-write, we either shadow the object now or demote the permissions we return.
13902 */
13903
13904 if (entry->needs_copy) {
13905 /*
13906 * If we want to write the page, we may as well
13907 * handle that now since we've got the map locked.
13908 *
13909 * If we don't need to write the page, we just
13910 * demote the permissions allowed.
13911 */
13912
13913 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13914 /*
13915 * Make a new object, and place it in the
13916 * object chain. Note that no new references
13917 * have appeared -- one just moved from the
13918 * map to the new object.
13919 */
13920
13921 if (vm_map_lock_read_to_write(map)) {
13922 vm_map_lock_read(map);
13923 goto RetryLookup;
13924 }
13925
13926 if (VME_OBJECT(entry)->shadowed == FALSE) {
13927 vm_object_lock(VME_OBJECT(entry));
13928 VME_OBJECT(entry)->shadowed = TRUE;
13929 vm_object_unlock(VME_OBJECT(entry));
13930 }
13931 VME_OBJECT_SHADOW(entry,
13932 (vm_map_size_t) (entry->vme_end -
13933 entry->vme_start));
13934 entry->needs_copy = FALSE;
13935
13936 vm_map_lock_write_to_read(map);
13937 }
13938 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13939 /*
13940 * We're attempting to read a copy-on-write
13941 * page -- don't allow writes.
13942 */
13943
13944 prot &= (~VM_PROT_WRITE);
13945 }
13946 }
13947
13948 if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
13949 /*
13950 * We went through a "needs_copy" submap without triggering
13951 * a copy, so granting write access to the page would bypass
13952 * that submap's "needs_copy".
13953 */
13954 assert(!(fault_type & VM_PROT_WRITE));
13955 assert(!*wired);
13956 assert(!force_copy);
13957 // printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
13958 prot &= ~VM_PROT_WRITE;
13959 }
13960
13961 /*
13962 * Create an object if necessary.
13963 */
13964 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13965 if (vm_map_lock_read_to_write(map)) {
13966 vm_map_lock_read(map);
13967 goto RetryLookup;
13968 }
13969
13970 VME_OBJECT_SET(entry,
13971 vm_object_allocate(
13972 (vm_map_size_t)(entry->vme_end -
13973 entry->vme_start)));
13974 VME_OFFSET_SET(entry, 0);
13975 assert(entry->use_pmap);
13976 vm_map_lock_write_to_read(map);
13977 }
13978
13979 /*
13980 * Return the object/offset from this entry. If the entry
13981 * was copy-on-write or empty, it has been fixed up. Also
13982 * return the protection.
13983 */
13984
13985 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13986 *object = VME_OBJECT(entry);
13987 *out_prot = prot;
13988 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
13989
13990 if (fault_info) {
13991 fault_info->interruptible = THREAD_UNINT; /* for now... */
13992 /* ... the caller will change "interruptible" if needed */
13993 fault_info->cluster_size = 0;
13994 fault_info->user_tag = VME_ALIAS(entry);
13995 fault_info->pmap_options = 0;
13996 if (entry->iokit_acct ||
13997 (!entry->is_sub_map && !entry->use_pmap)) {
13998 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13999 }
14000 fault_info->behavior = entry->behavior;
14001 fault_info->lo_offset = VME_OFFSET(entry);
14002 fault_info->hi_offset =
14003 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
14004 fault_info->no_cache = entry->no_cache;
14005 fault_info->stealth = FALSE;
14006 fault_info->io_sync = FALSE;
14007 if (entry->used_for_jit ||
14008 entry->vme_resilient_codesign) {
14009 fault_info->cs_bypass = TRUE;
14010 } else {
14011 fault_info->cs_bypass = FALSE;
14012 }
14013 fault_info->pmap_cs_associated = FALSE;
14014 #if CONFIG_PMAP_CS
14015 if (entry->pmap_cs_associated) {
14016 /*
14017 * The pmap layer will validate this page
14018 * before allowing it to be executed from.
14019 */
14020 fault_info->pmap_cs_associated = TRUE;
14021 }
14022 #endif /* CONFIG_PMAP_CS */
14023 fault_info->mark_zf_absent = FALSE;
14024 fault_info->batch_pmap_op = FALSE;
14025 fault_info->resilient_media = entry->vme_resilient_media;
14026 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
14027 if (entry->translated_allow_execute) {
14028 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14029 }
14030 }
14031
14032 /*
14033 * Lock the object to prevent it from disappearing
14034 */
14035 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14036 if (contended == NULL) {
14037 vm_object_lock(*object);
14038 } else {
14039 *contended = vm_object_lock_check_contended(*object);
14040 }
14041 } else {
14042 vm_object_lock_shared(*object);
14043 }
14044
14045 /*
14046 * Save the version number
14047 */
14048
14049 out_version->main_timestamp = map->timestamp;
14050
14051 return KERN_SUCCESS;
14052 }
14053
14054
14055 /*
14056 * vm_map_verify:
14057 *
14058 * Verifies that the map in question has not changed
14059 * since the given version. The map has to be locked
14060 * ("shared" mode is fine) before calling this function
14061 * and it will be returned locked too.
14062 */
14063 boolean_t
14064 vm_map_verify(
14065 vm_map_t map,
14066 vm_map_version_t *version) /* REF */
14067 {
14068 boolean_t result;
14069
14070 vm_map_lock_assert_held(map);
14071 result = (map->timestamp == version->main_timestamp);
14072
14073 return result;
14074 }
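/*
 * Typical usage sketch (illustrative only, not a verbatim excerpt of any
 * caller): a fault handler performs the lookup above, remembers the
 * version it was given, drops the map lock while it operates on the
 * returned object, and later uses vm_map_verify() to decide whether the
 * lookup is still valid:
 *
 *     vm_map_version_t version;
 *
 *     vm_map_lock_read(map);
 *     kr = <lookup routine above>(..., &version, &object, &offset, ...);
 *     vm_map_unlock_read(map);
 *     ... work on "object" with the map unlocked ...
 *     vm_map_lock_read(map);
 *     if (!vm_map_verify(map, &version)) {
 *             ... the map changed underneath us: redo the lookup ...
 *     }
 */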
14075
14076 /*
14077 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14078 * Goes away after regular vm_region_recurse function migrates to
14079 * 64 bits
14080 * vm_region_recurse: A form of vm_region which follows the
14081 * submaps in a target map
14082 *
14083 */
14084
14085 kern_return_t
14086 vm_map_region_recurse_64(
14087 vm_map_t map,
14088 vm_map_offset_t *address, /* IN/OUT */
14089 vm_map_size_t *size, /* OUT */
14090 natural_t *nesting_depth, /* IN/OUT */
14091 vm_region_submap_info_64_t submap_info, /* IN/OUT */
14092 mach_msg_type_number_t *count) /* IN/OUT */
14093 {
14094 mach_msg_type_number_t original_count;
14095 vm_region_extended_info_data_t extended;
14096 vm_map_entry_t tmp_entry;
14097 vm_map_offset_t user_address;
14098 unsigned int user_max_depth;
14099
14100 /*
14101 * "curr_entry" is the VM map entry preceding or including the
14102 * address we're looking for.
14103 * "curr_map" is the map or sub-map containing "curr_entry".
14104 * "curr_address" is the equivalent of the top map's "user_address"
14105 * in the current map.
14106 * "curr_offset" is the cumulative offset of "curr_map" in the
14107 * target task's address space.
14108 * "curr_depth" is the depth of "curr_map" in the chain of
14109 * sub-maps.
14110 *
14111 * "curr_max_below" and "curr_max_above" limit the range (around
14112 * "curr_address") we should take into account in the current (sub)map.
14113 * They limit the range to what's visible through the map entries
14114 * we've traversed from the top map to the current map.
14115 *
14116 */
14117 vm_map_entry_t curr_entry;
14118 vm_map_address_t curr_address;
14119 vm_map_offset_t curr_offset;
14120 vm_map_t curr_map;
14121 unsigned int curr_depth;
14122 vm_map_offset_t curr_max_below, curr_max_above;
14123 vm_map_offset_t curr_skip;
14124
14125 /*
14126 * "next_" is the same as "curr_" but for the VM region immediately
14127 * after the address we're looking for. We need to keep track of this
14128 * too because we want to return info about that region if the
14129 * address we're looking for is not mapped.
14130 */
14131 vm_map_entry_t next_entry;
14132 vm_map_offset_t next_offset;
14133 vm_map_offset_t next_address;
14134 vm_map_t next_map;
14135 unsigned int next_depth;
14136 vm_map_offset_t next_max_below, next_max_above;
14137 vm_map_offset_t next_skip;
14138
14139 boolean_t look_for_pages;
14140 vm_region_submap_short_info_64_t short_info;
14141 boolean_t do_region_footprint;
14142 int effective_page_size, effective_page_shift;
14143 boolean_t submap_needed_copy;
14144
14145 if (map == VM_MAP_NULL) {
14146 /* no address space to work on */
14147 return KERN_INVALID_ARGUMENT;
14148 }
14149
14150 effective_page_shift = vm_self_region_page_shift(map);
14151 effective_page_size = (1 << effective_page_shift);
14152
14153 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14154 /*
14155 * "info" structure is not big enough and
14156 * would overflow
14157 */
14158 return KERN_INVALID_ARGUMENT;
14159 }
14160
14161 do_region_footprint = task_self_region_footprint();
14162 original_count = *count;
14163
14164 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14165 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14166 look_for_pages = FALSE;
14167 short_info = (vm_region_submap_short_info_64_t) submap_info;
14168 submap_info = NULL;
14169 } else {
14170 look_for_pages = TRUE;
14171 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14172 short_info = NULL;
14173
14174 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14175 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14176 }
14177 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14178 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14179 }
14180 }
14181
14182 user_address = *address;
14183 user_max_depth = *nesting_depth;
14184 submap_needed_copy = FALSE;
14185
14186 if (not_in_kdp) {
14187 vm_map_lock_read(map);
14188 }
14189
14190 recurse_again:
14191 curr_entry = NULL;
14192 curr_map = map;
14193 curr_address = user_address;
14194 curr_offset = 0;
14195 curr_skip = 0;
14196 curr_depth = 0;
14197 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14198 curr_max_below = curr_address;
14199
14200 next_entry = NULL;
14201 next_map = NULL;
14202 next_address = 0;
14203 next_offset = 0;
14204 next_skip = 0;
14205 next_depth = 0;
14206 next_max_above = (vm_map_offset_t) -1;
14207 next_max_below = (vm_map_offset_t) -1;
14208
14209 for (;;) {
14210 if (vm_map_lookup_entry(curr_map,
14211 curr_address,
14212 &tmp_entry)) {
14213 /* tmp_entry contains the address we're looking for */
14214 curr_entry = tmp_entry;
14215 } else {
14216 vm_map_offset_t skip;
14217 /*
14218 * The address is not mapped. "tmp_entry" is the
14219 * map entry preceding the address. We want the next
14220 * one, if it exists.
14221 */
14222 curr_entry = tmp_entry->vme_next;
14223
14224 if (curr_entry == vm_map_to_entry(curr_map) ||
14225 (curr_entry->vme_start >=
14226 curr_address + curr_max_above)) {
14227 /* no next entry at this level: stop looking */
14228 if (not_in_kdp) {
14229 vm_map_unlock_read(curr_map);
14230 }
14231 curr_entry = NULL;
14232 curr_map = NULL;
14233 curr_skip = 0;
14234 curr_offset = 0;
14235 curr_depth = 0;
14236 curr_max_above = 0;
14237 curr_max_below = 0;
14238 break;
14239 }
14240
14241 /* adjust current address and offset */
14242 skip = curr_entry->vme_start - curr_address;
14243 curr_address = curr_entry->vme_start;
14244 curr_skip += skip;
14245 curr_offset += skip;
14246 curr_max_above -= skip;
14247 curr_max_below = 0;
14248 }
14249
14250 /*
14251 * Is the next entry at this level closer to the address (or
14252 * deeper in the submap chain) than the one we had
14253 * so far ?
14254 */
14255 tmp_entry = curr_entry->vme_next;
14256 if (tmp_entry == vm_map_to_entry(curr_map)) {
14257 /* no next entry at this level */
14258 } else if (tmp_entry->vme_start >=
14259 curr_address + curr_max_above) {
14260 /*
14261 * tmp_entry is beyond the scope of what we mapped of
14262 * this submap in the upper level: ignore it.
14263 */
14264 } else if ((next_entry == NULL) ||
14265 (tmp_entry->vme_start + curr_offset <=
14266 next_entry->vme_start + next_offset)) {
14267 /*
14268 * We didn't have a "next_entry" or this one is
14269 * closer to the address we're looking for:
14270 * use this "tmp_entry" as the new "next_entry".
14271 */
14272 if (next_entry != NULL) {
14273 /* unlock the last "next_map" */
14274 if (next_map != curr_map && not_in_kdp) {
14275 vm_map_unlock_read(next_map);
14276 }
14277 }
14278 next_entry = tmp_entry;
14279 next_map = curr_map;
14280 next_depth = curr_depth;
14281 next_address = next_entry->vme_start;
14282 next_skip = curr_skip;
14283 next_skip += (next_address - curr_address);
14284 next_offset = curr_offset;
14285 next_offset += (next_address - curr_address);
14286 next_max_above = MIN(next_max_above, curr_max_above);
14287 next_max_above = MIN(next_max_above,
14288 next_entry->vme_end - next_address);
14289 next_max_below = MIN(next_max_below, curr_max_below);
14290 next_max_below = MIN(next_max_below,
14291 next_address - next_entry->vme_start);
14292 }
14293
14294 /*
14295 * "curr_max_{above,below}" allow us to keep track of the
14296 * portion of the submap that is actually mapped at this level:
14297 * the rest of that submap is irrelevant to us, since it's not
14298 * mapped here.
14299 * The relevant portion of the map starts at
14300 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14301 */
14302 curr_max_above = MIN(curr_max_above,
14303 curr_entry->vme_end - curr_address);
14304 curr_max_below = MIN(curr_max_below,
14305 curr_address - curr_entry->vme_start);
14306
14307 if (!curr_entry->is_sub_map ||
14308 curr_depth >= user_max_depth) {
14309 /*
14310 * We hit a leaf map or we reached the maximum depth
14311 * we could, so stop looking. Keep the current map
14312 * locked.
14313 */
14314 break;
14315 }
14316
14317 /*
14318 * Get down to the next submap level.
14319 */
14320
14321 if (curr_entry->needs_copy) {
14322 /* everything below this is effectively copy-on-write */
14323 submap_needed_copy = TRUE;
14324 }
14325
14326 /*
14327 * Lock the next level and unlock the current level,
14328 * unless we need to keep it locked to access the "next_entry"
14329 * later.
14330 */
14331 if (not_in_kdp) {
14332 vm_map_lock_read(VME_SUBMAP(curr_entry));
14333 }
14334 if (curr_map == next_map) {
14335 /* keep "next_map" locked in case we need it */
14336 } else {
14337 /* release this map */
14338 if (not_in_kdp) {
14339 vm_map_unlock_read(curr_map);
14340 }
14341 }
14342
14343 /*
14344 * Adjust the offset. "curr_entry" maps the submap
14345 * at relative address "curr_entry->vme_start" in the
14346 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14347 * bytes of the submap.
14348 * "curr_offset" always represents the offset of a virtual
14349 * address in the curr_map relative to the absolute address
14350 * space (i.e. the top-level VM map).
14351 */
14352 curr_offset +=
14353 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14354 curr_address = user_address + curr_offset;
14355 /* switch to the submap */
14356 curr_map = VME_SUBMAP(curr_entry);
14357 curr_depth++;
14358 curr_entry = NULL;
14359 }
14360
14361 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14362 // so probably should be a real 32b ID vs. ptr.
14363 // Current users just check for equality
14364
14365 if (curr_entry == NULL) {
14366 /* no VM region contains the address... */
14367
14368 if (do_region_footprint && /* we want footprint numbers */
14369 next_entry == NULL && /* & there are no more regions */
14370 /* & we haven't already provided our fake region: */
14371 user_address <= vm_map_last_entry(map)->vme_end) {
14372 ledger_amount_t ledger_resident, ledger_compressed;
14373
14374 /*
14375 * Add a fake memory region to account for
14376 * purgeable and/or ledger-tagged memory that
14377 * counts towards this task's memory footprint,
14378 * i.e. the resident/compressed pages of non-volatile
14379 * objects owned by that task.
14380 */
14381 task_ledgers_footprint(map->pmap->ledger,
14382 &ledger_resident,
14383 &ledger_compressed);
14384 if (ledger_resident + ledger_compressed == 0) {
14385 /* no purgeable memory usage to report */
14386 return KERN_INVALID_ADDRESS;
14387 }
14388 /* fake region to show nonvolatile footprint */
14389 if (look_for_pages) {
14390 submap_info->protection = VM_PROT_DEFAULT;
14391 submap_info->max_protection = VM_PROT_DEFAULT;
14392 submap_info->inheritance = VM_INHERIT_DEFAULT;
14393 submap_info->offset = 0;
14394 submap_info->user_tag = -1;
14395 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14396 submap_info->pages_shared_now_private = 0;
14397 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14398 submap_info->pages_dirtied = submap_info->pages_resident;
14399 submap_info->ref_count = 1;
14400 submap_info->shadow_depth = 0;
14401 submap_info->external_pager = 0;
14402 submap_info->share_mode = SM_PRIVATE;
14403 if (submap_needed_copy) {
14404 submap_info->share_mode = SM_COW;
14405 }
14406 submap_info->is_submap = 0;
14407 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14408 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14409 submap_info->user_wired_count = 0;
14410 submap_info->pages_reusable = 0;
14411 } else {
14412 short_info->user_tag = -1;
14413 short_info->offset = 0;
14414 short_info->protection = VM_PROT_DEFAULT;
14415 short_info->inheritance = VM_INHERIT_DEFAULT;
14416 short_info->max_protection = VM_PROT_DEFAULT;
14417 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14418 short_info->user_wired_count = 0;
14419 short_info->is_submap = 0;
14420 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14421 short_info->external_pager = 0;
14422 short_info->shadow_depth = 0;
14423 short_info->share_mode = SM_PRIVATE;
14424 if (submap_needed_copy) {
14425 short_info->share_mode = SM_COW;
14426 }
14427 short_info->ref_count = 1;
14428 }
14429 *nesting_depth = 0;
14430 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14431 // *address = user_address;
14432 *address = vm_map_last_entry(map)->vme_end;
14433 return KERN_SUCCESS;
14434 }
14435
14436 if (next_entry == NULL) {
14437 /* ... and no VM region follows it either */
14438 return KERN_INVALID_ADDRESS;
14439 }
14440 /* ... gather info about the next VM region */
14441 curr_entry = next_entry;
14442 curr_map = next_map; /* still locked ... */
14443 curr_address = next_address;
14444 curr_skip = next_skip;
14445 curr_offset = next_offset;
14446 curr_depth = next_depth;
14447 curr_max_above = next_max_above;
14448 curr_max_below = next_max_below;
14449 } else {
14450 /* we won't need "next_entry" after all */
14451 if (next_entry != NULL) {
14452 /* release "next_map" */
14453 if (next_map != curr_map && not_in_kdp) {
14454 vm_map_unlock_read(next_map);
14455 }
14456 }
14457 }
14458 next_entry = NULL;
14459 next_map = NULL;
14460 next_offset = 0;
14461 next_skip = 0;
14462 next_depth = 0;
14463 next_max_below = -1;
14464 next_max_above = -1;
14465
14466 if (curr_entry->is_sub_map &&
14467 curr_depth < user_max_depth) {
14468 /*
14469 * We're not as deep as we could be: we must have
14470 * gone back up after not finding anything mapped
14471 * below the original top-level map entry's range.
14472 * Let's move "curr_address" forward and recurse again.
14473 */
14474 user_address = curr_address;
14475 goto recurse_again;
14476 }
14477
14478 *nesting_depth = curr_depth;
14479 *size = curr_max_above + curr_max_below;
14480 *address = user_address + curr_skip - curr_max_below;
14481
14482 if (look_for_pages) {
14483 submap_info->user_tag = VME_ALIAS(curr_entry);
14484 submap_info->offset = VME_OFFSET(curr_entry);
14485 submap_info->protection = curr_entry->protection;
14486 submap_info->inheritance = curr_entry->inheritance;
14487 submap_info->max_protection = curr_entry->max_protection;
14488 submap_info->behavior = curr_entry->behavior;
14489 submap_info->user_wired_count = curr_entry->user_wired_count;
14490 submap_info->is_submap = curr_entry->is_sub_map;
14491 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14492 } else {
14493 short_info->user_tag = VME_ALIAS(curr_entry);
14494 short_info->offset = VME_OFFSET(curr_entry);
14495 short_info->protection = curr_entry->protection;
14496 short_info->inheritance = curr_entry->inheritance;
14497 short_info->max_protection = curr_entry->max_protection;
14498 short_info->behavior = curr_entry->behavior;
14499 short_info->user_wired_count = curr_entry->user_wired_count;
14500 short_info->is_submap = curr_entry->is_sub_map;
14501 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14502 }
14503
14504 extended.pages_resident = 0;
14505 extended.pages_swapped_out = 0;
14506 extended.pages_shared_now_private = 0;
14507 extended.pages_dirtied = 0;
14508 extended.pages_reusable = 0;
14509 extended.external_pager = 0;
14510 extended.shadow_depth = 0;
14511 extended.share_mode = SM_EMPTY;
14512 extended.ref_count = 0;
14513
14514 if (not_in_kdp) {
14515 if (!curr_entry->is_sub_map) {
14516 vm_map_offset_t range_start, range_end;
14517 range_start = MAX((curr_address - curr_max_below),
14518 curr_entry->vme_start);
14519 range_end = MIN((curr_address + curr_max_above),
14520 curr_entry->vme_end);
14521 vm_map_region_walk(curr_map,
14522 range_start,
14523 curr_entry,
14524 (VME_OFFSET(curr_entry) +
14525 (range_start -
14526 curr_entry->vme_start)),
14527 range_end - range_start,
14528 &extended,
14529 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14530 if (extended.external_pager &&
14531 extended.ref_count == 2 &&
14532 extended.share_mode == SM_SHARED) {
14533 extended.share_mode = SM_PRIVATE;
14534 }
14535 if (submap_needed_copy) {
14536 extended.share_mode = SM_COW;
14537 }
14538 } else {
14539 if (curr_entry->use_pmap) {
14540 extended.share_mode = SM_TRUESHARED;
14541 } else {
14542 extended.share_mode = SM_PRIVATE;
14543 }
14544 extended.ref_count = os_ref_get_count_raw(&VME_SUBMAP(curr_entry)->map_refcnt);
14545 }
14546 }
14547
14548 if (look_for_pages) {
14549 submap_info->pages_resident = extended.pages_resident;
14550 submap_info->pages_swapped_out = extended.pages_swapped_out;
14551 submap_info->pages_shared_now_private =
14552 extended.pages_shared_now_private;
14553 submap_info->pages_dirtied = extended.pages_dirtied;
14554 submap_info->external_pager = extended.external_pager;
14555 submap_info->shadow_depth = extended.shadow_depth;
14556 submap_info->share_mode = extended.share_mode;
14557 submap_info->ref_count = extended.ref_count;
14558
14559 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14560 submap_info->pages_reusable = extended.pages_reusable;
14561 }
14562 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14563 submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
14564 }
14565 } else {
14566 short_info->external_pager = extended.external_pager;
14567 short_info->shadow_depth = extended.shadow_depth;
14568 short_info->share_mode = extended.share_mode;
14569 short_info->ref_count = extended.ref_count;
14570 }
14571
14572 if (not_in_kdp) {
14573 vm_map_unlock_read(curr_map);
14574 }
14575
14576 return KERN_SUCCESS;
14577 }
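/*
 * Illustrative user-space caller (a sketch, assuming a 64-bit process
 * inspecting its own address space): mach_vm_region_recurse() is the MIG
 * routine that ultimately lands in the function above.
 *
 *     #include <mach/mach.h>
 *
 *     mach_vm_address_t addr = 0;
 *     mach_vm_size_t size = 0;
 *     natural_t depth = 0;              // max submap depth to follow
 *     vm_region_submap_info_data_64_t info;
 *     mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *
 *     kern_return_t kr = mach_vm_region_recurse(mach_task_self(), &addr,
 *         &size, &depth, (vm_region_recurse_info_t)&info, &count);
 *     if (kr == KERN_SUCCESS) {
 *             // [addr, addr + size) is the first region at or after the
 *             // requested address, found at nesting level "depth".
 *     }
 */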
14578
14579 /*
14580 * vm_region:
14581 *
14582 * User call to obtain information about a region in
14583 * a task's address map. Currently, only one flavor is
14584 * supported.
14585 *
14586 * XXX The reserved and behavior fields cannot be filled
14587 * in until the vm merge from the IK is completed, and
14588 * vm_reserve is implemented.
14589 */
14590
14591 kern_return_t
14592 vm_map_region(
14593 vm_map_t map,
14594 vm_map_offset_t *address, /* IN/OUT */
14595 vm_map_size_t *size, /* OUT */
14596 vm_region_flavor_t flavor, /* IN */
14597 vm_region_info_t info, /* OUT */
14598 mach_msg_type_number_t *count, /* IN/OUT */
14599 mach_port_t *object_name) /* OUT */
14600 {
14601 vm_map_entry_t tmp_entry;
14602 vm_map_entry_t entry;
14603 vm_map_offset_t start;
14604
14605 if (map == VM_MAP_NULL) {
14606 return KERN_INVALID_ARGUMENT;
14607 }
14608
14609 switch (flavor) {
14610 case VM_REGION_BASIC_INFO:
14611 /* legacy for old 32-bit objects info */
14612 {
14613 vm_region_basic_info_t basic;
14614
14615 if (*count < VM_REGION_BASIC_INFO_COUNT) {
14616 return KERN_INVALID_ARGUMENT;
14617 }
14618
14619 basic = (vm_region_basic_info_t) info;
14620 *count = VM_REGION_BASIC_INFO_COUNT;
14621
14622 vm_map_lock_read(map);
14623
14624 start = *address;
14625 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14626 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14627 vm_map_unlock_read(map);
14628 return KERN_INVALID_ADDRESS;
14629 }
14630 } else {
14631 entry = tmp_entry;
14632 }
14633
14634 start = entry->vme_start;
14635
14636 basic->offset = (uint32_t)VME_OFFSET(entry);
14637 basic->protection = entry->protection;
14638 basic->inheritance = entry->inheritance;
14639 basic->max_protection = entry->max_protection;
14640 basic->behavior = entry->behavior;
14641 basic->user_wired_count = entry->user_wired_count;
14642 basic->reserved = entry->is_sub_map;
14643 *address = start;
14644 *size = (entry->vme_end - start);
14645
14646 if (object_name) {
14647 *object_name = IP_NULL;
14648 }
14649 if (entry->is_sub_map) {
14650 basic->shared = FALSE;
14651 } else {
14652 basic->shared = entry->is_shared;
14653 }
14654
14655 vm_map_unlock_read(map);
14656 return KERN_SUCCESS;
14657 }
14658
14659 case VM_REGION_BASIC_INFO_64:
14660 {
14661 vm_region_basic_info_64_t basic;
14662
14663 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
14664 return KERN_INVALID_ARGUMENT;
14665 }
14666
14667 basic = (vm_region_basic_info_64_t) info;
14668 *count = VM_REGION_BASIC_INFO_COUNT_64;
14669
14670 vm_map_lock_read(map);
14671
14672 start = *address;
14673 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14674 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14675 vm_map_unlock_read(map);
14676 return KERN_INVALID_ADDRESS;
14677 }
14678 } else {
14679 entry = tmp_entry;
14680 }
14681
14682 start = entry->vme_start;
14683
14684 basic->offset = VME_OFFSET(entry);
14685 basic->protection = entry->protection;
14686 basic->inheritance = entry->inheritance;
14687 basic->max_protection = entry->max_protection;
14688 basic->behavior = entry->behavior;
14689 basic->user_wired_count = entry->user_wired_count;
14690 basic->reserved = entry->is_sub_map;
14691 *address = start;
14692 *size = (entry->vme_end - start);
14693
14694 if (object_name) {
14695 *object_name = IP_NULL;
14696 }
14697 if (entry->is_sub_map) {
14698 basic->shared = FALSE;
14699 } else {
14700 basic->shared = entry->is_shared;
14701 }
14702
14703 vm_map_unlock_read(map);
14704 return KERN_SUCCESS;
14705 }
14706 case VM_REGION_EXTENDED_INFO:
14707 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
14708 return KERN_INVALID_ARGUMENT;
14709 }
14710 OS_FALLTHROUGH;
14711 case VM_REGION_EXTENDED_INFO__legacy:
14712 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
14713 return KERN_INVALID_ARGUMENT;
14714 }
14715
14716 {
14717 vm_region_extended_info_t extended;
14718 mach_msg_type_number_t original_count;
14719 int effective_page_size, effective_page_shift;
14720
14721 extended = (vm_region_extended_info_t) info;
14722
14723 effective_page_shift = vm_self_region_page_shift(map);
14724 effective_page_size = (1 << effective_page_shift);
14725
14726 vm_map_lock_read(map);
14727
14728 start = *address;
14729 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14730 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14731 vm_map_unlock_read(map);
14732 return KERN_INVALID_ADDRESS;
14733 }
14734 } else {
14735 entry = tmp_entry;
14736 }
14737 start = entry->vme_start;
14738
14739 extended->protection = entry->protection;
14740 extended->user_tag = VME_ALIAS(entry);
14741 extended->pages_resident = 0;
14742 extended->pages_swapped_out = 0;
14743 extended->pages_shared_now_private = 0;
14744 extended->pages_dirtied = 0;
14745 extended->external_pager = 0;
14746 extended->shadow_depth = 0;
14747
14748 original_count = *count;
14749 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14750 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14751 } else {
14752 extended->pages_reusable = 0;
14753 *count = VM_REGION_EXTENDED_INFO_COUNT;
14754 }
14755
14756 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
14757
14758 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14759 extended->share_mode = SM_PRIVATE;
14760 }
14761
14762 if (object_name) {
14763 *object_name = IP_NULL;
14764 }
14765 *address = start;
14766 *size = (entry->vme_end - start);
14767
14768 vm_map_unlock_read(map);
14769 return KERN_SUCCESS;
14770 }
14771 case VM_REGION_TOP_INFO:
14772 {
14773 vm_region_top_info_t top;
14774
14775 if (*count < VM_REGION_TOP_INFO_COUNT) {
14776 return KERN_INVALID_ARGUMENT;
14777 }
14778
14779 top = (vm_region_top_info_t) info;
14780 *count = VM_REGION_TOP_INFO_COUNT;
14781
14782 vm_map_lock_read(map);
14783
14784 start = *address;
14785 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14786 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14787 vm_map_unlock_read(map);
14788 return KERN_INVALID_ADDRESS;
14789 }
14790 } else {
14791 entry = tmp_entry;
14792 }
14793 start = entry->vme_start;
14794
14795 top->private_pages_resident = 0;
14796 top->shared_pages_resident = 0;
14797
14798 vm_map_region_top_walk(entry, top);
14799
14800 if (object_name) {
14801 *object_name = IP_NULL;
14802 }
14803 *address = start;
14804 *size = (entry->vme_end - start);
14805
14806 vm_map_unlock_read(map);
14807 return KERN_SUCCESS;
14808 }
14809 default:
14810 return KERN_INVALID_ARGUMENT;
14811 }
14812 }
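/*
 * Illustrative user-space caller (a sketch): the VM_REGION_BASIC_INFO_64
 * flavor handled above backs the common mach_vm_region() query.
 *
 *     #include <mach/mach.h>
 *
 *     mach_vm_address_t addr = 0;
 *     mach_vm_size_t size = 0;
 *     vm_region_basic_info_data_64_t info;
 *     mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *     mach_port_t object_name = MACH_PORT_NULL;   // always returned as IP_NULL
 *
 *     kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
 *         VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info, &count,
 *         &object_name);
 *     if (kr == KERN_SUCCESS) {
 *             // info.protection / info.max_protection describe [addr, addr + size)
 *     }
 */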
14813
14814 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14815 MIN((entry_size), \
14816 ((obj)->all_reusable ? \
14817 (obj)->wired_page_count : \
14818 (obj)->resident_page_count - (obj)->reusable_page_count))
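/*
 * Added note: the macro above clamps the object's page counts to the size
 * of the mapping.  With illustrative numbers: an entry spanning 16 pages
 * backed by an object holding 100 resident pages, 10 of them reusable,
 * reports MIN(16, 100 - 10) = 16; an all-reusable object reports only its
 * wired pages.
 */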
14819
14820 void
14821 vm_map_region_top_walk(
14822 vm_map_entry_t entry,
14823 vm_region_top_info_t top)
14824 {
14825 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14826 top->share_mode = SM_EMPTY;
14827 top->ref_count = 0;
14828 top->obj_id = 0;
14829 return;
14830 }
14831
14832 {
14833 struct vm_object *obj, *tmp_obj;
14834 int ref_count;
14835 uint32_t entry_size;
14836
14837 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14838
14839 obj = VME_OBJECT(entry);
14840
14841 vm_object_lock(obj);
14842
14843 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14844 ref_count--;
14845 }
14846
14847 assert(obj->reusable_page_count <= obj->resident_page_count);
14848 if (obj->shadow) {
14849 if (ref_count == 1) {
14850 top->private_pages_resident =
14851 OBJ_RESIDENT_COUNT(obj, entry_size);
14852 } else {
14853 top->shared_pages_resident =
14854 OBJ_RESIDENT_COUNT(obj, entry_size);
14855 }
14856 top->ref_count = ref_count;
14857 top->share_mode = SM_COW;
14858
14859 while ((tmp_obj = obj->shadow)) {
14860 vm_object_lock(tmp_obj);
14861 vm_object_unlock(obj);
14862 obj = tmp_obj;
14863
14864 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14865 ref_count--;
14866 }
14867
14868 assert(obj->reusable_page_count <= obj->resident_page_count);
14869 top->shared_pages_resident +=
14870 OBJ_RESIDENT_COUNT(obj, entry_size);
14871 top->ref_count += ref_count - 1;
14872 }
14873 } else {
14874 if (entry->superpage_size) {
14875 top->share_mode = SM_LARGE_PAGE;
14876 top->shared_pages_resident = 0;
14877 top->private_pages_resident = entry_size;
14878 } else if (entry->needs_copy) {
14879 top->share_mode = SM_COW;
14880 top->shared_pages_resident =
14881 OBJ_RESIDENT_COUNT(obj, entry_size);
14882 } else {
14883 if (ref_count == 1 ||
14884 (ref_count == 2 && obj->named)) {
14885 top->share_mode = SM_PRIVATE;
14886 top->private_pages_resident =
14887 OBJ_RESIDENT_COUNT(obj,
14888 entry_size);
14889 } else {
14890 top->share_mode = SM_SHARED;
14891 top->shared_pages_resident =
14892 OBJ_RESIDENT_COUNT(obj,
14893 entry_size);
14894 }
14895 }
14896 top->ref_count = ref_count;
14897 }
14898 /* XXX K64: obj_id will be truncated */
14899 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14900
14901 vm_object_unlock(obj);
14902 }
14903 }
14904
14905 void
14906 vm_map_region_walk(
14907 vm_map_t map,
14908 vm_map_offset_t va,
14909 vm_map_entry_t entry,
14910 vm_object_offset_t offset,
14911 vm_object_size_t range,
14912 vm_region_extended_info_t extended,
14913 boolean_t look_for_pages,
14914 mach_msg_type_number_t count)
14915 {
14916 struct vm_object *obj, *tmp_obj;
14917 vm_map_offset_t last_offset;
14918 int i;
14919 int ref_count;
14920 struct vm_object *shadow_object;
14921 unsigned short shadow_depth;
14922 boolean_t do_region_footprint;
14923 int effective_page_size, effective_page_shift;
14924 vm_map_offset_t effective_page_mask;
14925
14926 do_region_footprint = task_self_region_footprint();
14927
14928 if ((VME_OBJECT(entry) == 0) ||
14929 (entry->is_sub_map) ||
14930 (VME_OBJECT(entry)->phys_contiguous &&
14931 !entry->superpage_size)) {
14932 extended->share_mode = SM_EMPTY;
14933 extended->ref_count = 0;
14934 return;
14935 }
14936
14937 if (entry->superpage_size) {
14938 extended->shadow_depth = 0;
14939 extended->share_mode = SM_LARGE_PAGE;
14940 extended->ref_count = 1;
14941 extended->external_pager = 0;
14942
14943 /* TODO4K: Superpage in 4k mode? */
14944 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14945 extended->shadow_depth = 0;
14946 return;
14947 }
14948
14949 effective_page_shift = vm_self_region_page_shift(map);
14950 effective_page_size = (1 << effective_page_shift);
14951 effective_page_mask = effective_page_size - 1;
14952
14953 offset = vm_map_trunc_page(offset, effective_page_mask);
14954
14955 obj = VME_OBJECT(entry);
14956
14957 vm_object_lock(obj);
14958
14959 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14960 ref_count--;
14961 }
14962
14963 if (look_for_pages) {
14964 for (last_offset = offset + range;
14965 offset < last_offset;
14966 offset += effective_page_size, va += effective_page_size) {
14967 if (do_region_footprint) {
14968 int disp;
14969
14970 disp = 0;
14971 if (map->has_corpse_footprint) {
14972 /*
14973 * Query the page info data we saved
14974 * while forking the corpse.
14975 */
14976 vm_map_corpse_footprint_query_page_info(
14977 map,
14978 va,
14979 &disp);
14980 } else {
14981 /*
14982 * Query the pmap.
14983 */
14984 vm_map_footprint_query_page_info(
14985 map,
14986 entry,
14987 va,
14988 &disp);
14989 }
14990 if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
14991 extended->pages_resident++;
14992 }
14993 if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
14994 extended->pages_reusable++;
14995 }
14996 if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
14997 extended->pages_dirtied++;
14998 }
14999 if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
15000 extended->pages_swapped_out++;
15001 }
15002 continue;
15003 }
15004
15005 vm_map_region_look_for_page(map, va, obj,
15006 vm_object_trunc_page(offset), ref_count,
15007 0, extended, count);
15008 }
15009
15010 if (do_region_footprint) {
15011 goto collect_object_info;
15012 }
15013 } else {
15014 collect_object_info:
15015 shadow_object = obj->shadow;
15016 shadow_depth = 0;
15017
15018 if (!(obj->internal)) {
15019 extended->external_pager = 1;
15020 }
15021
15022 if (shadow_object != VM_OBJECT_NULL) {
15023 vm_object_lock(shadow_object);
15024 for (;
15025 shadow_object != VM_OBJECT_NULL;
15026 shadow_depth++) {
15027 vm_object_t next_shadow;
15028
15029 if (!(shadow_object->internal)) {
15030 extended->external_pager = 1;
15031 }
15032
15033 next_shadow = shadow_object->shadow;
15034 if (next_shadow) {
15035 vm_object_lock(next_shadow);
15036 }
15037 vm_object_unlock(shadow_object);
15038 shadow_object = next_shadow;
15039 }
15040 }
15041 extended->shadow_depth = shadow_depth;
15042 }
15043
15044 if (extended->shadow_depth || entry->needs_copy) {
15045 extended->share_mode = SM_COW;
15046 } else {
15047 if (ref_count == 1) {
15048 extended->share_mode = SM_PRIVATE;
15049 } else {
15050 if (obj->true_share) {
15051 extended->share_mode = SM_TRUESHARED;
15052 } else {
15053 extended->share_mode = SM_SHARED;
15054 }
15055 }
15056 }
15057 extended->ref_count = ref_count - extended->shadow_depth;
15058
15059 for (i = 0; i < extended->shadow_depth; i++) {
15060 if ((tmp_obj = obj->shadow) == 0) {
15061 break;
15062 }
15063 vm_object_lock(tmp_obj);
15064 vm_object_unlock(obj);
15065
15066 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
15067 ref_count--;
15068 }
15069
15070 extended->ref_count += ref_count;
15071 obj = tmp_obj;
15072 }
15073 vm_object_unlock(obj);
15074
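/*
 * Added summary of the aliasing check below: when the object looked
 * shared, re-count how many entries of this same map reference it (or
 * one of its shadows).  If this map alone accounts for every reference,
 * the sharing is purely internal and the region is reported as
 * SM_PRIVATE_ALIASED; if this map references it more than once while
 * other maps hold references too, it is reported as SM_SHARED_ALIASED.
 */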
15075 if (extended->share_mode == SM_SHARED) {
15076 vm_map_entry_t cur;
15077 vm_map_entry_t last;
15078 int my_refs;
15079
15080 obj = VME_OBJECT(entry);
15081 last = vm_map_to_entry(map);
15082 my_refs = 0;
15083
15084 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15085 ref_count--;
15086 }
15087 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
15088 my_refs += vm_map_region_count_obj_refs(cur, obj);
15089 }
15090
15091 if (my_refs == ref_count) {
15092 extended->share_mode = SM_PRIVATE_ALIASED;
15093 } else if (my_refs > 1) {
15094 extended->share_mode = SM_SHARED_ALIASED;
15095 }
15096 }
15097 }
15098
15099
15100 /* object is locked on entry and locked on return */
15101
15102
15103 static void
15104 vm_map_region_look_for_page(
15105 __unused vm_map_t map,
15106 __unused vm_map_offset_t va,
15107 vm_object_t object,
15108 vm_object_offset_t offset,
15109 int max_refcnt,
15110 unsigned short depth,
15111 vm_region_extended_info_t extended,
15112 mach_msg_type_number_t count)
15113 {
15114 vm_page_t p;
15115 vm_object_t shadow;
15116 int ref_count;
15117 vm_object_t caller_object;
15118
15119 shadow = object->shadow;
15120 caller_object = object;
15121
15122
15123 while (TRUE) {
15124 if (!(object->internal)) {
15125 extended->external_pager = 1;
15126 }
15127
15128 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
15129 if (shadow && (max_refcnt == 1)) {
15130 extended->pages_shared_now_private++;
15131 }
15132
15133 if (!p->vmp_fictitious &&
15134 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
15135 extended->pages_dirtied++;
15136 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
15137 if (p->vmp_reusable || object->all_reusable) {
15138 extended->pages_reusable++;
15139 }
15140 }
15141
15142 extended->pages_resident++;
15143
15144 if (object != caller_object) {
15145 vm_object_unlock(object);
15146 }
15147
15148 return;
15149 }
15150 if (object->internal &&
15151 object->alive &&
15152 !object->terminating &&
15153 object->pager_ready) {
15154 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
15155 == VM_EXTERNAL_STATE_EXISTS) {
15156 /* the pager has that page */
15157 extended->pages_swapped_out++;
15158 if (object != caller_object) {
15159 vm_object_unlock(object);
15160 }
15161 return;
15162 }
15163 }
15164
15165 if (shadow) {
15166 vm_object_lock(shadow);
15167
15168 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
15169 ref_count--;
15170 }
15171
15172 if (++depth > extended->shadow_depth) {
15173 extended->shadow_depth = depth;
15174 }
15175
15176 if (ref_count > max_refcnt) {
15177 max_refcnt = ref_count;
15178 }
15179
15180 if (object != caller_object) {
15181 vm_object_unlock(object);
15182 }
15183
15184 offset = offset + object->vo_shadow_offset;
15185 object = shadow;
15186 shadow = object->shadow;
15187 continue;
15188 }
15189 if (object != caller_object) {
15190 vm_object_unlock(object);
15191 }
15192 break;
15193 }
15194 }
15195
15196 static int
15197 vm_map_region_count_obj_refs(
15198 vm_map_entry_t entry,
15199 vm_object_t object)
15200 {
15201 int ref_count;
15202 vm_object_t chk_obj;
15203 vm_object_t tmp_obj;
15204
15205 if (VME_OBJECT(entry) == 0) {
15206 return 0;
15207 }
15208
15209 if (entry->is_sub_map) {
15210 return 0;
15211 } else {
15212 ref_count = 0;
15213
15214 chk_obj = VME_OBJECT(entry);
15215 vm_object_lock(chk_obj);
15216
15217 while (chk_obj) {
15218 if (chk_obj == object) {
15219 ref_count++;
15220 }
15221 tmp_obj = chk_obj->shadow;
15222 if (tmp_obj) {
15223 vm_object_lock(tmp_obj);
15224 }
15225 vm_object_unlock(chk_obj);
15226
15227 chk_obj = tmp_obj;
15228 }
15229 }
15230 return ref_count;
15231 }
15232
15233
15234 /*
15235 * Routine: vm_map_simplify
15236 *
15237 * Description:
15238 * Attempt to simplify the map representation in
15239 * the vicinity of the given starting address.
15240 * Note:
15241 * This routine is intended primarily to keep the
15242 * kernel maps more compact -- they generally don't
15243 * benefit from the "expand a map entry" technology
15244 * at allocation time because the adjacent entry
15245 * is often wired down.
15246 */
15247 void
15248 vm_map_simplify_entry(
15249 vm_map_t map,
15250 vm_map_entry_t this_entry)
15251 {
15252 vm_map_entry_t prev_entry;
15253
15254 prev_entry = this_entry->vme_prev;
15255
15256 if ((this_entry != vm_map_to_entry(map)) &&
15257 (prev_entry != vm_map_to_entry(map)) &&
15258
15259 (prev_entry->vme_end == this_entry->vme_start) &&
15260
15261 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
15262 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
15263 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
15264 prev_entry->vme_start))
15265 == VME_OFFSET(this_entry)) &&
15266
15267 (prev_entry->behavior == this_entry->behavior) &&
15268 (prev_entry->needs_copy == this_entry->needs_copy) &&
15269 (prev_entry->protection == this_entry->protection) &&
15270 (prev_entry->max_protection == this_entry->max_protection) &&
15271 (prev_entry->inheritance == this_entry->inheritance) &&
15272 (prev_entry->use_pmap == this_entry->use_pmap) &&
15273 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
15274 (prev_entry->no_cache == this_entry->no_cache) &&
15275 (prev_entry->permanent == this_entry->permanent) &&
15276 (prev_entry->map_aligned == this_entry->map_aligned) &&
15277 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
15278 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
15279 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
15280 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
15281 (prev_entry->vme_resilient_codesign ==
15282 this_entry->vme_resilient_codesign) &&
15283 (prev_entry->vme_resilient_media ==
15284 this_entry->vme_resilient_media) &&
15285 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
15286
15287 (prev_entry->wired_count == this_entry->wired_count) &&
15288 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
15289
15290 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
15291 (prev_entry->in_transition == FALSE) &&
15292 (this_entry->in_transition == FALSE) &&
15293 (prev_entry->needs_wakeup == FALSE) &&
15294 (this_entry->needs_wakeup == FALSE) &&
15295 (prev_entry->is_shared == this_entry->is_shared) &&
15296 (prev_entry->superpage_size == FALSE) &&
15297 (this_entry->superpage_size == FALSE)
15298 ) {
15299 vm_map_store_entry_unlink(map, prev_entry);
15300 assert(prev_entry->vme_start < this_entry->vme_end);
15301 if (prev_entry->map_aligned) {
15302 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
15303 VM_MAP_PAGE_MASK(map)));
15304 }
15305 this_entry->vme_start = prev_entry->vme_start;
15306 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
15307
15308 if (map->holelistenabled) {
15309 vm_map_store_update_first_free(map, this_entry, TRUE);
15310 }
15311
15312 if (prev_entry->is_sub_map) {
15313 vm_map_deallocate(VME_SUBMAP(prev_entry));
15314 } else {
15315 vm_object_deallocate(VME_OBJECT(prev_entry));
15316 }
15317 vm_map_entry_dispose(map, prev_entry);
15318 SAVE_HINT_MAP_WRITE(map, this_entry);
15319 }
15320 }
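
/*
 * Worked example (illustrative, not from the original source): two
 * adjacent entries are only coalesced by vm_map_simplify_entry() when
 * they map the same object at contiguous offsets, e.g.
 *
 *	prev: [0x1000, 0x3000)  object O, offset 0x0000
 *	this: [0x3000, 0x5000)  object O, offset 0x2000
 *
 * since VME_OFFSET(prev) + (prev->vme_end - prev->vme_start) == 0x2000
 * == VME_OFFSET(this).  Any mismatch in the other fields checked above
 * (protections, inheritance, wiring counts, ...) keeps them separate.
 */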
15321
15322 void
15323 vm_map_simplify(
15324 vm_map_t map,
15325 vm_map_offset_t start)
15326 {
15327 vm_map_entry_t this_entry;
15328
15329 vm_map_lock(map);
15330 if (vm_map_lookup_entry(map, start, &this_entry)) {
15331 vm_map_simplify_entry(map, this_entry);
15332 vm_map_simplify_entry(map, this_entry->vme_next);
15333 }
15334 vm_map_unlock(map);
15335 }
15336
15337 static void
15338 vm_map_simplify_range(
15339 vm_map_t map,
15340 vm_map_offset_t start,
15341 vm_map_offset_t end)
15342 {
15343 vm_map_entry_t entry;
15344
15345 /*
15346 * The map should be locked (for "write") by the caller.
15347 */
15348
15349 if (start >= end) {
15350 /* invalid address range */
15351 return;
15352 }
15353
15354 start = vm_map_trunc_page(start,
15355 VM_MAP_PAGE_MASK(map));
15356 end = vm_map_round_page(end,
15357 VM_MAP_PAGE_MASK(map));
15358
15359 if (!vm_map_lookup_entry(map, start, &entry)) {
15360 /* "start" is not mapped and "entry" ends before "start" */
15361 if (entry == vm_map_to_entry(map)) {
15362 /* start with first entry in the map */
15363 entry = vm_map_first_entry(map);
15364 } else {
15365 /* start with next entry */
15366 entry = entry->vme_next;
15367 }
15368 }
15369
15370 while (entry != vm_map_to_entry(map) &&
15371 entry->vme_start <= end) {
15372 /* try and coalesce "entry" with its previous entry */
15373 vm_map_simplify_entry(map, entry);
15374 entry = entry->vme_next;
15375 }
15376 }
15377
15378
15379 /*
15380 * Routine: vm_map_machine_attribute
15381 * Purpose:
15382 * Provide machine-specific attributes to mappings,
15383 * such as cacheability etc. for machines that provide
15384 * them. NUMA architectures and machines with big/strange
15385 * caches will use this.
15386 * Note:
15387 * Responsibilities for locking and checking are handled here,
15388 * everything else in the pmap module. If any non-volatile
15389 * information must be kept, the pmap module should handle
15390 * it itself. [This assumes that attributes do not
15391 * need to be inherited, which seems ok to me]
15392 */
15393 kern_return_t
15394 vm_map_machine_attribute(
15395 vm_map_t map,
15396 vm_map_offset_t start,
15397 vm_map_offset_t end,
15398 vm_machine_attribute_t attribute,
15399 vm_machine_attribute_val_t* value) /* IN/OUT */
15400 {
15401 kern_return_t ret;
15402 vm_map_size_t sync_size;
15403 vm_map_entry_t entry;
15404
15405 if (start < vm_map_min(map) || end > vm_map_max(map)) {
15406 return KERN_INVALID_ADDRESS;
15407 }
15408
15409 /* Figure how much memory we need to flush (in page increments) */
15410 sync_size = end - start;
15411
15412 vm_map_lock(map);
15413
15414 if (attribute != MATTR_CACHE) {
15415 /* If we don't have to find physical addresses, we */
15416 /* don't have to do an explicit traversal here. */
15417 ret = pmap_attribute(map->pmap, start, end - start,
15418 attribute, value);
15419 vm_map_unlock(map);
15420 return ret;
15421 }
15422
15423 ret = KERN_SUCCESS; /* Assume it all worked */
15424
15425 while (sync_size) {
15426 if (vm_map_lookup_entry(map, start, &entry)) {
15427 vm_map_size_t sub_size;
15428 if ((entry->vme_end - start) > sync_size) {
15429 sub_size = sync_size;
15430 sync_size = 0;
15431 } else {
15432 sub_size = entry->vme_end - start;
15433 sync_size -= sub_size;
15434 }
15435 if (entry->is_sub_map) {
15436 vm_map_offset_t sub_start;
15437 vm_map_offset_t sub_end;
15438
15439 sub_start = (start - entry->vme_start)
15440 + VME_OFFSET(entry);
15441 sub_end = sub_start + sub_size;
15442 vm_map_machine_attribute(
15443 VME_SUBMAP(entry),
15444 sub_start,
15445 sub_end,
15446 attribute, value);
15447 } else {
15448 if (VME_OBJECT(entry)) {
15449 vm_page_t m;
15450 vm_object_t object;
15451 vm_object_t base_object;
15452 vm_object_t last_object;
15453 vm_object_offset_t offset;
15454 vm_object_offset_t base_offset;
15455 vm_map_size_t range;
15456 range = sub_size;
15457 offset = (start - entry->vme_start)
15458 + VME_OFFSET(entry);
15459 offset = vm_object_trunc_page(offset);
15460 base_offset = offset;
15461 object = VME_OBJECT(entry);
15462 base_object = object;
15463 last_object = NULL;
15464
15465 vm_object_lock(object);
15466
15467 while (range) {
15468 m = vm_page_lookup(
15469 object, offset);
15470
15471 if (m && !m->vmp_fictitious) {
15472 ret =
15473 pmap_attribute_cache_sync(
15474 VM_PAGE_GET_PHYS_PAGE(m),
15475 PAGE_SIZE,
15476 attribute, value);
15477 } else if (object->shadow) {
15478 offset = offset + object->vo_shadow_offset;
15479 last_object = object;
15480 object = object->shadow;
15481 vm_object_lock(last_object->shadow);
15482 vm_object_unlock(last_object);
15483 continue;
15484 }
15485 if (range < PAGE_SIZE) {
15486 range = 0;
15487 } else {
15488 range -= PAGE_SIZE;
15489 }
15490
15491 if (base_object != object) {
15492 vm_object_unlock(object);
15493 vm_object_lock(base_object);
15494 object = base_object;
15495 }
15496 /* Bump to the next page */
15497 base_offset += PAGE_SIZE;
15498 offset = base_offset;
15499 }
15500 vm_object_unlock(object);
15501 }
15502 }
15503 start += sub_size;
15504 } else {
15505 vm_map_unlock(map);
15506 return KERN_FAILURE;
15507 }
15508 }
15509
15510 vm_map_unlock(map);
15511
15512 return ret;
15513 }
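
/*
 * Usage sketch (illustrative, user space, not from the original source):
 * the Mach interface vm_machine_attribute() is the usual way to reach
 * vm_map_machine_attribute(); e.g. requesting a data-cache flush for a
 * buffer in the current task.  Assumes <mach/mach.h> and a pmap that
 * actually implements MATTR_CACHE; "buf" and "len" are hypothetical.
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr = vm_machine_attribute(mach_task_self(),
 *	    (vm_address_t)buf, len, MATTR_CACHE, &val);
 *	if (kr != KERN_SUCCESS) {
 *		/* attribute not supported or range invalid */
 *	}
 */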
15514
15515 /*
15516 * vm_map_behavior_set:
15517 *
15518 * Sets the paging reference behavior of the specified address
15519 * range in the target map. Paging reference behavior affects
15520 * how pagein operations resulting from faults on the map will be
15521 * clustered.
15522 */
15523 kern_return_t
15524 vm_map_behavior_set(
15525 vm_map_t map,
15526 vm_map_offset_t start,
15527 vm_map_offset_t end,
15528 vm_behavior_t new_behavior)
15529 {
15530 vm_map_entry_t entry;
15531 vm_map_entry_t temp_entry;
15532
15533 if (start > end ||
15534 start < vm_map_min(map) ||
15535 end > vm_map_max(map)) {
15536 return KERN_NO_SPACE;
15537 }
15538
15539 switch (new_behavior) {
15540 /*
15541 * This first block of behaviors all set a persistent state on the specified
15542 * memory range. All we have to do here is to record the desired behavior
15543 * in the vm_map_entry_t's.
15544 */
15545
15546 case VM_BEHAVIOR_DEFAULT:
15547 case VM_BEHAVIOR_RANDOM:
15548 case VM_BEHAVIOR_SEQUENTIAL:
15549 case VM_BEHAVIOR_RSEQNTL:
15550 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15551 vm_map_lock(map);
15552
15553 /*
15554 * The entire address range must be valid for the map.
15555 * Note that vm_map_range_check() does a
15556 * vm_map_lookup_entry() internally and returns the
15557 * entry containing the start of the address range if
15558 * the entire range is valid.
15559 */
15560 if (vm_map_range_check(map, start, end, &temp_entry)) {
15561 entry = temp_entry;
15562 vm_map_clip_start(map, entry, start);
15563 } else {
15564 vm_map_unlock(map);
15565 return KERN_INVALID_ADDRESS;
15566 }
15567
15568 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15569 vm_map_clip_end(map, entry, end);
15570 if (entry->is_sub_map) {
15571 assert(!entry->use_pmap);
15572 }
15573
15574 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
15575 entry->zero_wired_pages = TRUE;
15576 } else {
15577 entry->behavior = new_behavior;
15578 }
15579 entry = entry->vme_next;
15580 }
15581
15582 vm_map_unlock(map);
15583 break;
15584
15585 /*
15586 * The rest of these are different from the above in that they cause
15587 * an immediate action to take place as opposed to setting a behavior that
15588 * affects future actions.
15589 */
15590
15591 case VM_BEHAVIOR_WILLNEED:
15592 return vm_map_willneed(map, start, end);
15593
15594 case VM_BEHAVIOR_DONTNEED:
15595 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
15596
15597 case VM_BEHAVIOR_FREE:
15598 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
15599
15600 case VM_BEHAVIOR_REUSABLE:
15601 return vm_map_reusable_pages(map, start, end);
15602
15603 case VM_BEHAVIOR_REUSE:
15604 return vm_map_reuse_pages(map, start, end);
15605
15606 case VM_BEHAVIOR_CAN_REUSE:
15607 return vm_map_can_reuse(map, start, end);
15608
15609 #if MACH_ASSERT
15610 case VM_BEHAVIOR_PAGEOUT:
15611 return vm_map_pageout(map, start, end);
15612 #endif /* MACH_ASSERT */
15613
15614 default:
15615 return KERN_INVALID_ARGUMENT;
15616 }
15617
15618 return KERN_SUCCESS;
15619 }
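
/*
 * Usage sketch (illustrative, user space, not from the original source):
 * these behaviors are normally reached through the BSD madvise() path,
 * which translates the MADV_* advice into the VM_BEHAVIOR_* values
 * handled above.  Assumes <sys/mman.h> on Darwin; "buf" and "len" are
 * hypothetical.
 *
 *	(void) madvise(buf, len, MADV_SEQUENTIAL); // persistent: VM_BEHAVIOR_SEQUENTIAL
 *	(void) madvise(buf, len, MADV_WILLNEED);   // immediate:  VM_BEHAVIOR_WILLNEED
 *	(void) madvise(buf, len, MADV_FREE);       // immediate:  VM_BEHAVIOR_FREE
 */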
15620
15621
15622 /*
15623 * Internals for madvise(MADV_WILLNEED) system call.
15624 *
15625 * The implementation is to do:
15626 * a) read-ahead if the mapping corresponds to a mapped regular file
15627 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
15628 */
15629
15630
15631 static kern_return_t
15632 vm_map_willneed(
15633 vm_map_t map,
15634 vm_map_offset_t start,
15635 vm_map_offset_t end
15636 )
15637 {
15638 vm_map_entry_t entry;
15639 vm_object_t object;
15640 memory_object_t pager;
15641 struct vm_object_fault_info fault_info = {};
15642 kern_return_t kr;
15643 vm_object_size_t len;
15644 vm_object_offset_t offset;
15645
15646 fault_info.interruptible = THREAD_UNINT; /* ignored value */
15647 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
15648 fault_info.stealth = TRUE;
15649
15650 /*
15651 * The MADV_WILLNEED operation doesn't require any changes to the
15652 * vm_map_entry_t's, so the read lock is sufficient.
15653 */
15654
15655 vm_map_lock_read(map);
15656
15657 /*
15658 * The madvise semantics require that the address range be fully
15659 * allocated with no holes. Otherwise, we're required to return
15660 * an error.
15661 */
15662
15663 if (!vm_map_range_check(map, start, end, &entry)) {
15664 vm_map_unlock_read(map);
15665 return KERN_INVALID_ADDRESS;
15666 }
15667
15668 /*
15669 * Examine each vm_map_entry_t in the range.
15670 */
15671 for (; entry != vm_map_to_entry(map) && start < end;) {
15672 /*
15673 * The first time through, the start address could be anywhere
15674 * within the vm_map_entry we found. So adjust the offset to
15675 * correspond. After that, the offset will always be zero to
15676 * correspond to the beginning of the current vm_map_entry.
15677 */
15678 offset = (start - entry->vme_start) + VME_OFFSET(entry);
15679
15680 /*
15681 * Set the length so we don't go beyond the end of the
15682 * map_entry or beyond the end of the range we were given.
15683 * This range could also span multiple map entries, all of which
15684 * map different files, so make sure we only do the right amount
15685 * of I/O for each object. Note that it's possible for there
15686 * to be multiple map entries all referring to the same object
15687 * but with different page permissions, but it's not worth
15688 * trying to optimize that case.
15689 */
15690 len = MIN(entry->vme_end - start, end - start);
15691
15692 if ((vm_size_t) len != len) {
15693 /* 32-bit overflow */
15694 len = (vm_size_t) (0 - PAGE_SIZE);
15695 }
15696 fault_info.cluster_size = (vm_size_t) len;
15697 fault_info.lo_offset = offset;
15698 fault_info.hi_offset = offset + len;
15699 fault_info.user_tag = VME_ALIAS(entry);
15700 fault_info.pmap_options = 0;
15701 if (entry->iokit_acct ||
15702 (!entry->is_sub_map && !entry->use_pmap)) {
15703 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15704 }
15705
15706 /*
15707 * If the entry is a submap OR there's no read permission
15708 * to this mapping, then just skip it.
15709 */
15710 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15711 entry = entry->vme_next;
15712 start = entry->vme_start;
15713 continue;
15714 }
15715
15716 object = VME_OBJECT(entry);
15717
15718 if (object == NULL ||
15719 (object && object->internal)) {
15720 /*
15721 * Memory range backed by anonymous memory.
15722 */
15723 vm_size_t region_size = 0, effective_page_size = 0;
15724 vm_map_offset_t addr = 0, effective_page_mask = 0;
15725
15726 region_size = len;
15727 addr = start;
15728
15729 effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
15730 effective_page_size = effective_page_mask + 1;
15731
15732 vm_map_unlock_read(map);
15733
15734 while (region_size) {
15735 vm_pre_fault(
15736 vm_map_trunc_page(addr, effective_page_mask),
15737 VM_PROT_READ | VM_PROT_WRITE);
15738
15739 region_size -= effective_page_size;
15740 addr += effective_page_size;
15741 }
15742 } else {
15743 /*
15744 * Find the file object backing this map entry. If there is
15745 * none, then we simply ignore the "will need" advice for this
15746 * entry and go on to the next one.
15747 */
15748 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15749 entry = entry->vme_next;
15750 start = entry->vme_start;
15751 continue;
15752 }
15753
15754 vm_object_paging_begin(object);
15755 pager = object->pager;
15756 vm_object_unlock(object);
15757
15758 /*
15759 * The data_request() could take a long time, so let's
15760 * release the map lock to avoid blocking other threads.
15761 */
15762 vm_map_unlock_read(map);
15763
15764 /*
15765 * Get the data from the object asynchronously.
15766 *
15767 * Note that memory_object_data_request() places limits on the
15768 * amount of I/O it will do. Regardless of the len we
15769 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15770 * silently truncates the len to that size. This isn't
15771 * necessarily bad since madvise shouldn't really be used to
15772 * page in unlimited amounts of data. Other Unix variants
15773 * limit the willneed case as well. If this turns out to be an
15774 * issue for developers, then we can always adjust the policy
15775 * here and still be backwards compatible since this is all
15776 * just "advice".
15777 */
15778 kr = memory_object_data_request(
15779 pager,
15780 vm_object_trunc_page(offset) + object->paging_offset,
15781 0, /* ignored */
15782 VM_PROT_READ,
15783 (memory_object_fault_info_t)&fault_info);
15784
15785 vm_object_lock(object);
15786 vm_object_paging_end(object);
15787 vm_object_unlock(object);
15788
15789 /*
15790 * If we couldn't do the I/O for some reason, just give up on
15791 * the madvise. We still return success to the user since
15792 * madvise isn't supposed to fail when the advice can't be
15793 * taken.
15794 */
15795
15796 if (kr != KERN_SUCCESS) {
15797 return KERN_SUCCESS;
15798 }
15799 }
15800
15801 start += len;
15802 if (start >= end) {
15803 /* done */
15804 return KERN_SUCCESS;
15805 }
15806
15807 /* look up next entry */
15808 vm_map_lock_read(map);
15809 if (!vm_map_lookup_entry(map, start, &entry)) {
15810 /*
15811 * There's a new hole in the address range.
15812 */
15813 vm_map_unlock_read(map);
15814 return KERN_INVALID_ADDRESS;
15815 }
15816 }
15817
15818 vm_map_unlock_read(map);
15819 return KERN_SUCCESS;
15820 }
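
/*
 * Usage sketch (illustrative, user space, not from the original source):
 * asking for read-ahead on a freshly mapped file, which ends up in
 * vm_map_willneed() above.  Assumes <sys/mman.h>; the advice is
 * best-effort, so the return value is commonly ignored.  "fd" and "len"
 * are hypothetical.
 *
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (p != MAP_FAILED) {
 *		(void) madvise(p, len, MADV_WILLNEED);
 *	}
 */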
15821
15822 static boolean_t
15823 vm_map_entry_is_reusable(
15824 vm_map_entry_t entry)
15825 {
15826 /* Only user map entries */
15827
15828 vm_object_t object;
15829
15830 if (entry->is_sub_map) {
15831 return FALSE;
15832 }
15833
15834 switch (VME_ALIAS(entry)) {
15835 case VM_MEMORY_MALLOC:
15836 case VM_MEMORY_MALLOC_SMALL:
15837 case VM_MEMORY_MALLOC_LARGE:
15838 case VM_MEMORY_REALLOC:
15839 case VM_MEMORY_MALLOC_TINY:
15840 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15841 case VM_MEMORY_MALLOC_LARGE_REUSED:
15842 /*
15843 * This is a malloc() memory region: check if it's still
15844 * in its original state and can be re-used for more
15845 * malloc() allocations.
15846 */
15847 break;
15848 default:
15849 /*
15850 * Not a malloc() memory region: let the caller decide if
15851 * it's re-usable.
15852 */
15853 return TRUE;
15854 }
15855
15856 if (/*entry->is_shared ||*/
15857 entry->is_sub_map ||
15858 entry->in_transition ||
15859 entry->protection != VM_PROT_DEFAULT ||
15860 entry->max_protection != VM_PROT_ALL ||
15861 entry->inheritance != VM_INHERIT_DEFAULT ||
15862 entry->no_cache ||
15863 entry->permanent ||
15864 entry->superpage_size != FALSE ||
15865 entry->zero_wired_pages ||
15866 entry->wired_count != 0 ||
15867 entry->user_wired_count != 0) {
15868 return FALSE;
15869 }
15870
15871 object = VME_OBJECT(entry);
15872 if (object == VM_OBJECT_NULL) {
15873 return TRUE;
15874 }
15875 if (
15876 #if 0
15877 /*
15878 * Let's proceed even if the VM object is potentially
15879 * shared.
15880 * We check for this later when processing the actual
15881 * VM pages, so the contents will be safe if shared.
15882 *
15883 * But we can still mark this memory region as "reusable" to
15884 * acknowledge that the caller did let us know that the memory
15885 * could be re-used and should not be penalized for holding
15886 * on to it. This allows its "resident size" to not include
15887 * the reusable range.
15888 */
15889 object->ref_count == 1 &&
15890 #endif
15891 object->wired_page_count == 0 &&
15892 object->copy == VM_OBJECT_NULL &&
15893 object->shadow == VM_OBJECT_NULL &&
15894 object->internal &&
15895 object->purgable == VM_PURGABLE_DENY &&
15896 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15897 !object->code_signed) {
15898 return TRUE;
15899 }
15900 return FALSE;
15901 }
15902
15903 static kern_return_t
15904 vm_map_reuse_pages(
15905 vm_map_t map,
15906 vm_map_offset_t start,
15907 vm_map_offset_t end)
15908 {
15909 vm_map_entry_t entry;
15910 vm_object_t object;
15911 vm_object_offset_t start_offset, end_offset;
15912
15913 /*
15914 * The MADV_REUSE operation doesn't require any changes to the
15915 * vm_map_entry_t's, so the read lock is sufficient.
15916 */
15917
15918 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15919 /*
15920 * XXX TODO4K
15921 * need to figure out what reusable means for a
15922 * portion of a native page.
15923 */
15924 return KERN_SUCCESS;
15925 }
15926
15927 vm_map_lock_read(map);
15928 assert(map->pmap != kernel_pmap); /* protect alias access */
15929
15930 /*
15931 * The madvise semantics require that the address range be fully
15932 * allocated with no holes. Otherwise, we're required to return
15933 * an error.
15934 */
15935
15936 if (!vm_map_range_check(map, start, end, &entry)) {
15937 vm_map_unlock_read(map);
15938 vm_page_stats_reusable.reuse_pages_failure++;
15939 return KERN_INVALID_ADDRESS;
15940 }
15941
15942 /*
15943 * Examine each vm_map_entry_t in the range.
15944 */
15945 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15946 entry = entry->vme_next) {
15947 /*
15948 * Sanity check on the VM map entry.
15949 */
15950 if (!vm_map_entry_is_reusable(entry)) {
15951 vm_map_unlock_read(map);
15952 vm_page_stats_reusable.reuse_pages_failure++;
15953 return KERN_INVALID_ADDRESS;
15954 }
15955
15956 /*
15957 * The first time through, the start address could be anywhere
15958 * within the vm_map_entry we found. So adjust the offset to
15959 * correspond.
15960 */
15961 if (entry->vme_start < start) {
15962 start_offset = start - entry->vme_start;
15963 } else {
15964 start_offset = 0;
15965 }
15966 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15967 start_offset += VME_OFFSET(entry);
15968 end_offset += VME_OFFSET(entry);
15969
15970 assert(!entry->is_sub_map);
15971 object = VME_OBJECT(entry);
15972 if (object != VM_OBJECT_NULL) {
15973 vm_object_lock(object);
15974 vm_object_reuse_pages(object, start_offset, end_offset,
15975 TRUE);
15976 vm_object_unlock(object);
15977 }
15978
15979 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15980 /*
15981 * XXX
15982 * We do not hold the VM map exclusively here.
15983 * The "alias" field is not that critical, so it's
15984 * safe to update it here, as long as it is the only
15985 * one that can be modified while holding the VM map
15986 * "shared".
15987 */
15988 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15989 }
15990 }
15991
15992 vm_map_unlock_read(map);
15993 vm_page_stats_reusable.reuse_pages_success++;
15994 return KERN_SUCCESS;
15995 }
15996
15997
15998 static kern_return_t
15999 vm_map_reusable_pages(
16000 vm_map_t map,
16001 vm_map_offset_t start,
16002 vm_map_offset_t end)
16003 {
16004 vm_map_entry_t entry;
16005 vm_object_t object;
16006 vm_object_offset_t start_offset, end_offset;
16007 vm_map_offset_t pmap_offset;
16008
16009 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16010 /*
16011 * XXX TODO4K
16012 * need to figure out what reusable means for a portion
16013 * of a native page.
16014 */
16015 return KERN_SUCCESS;
16016 }
16017
16018 /*
16019 * The MADV_REUSABLE operation doesn't require any changes to the
16020 * vm_map_entry_t's, so the read lock is sufficient.
16021 */
16022
16023 vm_map_lock_read(map);
16024 assert(map->pmap != kernel_pmap); /* protect alias access */
16025
16026 /*
16027 * The madvise semantics require that the address range be fully
16028 * allocated with no holes. Otherwise, we're required to return
16029 * an error.
16030 */
16031
16032 if (!vm_map_range_check(map, start, end, &entry)) {
16033 vm_map_unlock_read(map);
16034 vm_page_stats_reusable.reusable_pages_failure++;
16035 return KERN_INVALID_ADDRESS;
16036 }
16037
16038 /*
16039 * Examine each vm_map_entry_t in the range.
16040 */
16041 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16042 entry = entry->vme_next) {
16043 int kill_pages = 0;
16044
16045 /*
16046 * Sanity check on the VM map entry.
16047 */
16048 if (!vm_map_entry_is_reusable(entry)) {
16049 vm_map_unlock_read(map);
16050 vm_page_stats_reusable.reusable_pages_failure++;
16051 return KERN_INVALID_ADDRESS;
16052 }
16053
16054 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
16055 /* not writable: can't discard contents */
16056 vm_map_unlock_read(map);
16057 vm_page_stats_reusable.reusable_nonwritable++;
16058 vm_page_stats_reusable.reusable_pages_failure++;
16059 return KERN_PROTECTION_FAILURE;
16060 }
16061
16062 /*
16063 * The first time through, the start address could be anywhere
16064 * within the vm_map_entry we found. So adjust the offset to
16065 * correspond.
16066 */
16067 if (entry->vme_start < start) {
16068 start_offset = start - entry->vme_start;
16069 pmap_offset = start;
16070 } else {
16071 start_offset = 0;
16072 pmap_offset = entry->vme_start;
16073 }
16074 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
16075 start_offset += VME_OFFSET(entry);
16076 end_offset += VME_OFFSET(entry);
16077
16078 assert(!entry->is_sub_map);
16079 object = VME_OBJECT(entry);
16080 if (object == VM_OBJECT_NULL) {
16081 continue;
16082 }
16083
16084
16085 vm_object_lock(object);
16086 if (((object->ref_count == 1) ||
16087 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
16088 object->copy == VM_OBJECT_NULL)) &&
16089 object->shadow == VM_OBJECT_NULL &&
16090 /*
16091 * "iokit_acct" entries are billed for their virtual size
16092 * (rather than for their resident pages only), so they
16093 * wouldn't benefit from making pages reusable, and it
16094 * would be hard to keep track of pages that are both
16095 * "iokit_acct" and "reusable" in the pmap stats and
16096 * ledgers.
16097 */
16098 !(entry->iokit_acct ||
16099 (!entry->is_sub_map && !entry->use_pmap))) {
16100 if (object->ref_count != 1) {
16101 vm_page_stats_reusable.reusable_shared++;
16102 }
16103 kill_pages = 1;
16104 } else {
16105 kill_pages = -1;
16106 }
16107 if (kill_pages != -1) {
16108 vm_object_deactivate_pages(object,
16109 start_offset,
16110 end_offset - start_offset,
16111 kill_pages,
16112 TRUE /*reusable_pages*/,
16113 map->pmap,
16114 pmap_offset);
16115 } else {
16116 vm_page_stats_reusable.reusable_pages_shared++;
16117 DTRACE_VM4(vm_map_reusable_pages_shared,
16118 unsigned int, VME_ALIAS(entry),
16119 vm_map_t, map,
16120 vm_map_entry_t, entry,
16121 vm_object_t, object);
16122 }
16123 vm_object_unlock(object);
16124
16125 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
16126 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
16127 /*
16128 * XXX
16129 * We do not hold the VM map exclusively here.
16130 * The "alias" field is not that critical, so it's
16131 * safe to update it here, as long as it is the only
16132 * one that can be modified while holding the VM map
16133 * "shared".
16134 */
16135 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
16136 }
16137 }
16138
16139 vm_map_unlock_read(map);
16140 vm_page_stats_reusable.reusable_pages_success++;
16141 return KERN_SUCCESS;
16142 }
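
/*
 * Usage sketch (illustrative, user space, not from the original source):
 * the reusable/reuse pair as a malloc-style allocator might use it --
 * mark a cached region reusable while it is idle so its pages stop
 * counting against the process, then reclaim it before handing the
 * memory out again.  Assumes <sys/mman.h> on Darwin; both calls are
 * advisory and "chunk"/"chunk_size" are hypothetical.
 *
 *	(void) madvise(chunk, chunk_size, MADV_FREE_REUSABLE); // idle
 *	...
 *	(void) madvise(chunk, chunk_size, MADV_FREE_REUSE);    // back in use
 */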
16143
16144
16145 static kern_return_t
16146 vm_map_can_reuse(
16147 vm_map_t map,
16148 vm_map_offset_t start,
16149 vm_map_offset_t end)
16150 {
16151 vm_map_entry_t entry;
16152
16153 /*
16154 * The MADV_REUSABLE operation doesn't require any changes to the
16155 * vm_map_entry_t's, so the read lock is sufficient.
16156 */
16157
16158 vm_map_lock_read(map);
16159 assert(map->pmap != kernel_pmap); /* protect alias access */
16160
16161 /*
16162 * The madvise semantics require that the address range be fully
16163 * allocated with no holes. Otherwise, we're required to return
16164 * an error.
16165 */
16166
16167 if (!vm_map_range_check(map, start, end, &entry)) {
16168 vm_map_unlock_read(map);
16169 vm_page_stats_reusable.can_reuse_failure++;
16170 return KERN_INVALID_ADDRESS;
16171 }
16172
16173 /*
16174 * Examine each vm_map_entry_t in the range.
16175 */
16176 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16177 entry = entry->vme_next) {
16178 /*
16179 * Sanity check on the VM map entry.
16180 */
16181 if (!vm_map_entry_is_reusable(entry)) {
16182 vm_map_unlock_read(map);
16183 vm_page_stats_reusable.can_reuse_failure++;
16184 return KERN_INVALID_ADDRESS;
16185 }
16186 }
16187
16188 vm_map_unlock_read(map);
16189 vm_page_stats_reusable.can_reuse_success++;
16190 return KERN_SUCCESS;
16191 }
16192
16193
16194 #if MACH_ASSERT
16195 static kern_return_t
16196 vm_map_pageout(
16197 vm_map_t map,
16198 vm_map_offset_t start,
16199 vm_map_offset_t end)
16200 {
16201 vm_map_entry_t entry;
16202
16203 /*
16204 * The MADV_PAGEOUT operation doesn't require any changes to the
16205 * vm_map_entry_t's, so the read lock is sufficient.
16206 */
16207
16208 vm_map_lock_read(map);
16209
16210 /*
16211 * The madvise semantics require that the address range be fully
16212 * allocated with no holes. Otherwise, we're required to return
16213 * an error.
16214 */
16215
16216 if (!vm_map_range_check(map, start, end, &entry)) {
16217 vm_map_unlock_read(map);
16218 return KERN_INVALID_ADDRESS;
16219 }
16220
16221 /*
16222 * Examine each vm_map_entry_t in the range.
16223 */
16224 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16225 entry = entry->vme_next) {
16226 vm_object_t object;
16227
16228 /*
16229 * Sanity check on the VM map entry.
16230 */
16231 if (entry->is_sub_map) {
16232 vm_map_t submap;
16233 vm_map_offset_t submap_start;
16234 vm_map_offset_t submap_end;
16235 vm_map_entry_t submap_entry;
16236
16237 submap = VME_SUBMAP(entry);
16238 submap_start = VME_OFFSET(entry);
16239 submap_end = submap_start + (entry->vme_end -
16240 entry->vme_start);
16241
16242 vm_map_lock_read(submap);
16243
16244 if (!vm_map_range_check(submap,
16245 submap_start,
16246 submap_end,
16247 &submap_entry)) {
16248 vm_map_unlock_read(submap);
16249 vm_map_unlock_read(map);
16250 return KERN_INVALID_ADDRESS;
16251 }
16252
16253 object = VME_OBJECT(submap_entry);
16254 if (submap_entry->is_sub_map ||
16255 object == VM_OBJECT_NULL ||
16256 !object->internal) {
16257 vm_map_unlock_read(submap);
16258 continue;
16259 }
16260
16261 vm_object_pageout(object);
16262
16263 vm_map_unlock_read(submap);
16264 submap = VM_MAP_NULL;
16265 submap_entry = VM_MAP_ENTRY_NULL;
16266 continue;
16267 }
16268
16269 object = VME_OBJECT(entry);
16270 if (entry->is_sub_map ||
16271 object == VM_OBJECT_NULL ||
16272 !object->internal) {
16273 continue;
16274 }
16275
16276 vm_object_pageout(object);
16277 }
16278
16279 vm_map_unlock_read(map);
16280 return KERN_SUCCESS;
16281 }
16282 #endif /* MACH_ASSERT */
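
/*
 * Usage sketch (illustrative, user space, not from the original source):
 * on MACH_ASSERT (debug) kernels the pageout behavior can be requested
 * with madvise(buf, len, MADV_PAGEOUT); on release kernels the
 * VM_BEHAVIOR_PAGEOUT case above is compiled out and
 * vm_map_behavior_set() rejects it with KERN_INVALID_ARGUMENT.
 */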
16283
16284
16285 /*
16286 * Routine: vm_map_entry_insert
16287 *
16288 * Description: This routine inserts a new vm_entry in a locked map.
16289 */
16290 vm_map_entry_t
16291 vm_map_entry_insert(
16292 vm_map_t map,
16293 vm_map_entry_t insp_entry,
16294 vm_map_offset_t start,
16295 vm_map_offset_t end,
16296 vm_object_t object,
16297 vm_object_offset_t offset,
16298 vm_map_kernel_flags_t vmk_flags,
16299 boolean_t needs_copy,
16300 boolean_t is_shared,
16301 boolean_t in_transition,
16302 vm_prot_t cur_protection,
16303 vm_prot_t max_protection,
16304 vm_behavior_t behavior,
16305 vm_inherit_t inheritance,
16306 unsigned short wired_count,
16307 boolean_t no_cache,
16308 boolean_t permanent,
16309 boolean_t no_copy_on_read,
16310 unsigned int superpage_size,
16311 boolean_t clear_map_aligned,
16312 boolean_t is_submap,
16313 boolean_t used_for_jit,
16314 int alias,
16315 boolean_t translated_allow_execute)
16316 {
16317 vm_map_entry_t new_entry;
16318
16319 assert(insp_entry != (vm_map_entry_t)0);
16320 vm_map_lock_assert_exclusive(map);
16321
16322 #if DEVELOPMENT || DEBUG
16323 vm_object_offset_t end_offset = 0;
16324 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16325 #endif /* DEVELOPMENT || DEBUG */
16326
16327 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
16328
16329 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16330 new_entry->map_aligned = TRUE;
16331 } else {
16332 new_entry->map_aligned = FALSE;
16333 }
16334 if (clear_map_aligned &&
16335 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16336 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
16337 new_entry->map_aligned = FALSE;
16338 }
16339
16340 new_entry->vme_start = start;
16341 new_entry->vme_end = end;
16342 if (new_entry->map_aligned) {
16343 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
16344 VM_MAP_PAGE_MASK(map)));
16345 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
16346 VM_MAP_PAGE_MASK(map)));
16347 } else {
16348 assert(page_aligned(new_entry->vme_start));
16349 assert(page_aligned(new_entry->vme_end));
16350 }
16351 assert(new_entry->vme_start < new_entry->vme_end);
16352
16353 VME_OBJECT_SET(new_entry, object);
16354 VME_OFFSET_SET(new_entry, offset);
16355 new_entry->is_shared = is_shared;
16356 new_entry->is_sub_map = is_submap;
16357 new_entry->needs_copy = needs_copy;
16358 new_entry->in_transition = in_transition;
16359 new_entry->needs_wakeup = FALSE;
16360 new_entry->inheritance = inheritance;
16361 new_entry->protection = cur_protection;
16362 new_entry->max_protection = max_protection;
16363 new_entry->behavior = behavior;
16364 new_entry->wired_count = wired_count;
16365 new_entry->user_wired_count = 0;
16366 if (is_submap) {
16367 /*
16368 * submap: "use_pmap" means "nested".
16369 * default: false.
16370 */
16371 new_entry->use_pmap = FALSE;
16372 } else {
16373 /*
16374 * object: "use_pmap" means "use pmap accounting" for footprint.
16375 * default: true.
16376 */
16377 new_entry->use_pmap = TRUE;
16378 }
16379 VME_ALIAS_SET(new_entry, alias);
16380 new_entry->zero_wired_pages = FALSE;
16381 new_entry->no_cache = no_cache;
16382 new_entry->permanent = permanent;
16383 if (superpage_size) {
16384 new_entry->superpage_size = TRUE;
16385 } else {
16386 new_entry->superpage_size = FALSE;
16387 }
16388 if (used_for_jit) {
16389 if (!(map->jit_entry_exists) ||
16390 VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
16391 new_entry->used_for_jit = TRUE;
16392 map->jit_entry_exists = TRUE;
16393 }
16394 } else {
16395 new_entry->used_for_jit = FALSE;
16396 }
16397 if (translated_allow_execute) {
16398 new_entry->translated_allow_execute = TRUE;
16399 } else {
16400 new_entry->translated_allow_execute = FALSE;
16401 }
16402 new_entry->pmap_cs_associated = FALSE;
16403 new_entry->iokit_acct = FALSE;
16404 new_entry->vme_resilient_codesign = FALSE;
16405 new_entry->vme_resilient_media = FALSE;
16406 new_entry->vme_atomic = FALSE;
16407 new_entry->vme_no_copy_on_read = no_copy_on_read;
16408
16409 /*
16410 * Insert the new entry into the list.
16411 */
16412
16413 vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
16414 map->size += end - start;
16415
16416 /*
16417 * Update the free space hint and the lookup hint.
16418 */
16419
16420 SAVE_HINT_MAP_WRITE(map, new_entry);
16421 return new_entry;
16422 }
16423
16424 /*
16425 * Routine: vm_map_remap_extract
16426 *
16427 * Description: This routine returns a vm_entry list from a map.
16428 */
16429 static kern_return_t
16430 vm_map_remap_extract(
16431 vm_map_t map,
16432 vm_map_offset_t addr,
16433 vm_map_size_t size,
16434 boolean_t copy,
16435 struct vm_map_header *map_header,
16436 vm_prot_t *cur_protection, /* IN/OUT */
16437 vm_prot_t *max_protection, /* IN/OUT */
16438 /* What, no behavior? */
16439 vm_inherit_t inheritance,
16440 vm_map_kernel_flags_t vmk_flags)
16441 {
16442 kern_return_t result;
16443 vm_map_size_t mapped_size;
16444 vm_map_size_t tmp_size;
16445 vm_map_entry_t src_entry; /* result of last map lookup */
16446 vm_map_entry_t new_entry;
16447 vm_object_offset_t offset;
16448 vm_map_offset_t map_address;
16449 vm_map_offset_t src_start; /* start of entry to map */
16450 vm_map_offset_t src_end; /* end of region to be mapped */
16451 vm_object_t object;
16452 vm_map_version_t version;
16453 boolean_t src_needs_copy;
16454 boolean_t new_entry_needs_copy;
16455 vm_map_entry_t saved_src_entry;
16456 boolean_t src_entry_was_wired;
16457 vm_prot_t max_prot_for_prot_copy;
16458 vm_map_offset_t effective_page_mask;
16459 boolean_t pageable, same_map;
16460 boolean_t vm_remap_legacy;
16461 vm_prot_t required_cur_prot, required_max_prot;
16462 vm_object_t new_copy_object; /* vm_object_copy_* result */
16463 boolean_t saved_used_for_jit; /* Saved used_for_jit. */
16464
16465 pageable = vmk_flags.vmkf_copy_pageable;
16466 same_map = vmk_flags.vmkf_copy_same_map;
16467
16468 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16469
16470 assert(map != VM_MAP_NULL);
16471 assert(size != 0);
16472 assert(size == vm_map_round_page(size, effective_page_mask));
16473 assert(inheritance == VM_INHERIT_NONE ||
16474 inheritance == VM_INHERIT_COPY ||
16475 inheritance == VM_INHERIT_SHARE);
16476 assert(!(*cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16477 assert(!(*max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16478 assert((*cur_protection & *max_protection) == *cur_protection);
16479
16480 /*
16481 * Compute start and end of region.
16482 */
16483 src_start = vm_map_trunc_page(addr, effective_page_mask);
16484 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16485
16486 /*
16487 * Initialize map_header.
16488 */
16489 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16490 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16491 map_header->nentries = 0;
16492 map_header->entries_pageable = pageable;
16493 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16494 map_header->page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(map);
16495 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16496
16497 vm_map_store_init( map_header );
16498
16499 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16500 /*
16501 * Special case for vm_map_protect(VM_PROT_COPY):
16502 * we want to set the new mappings' max protection to the
16503 * specified *max_protection...
16504 */
16505 max_prot_for_prot_copy = *max_protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
16506 /* ... but we want to use the vm_remap() legacy mode */
16507 *max_protection = VM_PROT_NONE;
16508 *cur_protection = VM_PROT_NONE;
16509 } else {
16510 max_prot_for_prot_copy = VM_PROT_NONE;
16511 }
16512
16513 if (*cur_protection == VM_PROT_NONE &&
16514 *max_protection == VM_PROT_NONE) {
16515 /*
16516 * vm_remap() legacy mode:
16517 * Extract all memory regions in the specified range and
16518 * collect the strictest set of protections allowed on the
16519 * entire range, so the caller knows what they can do with
16520 * the remapped range.
16521 * We start with VM_PROT_ALL and we'll remove the protections
16522 * missing from each memory region.
16523 */
16524 vm_remap_legacy = TRUE;
16525 *cur_protection = VM_PROT_ALL;
16526 *max_protection = VM_PROT_ALL;
16527 required_cur_prot = VM_PROT_NONE;
16528 required_max_prot = VM_PROT_NONE;
16529 } else {
16530 /*
16531 * vm_remap_new() mode:
16532 * Extract all memory regions in the specified range and
16533 * ensure that they have at least the protections specified
16534 * by the caller via *cur_protection and *max_protection.
16535 * The resulting mapping should have these protections.
16536 */
16537 vm_remap_legacy = FALSE;
16538 if (copy) {
16539 required_cur_prot = VM_PROT_NONE;
16540 required_max_prot = VM_PROT_READ;
16541 } else {
16542 required_cur_prot = *cur_protection;
16543 required_max_prot = *max_protection;
16544 }
16545 }
16546
16547 map_address = 0;
16548 mapped_size = 0;
16549 result = KERN_SUCCESS;
16550
16551 /*
16552 * The specified source virtual space might correspond to
16553 * multiple map entries, need to loop on them.
16554 */
16555 vm_map_lock(map);
16556 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16557 /*
16558 * This address space uses sub-pages so the range might
16559 * not be re-mappable in an address space with larger
16560 * pages. Re-assemble any broken-up VM map entries to
16561 * improve our chances of making it work.
16562 */
16563 vm_map_simplify_range(map, src_start, src_end);
16564 }
16565 while (mapped_size != size) {
16566 vm_map_size_t entry_size;
16567
16568 /*
16569 * Find the beginning of the region.
16570 */
16571 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16572 result = KERN_INVALID_ADDRESS;
16573 break;
16574 }
16575
16576 if (src_start < src_entry->vme_start ||
16577 (mapped_size && src_start != src_entry->vme_start)) {
16578 result = KERN_INVALID_ADDRESS;
16579 break;
16580 }
16581
16582 tmp_size = size - mapped_size;
16583 if (src_end > src_entry->vme_end) {
16584 tmp_size -= (src_end - src_entry->vme_end);
16585 }
16586
16587 entry_size = (vm_map_size_t)(src_entry->vme_end -
16588 src_entry->vme_start);
16589
16590 if (src_entry->is_sub_map &&
16591 vmk_flags.vmkf_copy_single_object) {
16592 vm_map_t submap;
16593 vm_map_offset_t submap_start;
16594 vm_map_size_t submap_size;
16595 boolean_t submap_needs_copy;
16596
16597 /*
16598 * No check for "required protection" on "src_entry"
16599 * because the protections that matter are the ones
16600 * on the submap's VM map entry, which will be checked
16601 * during the call to vm_map_remap_extract() below.
16602 */
16603 submap_size = src_entry->vme_end - src_start;
16604 if (submap_size > size) {
16605 submap_size = size;
16606 }
16607 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16608 submap = VME_SUBMAP(src_entry);
16609 if (copy) {
16610 /*
16611 * The caller wants a copy-on-write re-mapping,
16612 * so let's extract from the submap accordingly.
16613 */
16614 submap_needs_copy = TRUE;
16615 } else if (src_entry->needs_copy) {
16616 /*
16617 * The caller wants a shared re-mapping but the
16618 * submap is mapped with "needs_copy", so its
16619 * contents can't be shared as is. Extract the
16620 * contents of the submap as "copy-on-write".
16621 * The re-mapping won't be shared with the
16622 * original mapping but this is equivalent to
16623 * what happened with the original "remap from
16624 * submap" code.
16625 * The shared region is mapped "needs_copy", for
16626 * example.
16627 */
16628 submap_needs_copy = TRUE;
16629 } else {
16630 /*
16631 * The caller wants a shared re-mapping and
16632 * this mapping can be shared (no "needs_copy"),
16633 * so let's extract from the submap accordingly.
16634 * Kernel submaps are mapped without
16635 * "needs_copy", for example.
16636 */
16637 submap_needs_copy = FALSE;
16638 }
16639 vm_map_reference(submap);
16640 vm_map_unlock(map);
16641 src_entry = NULL;
16642 if (vm_remap_legacy) {
16643 *cur_protection = VM_PROT_NONE;
16644 *max_protection = VM_PROT_NONE;
16645 }
16646
16647 DTRACE_VM7(remap_submap_recurse,
16648 vm_map_t, map,
16649 vm_map_offset_t, addr,
16650 vm_map_size_t, size,
16651 boolean_t, copy,
16652 vm_map_offset_t, submap_start,
16653 vm_map_size_t, submap_size,
16654 boolean_t, submap_needs_copy);
16655
16656 result = vm_map_remap_extract(submap,
16657 submap_start,
16658 submap_size,
16659 submap_needs_copy,
16660 map_header,
16661 cur_protection,
16662 max_protection,
16663 inheritance,
16664 vmk_flags);
16665 vm_map_deallocate(submap);
16666 return result;
16667 }
16668
16669 if (src_entry->is_sub_map) {
16670 /* protections for submap mapping are irrelevant here */
16671 } else if (((src_entry->protection & required_cur_prot) !=
16672 required_cur_prot) ||
16673 ((src_entry->max_protection & required_max_prot) !=
16674 required_max_prot)) {
16675 if (vmk_flags.vmkf_copy_single_object &&
16676 mapped_size != 0) {
16677 /*
16678 * Single object extraction.
16679 * We can't extract more with the required
16680 * protection but we've extracted some, so
16681 * stop there and declare success.
16682 * The caller should check the size of
16683 * the copy entry we've extracted.
16684 */
16685 result = KERN_SUCCESS;
16686 } else {
16687 /*
16688 * VM range extraction.
16689 * Required protection is not available
16690 * for this part of the range: fail.
16691 */
16692 result = KERN_PROTECTION_FAILURE;
16693 }
16694 break;
16695 }
16696
16697 if (src_entry->is_sub_map) {
16698 vm_map_t submap;
16699 vm_map_offset_t submap_start;
16700 vm_map_size_t submap_size;
16701 vm_map_copy_t submap_copy;
16702 vm_prot_t submap_curprot, submap_maxprot;
16703 boolean_t submap_needs_copy;
16704
16705 /*
16706 * No check for "required protection" on "src_entry"
16707 * because the protections that matter are the ones
16708 * on the submap's VM map entry, which will be checked
16709 * during the call to vm_map_copy_extract() below.
16710 */
16711 object = VM_OBJECT_NULL;
16712 submap_copy = VM_MAP_COPY_NULL;
16713
16714 /* find equivalent range in the submap */
16715 submap = VME_SUBMAP(src_entry);
16716 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16717 submap_size = tmp_size;
16718 if (copy) {
16719 /*
16720 * The caller wants a copy-on-write re-mapping,
16721 * so let's extract from the submap accordingly.
16722 */
16723 submap_needs_copy = TRUE;
16724 } else if (src_entry->needs_copy) {
16725 /*
16726 * The caller wants a shared re-mapping but the
16727 * submap is mapped with "needs_copy", so its
16728 * contents can't be shared as is. Extract the
16729 * contents of the submap as "copy-on-write".
16730 * The re-mapping won't be shared with the
16731 * original mapping but this is equivalent to
16732 * what happened with the original "remap from
16733 * submap" code.
16734 * The shared region is mapped "needs_copy", for
16735 * example.
16736 */
16737 submap_needs_copy = TRUE;
16738 } else {
16739 /*
16740 * The caller wants a shared re-mapping and
16741 * this mapping can be shared (no "needs_copy"),
16742 * so let's extract from the submap accordingly.
16743 * Kernel submaps are mapped without
16744 * "needs_copy", for example.
16745 */
16746 submap_needs_copy = FALSE;
16747 }
16748 /* extra ref to keep submap alive */
16749 vm_map_reference(submap);
16750
16751 DTRACE_VM7(remap_submap_recurse,
16752 vm_map_t, map,
16753 vm_map_offset_t, addr,
16754 vm_map_size_t, size,
16755 boolean_t, copy,
16756 vm_map_offset_t, submap_start,
16757 vm_map_size_t, submap_size,
16758 boolean_t, submap_needs_copy);
16759
16760 /*
16761 * The map can be safely unlocked since we
16762 * already hold a reference on the submap.
16763 *
16764 * No timestamp since we don't care if the map
16765 * gets modified while we're down in the submap.
16766 * We'll resume the extraction at src_start + tmp_size
16767 * anyway.
16768 */
16769 vm_map_unlock(map);
16770 src_entry = NULL; /* not valid once map is unlocked */
16771
16772 if (vm_remap_legacy) {
16773 submap_curprot = VM_PROT_NONE;
16774 submap_maxprot = VM_PROT_NONE;
16775 if (max_prot_for_prot_copy) {
16776 submap_maxprot = max_prot_for_prot_copy;
16777 }
16778 } else {
16779 assert(!max_prot_for_prot_copy);
16780 submap_curprot = *cur_protection;
16781 submap_maxprot = *max_protection;
16782 }
16783 result = vm_map_copy_extract(submap,
16784 submap_start,
16785 submap_size,
16786 submap_needs_copy,
16787 &submap_copy,
16788 &submap_curprot,
16789 &submap_maxprot,
16790 inheritance,
16791 vmk_flags);
16792
16793 /* release extra ref on submap */
16794 vm_map_deallocate(submap);
16795 submap = VM_MAP_NULL;
16796
16797 if (result != KERN_SUCCESS) {
16798 vm_map_lock(map);
16799 break;
16800 }
16801
16802 /* transfer submap_copy entries to map_header */
16803 while (vm_map_copy_first_entry(submap_copy) !=
16804 vm_map_copy_to_entry(submap_copy)) {
16805 vm_map_entry_t copy_entry;
16806 vm_map_size_t copy_entry_size;
16807
16808 copy_entry = vm_map_copy_first_entry(submap_copy);
16809 assert(!copy_entry->is_sub_map);
16810 object = VME_OBJECT(copy_entry);
16811
16812 /*
16813 * Prevent kernel_object from being exposed to
16814 * user space.
16815 */
16816 if (__improbable(object == kernel_object)) {
16817 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16818 proc_selfpid(),
16819 (current_task()->bsd_info
16820 ? proc_name_address(current_task()->bsd_info)
16821 : "?"));
16822 DTRACE_VM(extract_kernel_only);
16823 result = KERN_INVALID_RIGHT;
16824 vm_map_copy_discard(submap_copy);
16825 submap_copy = VM_MAP_COPY_NULL;
16826 vm_map_lock(map);
16827 break;
16828 }
16829
16830 vm_map_copy_entry_unlink(submap_copy, copy_entry);
16831 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16832 copy_entry->vme_start = map_address;
16833 copy_entry->vme_end = map_address + copy_entry_size;
16834 map_address += copy_entry_size;
16835 mapped_size += copy_entry_size;
16836 src_start += copy_entry_size;
16837 assert(src_start <= src_end);
16838 _vm_map_store_entry_link(map_header,
16839 map_header->links.prev,
16840 copy_entry);
16841 }
16842 /* done with submap_copy */
16843 vm_map_copy_discard(submap_copy);
16844
16845 if (vm_remap_legacy) {
16846 *cur_protection &= submap_curprot;
16847 *max_protection &= submap_maxprot;
16848 }
16849
16850 /* re-acquire the map lock and continue to next entry */
16851 vm_map_lock(map);
16852 continue;
16853 } else {
16854 object = VME_OBJECT(src_entry);
16855
16856 /*
16857 * Prevent kernel_object from being exposed to
16858 * user space.
16859 */
16860 if (__improbable(object == kernel_object)) {
16861 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16862 proc_selfpid(),
16863 (current_task()->bsd_info
16864 ? proc_name_address(current_task()->bsd_info)
16865 : "?"));
16866 DTRACE_VM(extract_kernel_only);
16867 result = KERN_INVALID_RIGHT;
16868 break;
16869 }
16870
16871 if (src_entry->iokit_acct) {
16872 /*
16873 * This entry uses "IOKit accounting".
16874 */
16875 } else if (object != VM_OBJECT_NULL &&
16876 (object->purgable != VM_PURGABLE_DENY ||
16877 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
16878 /*
16879 * Purgeable objects have their own accounting:
16880 * no pmap accounting for them.
16881 */
16882 assertf(!src_entry->use_pmap,
16883 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16884 map,
16885 src_entry,
16886 (uint64_t)src_entry->vme_start,
16887 (uint64_t)src_entry->vme_end,
16888 src_entry->protection,
16889 src_entry->max_protection,
16890 VME_ALIAS(src_entry));
16891 } else {
16892 /*
16893 * Not IOKit or purgeable:
16894 * must be accounted by pmap stats.
16895 */
16896 assertf(src_entry->use_pmap,
16897 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16898 map,
16899 src_entry,
16900 (uint64_t)src_entry->vme_start,
16901 (uint64_t)src_entry->vme_end,
16902 src_entry->protection,
16903 src_entry->max_protection,
16904 VME_ALIAS(src_entry));
16905 }
16906
16907 if (object == VM_OBJECT_NULL) {
16908 assert(!src_entry->needs_copy);
16909 object = vm_object_allocate(entry_size);
16910 VME_OFFSET_SET(src_entry, 0);
16911 VME_OBJECT_SET(src_entry, object);
16912 assert(src_entry->use_pmap);
16913 assert(!map->mapped_in_other_pmaps);
16914 } else if (src_entry->wired_count ||
16915 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16916 /*
16917 * A wired memory region should not have
16918 * any pending copy-on-write and needs to
16919 * keep pointing at the VM object that
16920 * contains the wired pages.
16921 * If we're sharing this memory (copy=false),
16922 * we'll share this VM object.
16923 * If we're copying this memory (copy=true),
16924 * we'll call vm_object_copy_slowly() below
16925 * and use the new VM object for the remapping.
16926 *
16927 * Or, we are already using an asymmetric
16928 * copy, and therefore we already have
16929 * the right object.
16930 */
16931 assert(!src_entry->needs_copy);
16932 } else if (src_entry->needs_copy || object->shadowed ||
16933 (object->internal && !object->true_share &&
16934 !src_entry->is_shared &&
16935 object->vo_size > entry_size)) {
16936 VME_OBJECT_SHADOW(src_entry, entry_size);
16937 assert(src_entry->use_pmap);
16938
16939 if (!src_entry->needs_copy &&
16940 (src_entry->protection & VM_PROT_WRITE)) {
16941 vm_prot_t prot;
16942
16943 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16944
16945 prot = src_entry->protection & ~VM_PROT_WRITE;
16946
16947 if (override_nx(map,
16948 VME_ALIAS(src_entry))
16949 && prot) {
16950 prot |= VM_PROT_EXECUTE;
16951 }
16952
16953 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16954
16955 if (map->mapped_in_other_pmaps) {
16956 vm_object_pmap_protect(
16957 VME_OBJECT(src_entry),
16958 VME_OFFSET(src_entry),
16959 entry_size,
16960 PMAP_NULL,
16961 PAGE_SIZE,
16962 src_entry->vme_start,
16963 prot);
16964 #if MACH_ASSERT
16965 } else if (__improbable(map->pmap == PMAP_NULL)) {
16966 extern boolean_t vm_tests_in_progress;
16967 assert(vm_tests_in_progress);
16968 /*
16969 * Some VM tests (in vm_tests.c)
16970 * sometimes want to use a VM
16971 * map without a pmap.
16972 * Otherwise, this should never
16973 * happen.
16974 */
16975 #endif /* MACH_ASSERT */
16976 } else {
16977 pmap_protect(vm_map_pmap(map),
16978 src_entry->vme_start,
16979 src_entry->vme_end,
16980 prot);
16981 }
16982 }
16983
16984 object = VME_OBJECT(src_entry);
16985 src_entry->needs_copy = FALSE;
16986 }
16987
16988
16989 vm_object_lock(object);
16990 vm_object_reference_locked(object); /* object ref. for new entry */
16991 assert(!src_entry->needs_copy);
16992 if (object->copy_strategy ==
16993 MEMORY_OBJECT_COPY_SYMMETRIC) {
16994 /*
16995 * If we want to share this object (copy==0),
16996 * it needs to be COPY_DELAY.
16997 * If we want to copy this object (copy==1),
16998 * we can't just set "needs_copy" on our side
16999 * and expect the other side to do the same
17000 * (symmetrically), so we can't let the object
17001 * stay COPY_SYMMETRIC.
17002 * So we always switch from COPY_SYMMETRIC to
17003 * COPY_DELAY.
17004 */
17005 object->copy_strategy =
17006 MEMORY_OBJECT_COPY_DELAY;
17007 object->true_share = TRUE;
17008 }
17009 vm_object_unlock(object);
17010 }
17011
17012 offset = (VME_OFFSET(src_entry) +
17013 (src_start - src_entry->vme_start));
17014
17015 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
17016 vm_map_entry_copy(map, new_entry, src_entry);
17017 if (new_entry->is_sub_map) {
17018 /* clr address space specifics */
17019 new_entry->use_pmap = FALSE;
17020 } else if (copy) {
17021 /*
17022 * We're dealing with a copy-on-write operation,
17023 * so the resulting mapping should not inherit the
17024 * original mapping's accounting settings.
17025 * "use_pmap" should be reset to its default (TRUE)
17026 * so that the new mapping gets accounted for in
17027 * the task's memory footprint.
17028 */
17029 new_entry->use_pmap = TRUE;
17030 }
17031 /* "iokit_acct" was cleared in vm_map_entry_copy() */
17032 assert(!new_entry->iokit_acct);
17033
17034 new_entry->map_aligned = FALSE;
17035
17036 new_entry->vme_start = map_address;
17037 new_entry->vme_end = map_address + tmp_size;
17038 assert(new_entry->vme_start < new_entry->vme_end);
17039 if (copy && vmk_flags.vmkf_remap_prot_copy) {
17040 /*
17041 * Remapping for vm_map_protect(VM_PROT_COPY)
17042 * to convert a read-only mapping into a
17043 * copy-on-write version of itself but
17044 * with write access:
17045 * keep the original inheritance and add
17046 * VM_PROT_WRITE to the max protection.
17047 */
17048 new_entry->inheritance = src_entry->inheritance;
17049 new_entry->protection &= max_prot_for_prot_copy;
17050 new_entry->max_protection |= VM_PROT_WRITE;
17051 } else {
17052 new_entry->inheritance = inheritance;
17053 if (!vm_remap_legacy) {
17054 new_entry->protection = *cur_protection;
17055 new_entry->max_protection = *max_protection;
17056 }
17057 }
17058 VME_OFFSET_SET(new_entry, offset);
17059
17060 /*
17061 * The new region has to be copied now if required.
17062 */
17063 RestartCopy:
17064 if (!copy) {
17065 if (src_entry->used_for_jit == TRUE) {
17066 if (same_map) {
17067 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
17068 /*
17069 * Cannot allow an entry describing a JIT
17070 * region to be shared across address spaces.
17071 */
17072 result = KERN_INVALID_ARGUMENT;
17073 vm_object_deallocate(object);
17074 _vm_map_entry_dispose(map_header, new_entry);
17075 new_entry = VM_MAP_ENTRY_NULL;
17076 break;
17077 }
17078 }
17079
17080 src_entry->is_shared = TRUE;
17081 new_entry->is_shared = TRUE;
17082 if (!(new_entry->is_sub_map)) {
17083 new_entry->needs_copy = FALSE;
17084 }
17085 } else if (src_entry->is_sub_map) {
17086 /* make this a COW sub_map if not already */
17087 assert(new_entry->wired_count == 0);
17088 new_entry->needs_copy = TRUE;
17089 object = VM_OBJECT_NULL;
17090 } else if (src_entry->wired_count == 0 &&
17091 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
17092 vm_object_copy_quickly(VME_OBJECT(new_entry),
17093 VME_OFFSET(new_entry),
17094 (new_entry->vme_end -
17095 new_entry->vme_start),
17096 &src_needs_copy,
17097 &new_entry_needs_copy)) {
17098 new_entry->needs_copy = new_entry_needs_copy;
17099 new_entry->is_shared = FALSE;
17100 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17101
17102 /*
17103 * Handle copy_on_write semantics.
17104 */
17105 if (src_needs_copy && !src_entry->needs_copy) {
17106 vm_prot_t prot;
17107
17108 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17109
17110 prot = src_entry->protection & ~VM_PROT_WRITE;
17111
17112 if (override_nx(map,
17113 VME_ALIAS(src_entry))
17114 && prot) {
17115 prot |= VM_PROT_EXECUTE;
17116 }
17117
17118 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17119
17120 vm_object_pmap_protect(object,
17121 offset,
17122 entry_size,
17123 ((src_entry->is_shared
17124 || map->mapped_in_other_pmaps) ?
17125 PMAP_NULL : map->pmap),
17126 VM_MAP_PAGE_SIZE(map),
17127 src_entry->vme_start,
17128 prot);
17129
17130 assert(src_entry->wired_count == 0);
17131 src_entry->needs_copy = TRUE;
17132 }
17133 /*
17134 * Throw away the old object reference of the new entry.
17135 */
17136 vm_object_deallocate(object);
17137 } else {
17138 new_entry->is_shared = FALSE;
17139 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17140
17141 src_entry_was_wired = (src_entry->wired_count > 0);
17142 saved_src_entry = src_entry;
17143 src_entry = VM_MAP_ENTRY_NULL;
17144
17145 /*
17146 * The map can be safely unlocked since we
17147 * already hold a reference on the object.
17148 *
17149 * Record the timestamp of the map for later
17150 * verification, and unlock the map.
17151 */
17152 version.main_timestamp = map->timestamp;
17153 vm_map_unlock(map); /* Increments timestamp once! */
17154
17155 /*
17156 * Perform the copy.
17157 */
17158 if (src_entry_was_wired > 0 ||
17159 (debug4k_no_cow_copyin &&
17160 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
17161 vm_object_lock(object);
17162 result = vm_object_copy_slowly(
17163 object,
17164 offset,
17165 (new_entry->vme_end -
17166 new_entry->vme_start),
17167 THREAD_UNINT,
17168 &new_copy_object);
17169 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17170 saved_used_for_jit = new_entry->used_for_jit;
17171 VME_OBJECT_SET(new_entry, new_copy_object);
17172 new_entry->used_for_jit = saved_used_for_jit;
17173 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
17174 new_entry->needs_copy = FALSE;
17175 } else {
17176 vm_object_offset_t new_offset;
17177
17178 new_offset = VME_OFFSET(new_entry);
17179 result = vm_object_copy_strategically(
17180 object,
17181 offset,
17182 (new_entry->vme_end -
17183 new_entry->vme_start),
17184 &new_copy_object,
17185 &new_offset,
17186 &new_entry_needs_copy);
17187 /* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17188 saved_used_for_jit = new_entry->used_for_jit;
17189 VME_OBJECT_SET(new_entry, new_copy_object);
17190 new_entry->used_for_jit = saved_used_for_jit;
17191 if (new_offset != VME_OFFSET(new_entry)) {
17192 VME_OFFSET_SET(new_entry, new_offset);
17193 }
17194
17195 new_entry->needs_copy = new_entry_needs_copy;
17196 }
17197
17198 /*
17199 * Throw away the old object reference of the new entry.
17200 */
17201 vm_object_deallocate(object);
17202
17203 if (result != KERN_SUCCESS &&
17204 result != KERN_MEMORY_RESTART_COPY) {
17205 _vm_map_entry_dispose(map_header, new_entry);
17206 vm_map_lock(map);
17207 break;
17208 }
17209
17210 /*
17211 * Verify that the map has not substantially
17212 * changed while the copy was being made.
17213 */
17214
17215 vm_map_lock(map);
17216 if (version.main_timestamp + 1 != map->timestamp) {
17217 /*
17218 * Simple version comparison failed.
17219 *
17220 * Retry the lookup and verify that the
17221 * same object/offset are still present.
17222 */
17223 saved_src_entry = VM_MAP_ENTRY_NULL;
17224 vm_object_deallocate(VME_OBJECT(new_entry));
17225 _vm_map_entry_dispose(map_header, new_entry);
17226 if (result == KERN_MEMORY_RESTART_COPY) {
17227 result = KERN_SUCCESS;
17228 }
17229 continue;
17230 }
17231 /* map hasn't changed: src_entry is still valid */
17232 src_entry = saved_src_entry;
17233 saved_src_entry = VM_MAP_ENTRY_NULL;
17234
17235 if (result == KERN_MEMORY_RESTART_COPY) {
17236 vm_object_reference(object);
17237 goto RestartCopy;
17238 }
17239 }
17240
17241 _vm_map_store_entry_link(map_header,
17242 map_header->links.prev, new_entry);
17243
17244 /* protections for submap mapping are irrelevant here */
17245 if (vm_remap_legacy && !src_entry->is_sub_map) {
17246 *cur_protection &= src_entry->protection;
17247 *max_protection &= src_entry->max_protection;
17248 }
17249
17250 map_address += tmp_size;
17251 mapped_size += tmp_size;
17252 src_start += tmp_size;
17253
17254 if (vmk_flags.vmkf_copy_single_object) {
17255 if (mapped_size != size) {
17256 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17257 if (src_entry->vme_next != vm_map_to_entry(map) &&
17258 VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
17259 /* XXX TODO4K */
17260 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17261 }
17262 }
17263 break;
17264 }
17265 } /* end while */
17266
17267 vm_map_unlock(map);
17268 if (result != KERN_SUCCESS) {
17269 /*
17270 * Free all allocated elements.
17271 */
17272 for (src_entry = map_header->links.next;
17273 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17274 src_entry = new_entry) {
17275 new_entry = src_entry->vme_next;
17276 _vm_map_store_entry_unlink(map_header, src_entry);
17277 if (src_entry->is_sub_map) {
17278 vm_map_deallocate(VME_SUBMAP(src_entry));
17279 } else {
17280 vm_object_deallocate(VME_OBJECT(src_entry));
17281 }
17282 _vm_map_entry_dispose(map_header, src_entry);
17283 }
17284 }
17285 return result;
17286 }
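/*
 * Note on protections (illustrative, not part of the build): when the
 * caller passes *cur_protection == VM_PROT_NONE and
 * *max_protection == VM_PROT_NONE (the "legacy mode" convention also used
 * by vm_map_range_physical_size() below), the extraction code above
 * computes the resulting protections as the intersection of the source
 * entries' protections and returns them through the same pointers.
 * Otherwise the passed-in values are applied to every new entry.  A
 * hedged sketch of the legacy-mode call, with "map", "addr", "size" and
 * "vmk_flags" as placeholders:
 *
 *	vm_map_copy_t copy_map;
 *	vm_prot_t cur_prot = VM_PROT_NONE;	// legacy: let the VM compute
 *	vm_prot_t max_prot = VM_PROT_NONE;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copy_extract(map, addr, size, FALSE, &copy_map,
 *	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT, vmk_flags);
 *	// on success, cur_prot/max_prot hold the intersected protections
 */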
17287
17288 bool
17289 vm_map_is_exotic(
17290 vm_map_t map)
17291 {
17292 return VM_MAP_IS_EXOTIC(map);
17293 }
17294
17295 bool
17296 vm_map_is_alien(
17297 vm_map_t map)
17298 {
17299 return VM_MAP_IS_ALIEN(map);
17300 }
17301
17302 #if XNU_TARGET_OS_OSX
17303 void
17304 vm_map_mark_alien(
17305 vm_map_t map)
17306 {
17307 vm_map_lock(map);
17308 map->is_alien = true;
17309 vm_map_unlock(map);
17310 }
17311
17312 void
17313 vm_map_single_jit(
17314 vm_map_t map)
17315 {
17316 vm_map_lock(map);
17317 map->single_jit = true;
17318 vm_map_unlock(map);
17319 }
17320 #endif /* XNU_TARGET_OS_OSX */
17321
17322 static kern_return_t
17323 vm_map_copy_to_physcopy(
17324 vm_map_copy_t copy_map,
17325 vm_map_t target_map)
17326 {
17327 vm_map_size_t size;
17328 vm_map_entry_t entry;
17329 vm_map_entry_t new_entry;
17330 vm_object_t new_object;
17331 unsigned int pmap_flags;
17332 pmap_t new_pmap;
17333 vm_map_t new_map;
17334 vm_map_address_t src_start, src_end, src_cur;
17335 vm_map_address_t dst_start, dst_end, dst_cur;
17336 kern_return_t kr;
17337 void *kbuf;
17338
17339 /*
17340 * Perform the equivalent of vm_allocate() and memcpy().
17341 * Replace the mappings in "copy_map" with the newly allocated mapping.
17342 */
17343 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17344
17345 assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17346
17347 /* create a new pmap to map "copy_map" */
17348 pmap_flags = 0;
17349 assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17350 #if PMAP_CREATE_FORCE_4K_PAGES
17351 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17352 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17353 pmap_flags |= PMAP_CREATE_64BIT;
17354 new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17355 if (new_pmap == NULL) {
17356 return KERN_RESOURCE_SHORTAGE;
17357 }
17358
17359 /* allocate new VM object */
17360 size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17361 new_object = vm_object_allocate(size);
17362 assert(new_object);
17363
17364 /* allocate new VM map entry */
17365 new_entry = vm_map_copy_entry_create(copy_map, FALSE);
17366 assert(new_entry);
17367
17368 /* finish initializing new VM map entry */
17369 new_entry->protection = VM_PROT_DEFAULT;
17370 new_entry->max_protection = VM_PROT_DEFAULT;
17371 new_entry->use_pmap = TRUE;
17372
17373 /* make new VM map entry point to new VM object */
17374 new_entry->vme_start = 0;
17375 new_entry->vme_end = size;
17376 VME_OBJECT_SET(new_entry, new_object);
17377 VME_OFFSET_SET(new_entry, 0);
17378
17379 /* create a new pageable VM map to map "copy_map" */
17380 new_map = vm_map_create_options(new_pmap, 0, MACH_VM_MAX_ADDRESS,
17381 VM_MAP_CREATE_PAGEABLE);
17382 assert(new_map);
17383 vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17384
17385 /* map "copy_map" in the new VM map */
17386 src_start = 0;
17387 kr = vm_map_copyout_internal(
17388 new_map,
17389 &src_start,
17390 copy_map,
17391 copy_map->size,
17392 FALSE, /* consume_on_success */
17393 VM_PROT_DEFAULT,
17394 VM_PROT_DEFAULT,
17395 VM_INHERIT_DEFAULT);
17396 assert(kr == KERN_SUCCESS);
17397 src_end = src_start + copy_map->size;
17398
17399 /* map "new_object" in the new VM map */
17400 vm_object_reference(new_object);
17401 dst_start = 0;
17402 kr = vm_map_enter(new_map,
17403 &dst_start,
17404 size,
17405 0, /* mask */
17406 VM_FLAGS_ANYWHERE,
17407 VM_MAP_KERNEL_FLAGS_NONE,
17408 VM_KERN_MEMORY_OSFMK,
17409 new_object,
17410 0, /* offset */
17411 FALSE, /* needs copy */
17412 VM_PROT_DEFAULT,
17413 VM_PROT_DEFAULT,
17414 VM_INHERIT_DEFAULT);
17415 assert(kr == KERN_SUCCESS);
17416 dst_end = dst_start + size;
17417
17418 /* get a kernel buffer */
17419 kbuf = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_NOFAIL);
17420
17421 /* physically copy "copy_map" mappings to new VM object */
17422 for (src_cur = src_start, dst_cur = dst_start;
17423 src_cur < src_end;
17424 src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17425 vm_size_t bytes;
17426
17427 bytes = PAGE_SIZE;
17428 if (src_cur + PAGE_SIZE > src_end) {
17429 /* partial copy for last page */
17430 bytes = src_end - src_cur;
17431 assert(bytes > 0 && bytes < PAGE_SIZE);
17432 /* rest of dst page should be zero-filled */
17433 }
17434 /* get bytes from src mapping */
17435 kr = copyinmap(new_map, src_cur, kbuf, bytes);
17436 if (kr != KERN_SUCCESS) {
17437 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17438 }
17439 /* put bytes in dst mapping */
17440 assert(dst_cur < dst_end);
17441 assert(dst_cur + bytes <= dst_end);
17442 kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17443 if (kr != KERN_SUCCESS) {
17444 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17445 }
17446 }
17447
17448 /* free kernel buffer */
17449 kfree_data(kbuf, PAGE_SIZE);
17450
17451 /* destroy new map */
17452 vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
17453 new_map = VM_MAP_NULL;
17454
17455 /* dispose of the old map entries in "copy_map" */
17456 while (vm_map_copy_first_entry(copy_map) !=
17457 vm_map_copy_to_entry(copy_map)) {
17458 entry = vm_map_copy_first_entry(copy_map);
17459 vm_map_copy_entry_unlink(copy_map, entry);
17460 if (entry->is_sub_map) {
17461 vm_map_deallocate(VME_SUBMAP(entry));
17462 } else {
17463 vm_object_deallocate(VME_OBJECT(entry));
17464 }
17465 vm_map_copy_entry_dispose(copy_map, entry);
17466 }
17467
17468 /* change "copy_map"'s page_size to match "target_map" */
17469 copy_map->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
17470 copy_map->offset = 0;
17471 copy_map->size = size;
17472
17473 /* insert new map entry in "copy_map" */
17474 assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17475 vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17476
17477 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17478 return KERN_SUCCESS;
17479 }
17480
17481 void
17482 vm_map_copy_adjust_get_target_copy_map(
17483 vm_map_copy_t copy_map,
17484 vm_map_copy_t *target_copy_map_p);
17485 void
17486 vm_map_copy_adjust_get_target_copy_map(
17487 vm_map_copy_t copy_map,
17488 vm_map_copy_t *target_copy_map_p)
17489 {
17490 vm_map_copy_t target_copy_map;
17491 vm_map_entry_t entry, target_entry;
17492
17493 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17494 /* the caller already has a "target_copy_map": use it */
17495 return;
17496 }
17497
17498 /* the caller wants us to create a new copy of "copy_map" */
17499 target_copy_map = vm_map_copy_allocate();
17500 target_copy_map->type = copy_map->type;
17501 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17502 target_copy_map->offset = copy_map->offset;
17503 target_copy_map->size = copy_map->size;
17504 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17505 vm_map_store_init(&target_copy_map->cpy_hdr);
17506 for (entry = vm_map_copy_first_entry(copy_map);
17507 entry != vm_map_copy_to_entry(copy_map);
17508 entry = entry->vme_next) {
17509 target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
17510 vm_map_entry_copy_full(target_entry, entry);
17511 if (target_entry->is_sub_map) {
17512 vm_map_reference(VME_SUBMAP(target_entry));
17513 } else {
17514 vm_object_reference(VME_OBJECT(target_entry));
17515 }
17516 vm_map_copy_entry_link(
17517 target_copy_map,
17518 vm_map_copy_last_entry(target_copy_map),
17519 target_entry);
17520 }
17521 entry = VM_MAP_ENTRY_NULL;
17522 *target_copy_map_p = target_copy_map;
17523 }
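/*
 * Illustrative sketch (not part of the build): the copy-on-first-change
 * pattern the adjustment code below relies on.  "target_copy_map" stays
 * VM_MAP_COPY_NULL until a modification is actually needed; the helper
 * above then clones "copy_map" and the caller switches to the clone, so
 * an unmodified copy map is never touched.  "change_is_needed" is a
 * placeholder for whatever triggers an adjustment (trimming,
 * over-mapping, ...).
 *
 *	vm_map_copy_t target_copy_map = VM_MAP_COPY_NULL;
 *
 *	if (change_is_needed) {
 *		vm_map_copy_adjust_get_target_copy_map(copy_map,
 *		    &target_copy_map);
 *		copy_map = target_copy_map;	// further edits hit the clone
 *	}
 */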
17524
17525 static void
17526 vm_map_copy_trim(
17527 vm_map_copy_t copy_map,
17528 uint16_t new_page_shift,
17529 vm_map_offset_t trim_start,
17530 vm_map_offset_t trim_end)
17531 {
17532 uint16_t copy_page_shift;
17533 vm_map_entry_t entry, next_entry;
17534
17535 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17536 assert(copy_map->cpy_hdr.nentries > 0);
17537
17538 trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17539 trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17540
17541 /* use the new page_shift to do the clipping */
17542 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17543 copy_map->cpy_hdr.page_shift = new_page_shift;
17544
17545 for (entry = vm_map_copy_first_entry(copy_map);
17546 entry != vm_map_copy_to_entry(copy_map);
17547 entry = next_entry) {
17548 next_entry = entry->vme_next;
17549 if (entry->vme_end <= trim_start) {
17550 /* entry fully before trim range: skip */
17551 continue;
17552 }
17553 if (entry->vme_start >= trim_end) {
17554 /* entry fully after trim range: done */
17555 break;
17556 }
17557 /* clip entry if needed */
17558 vm_map_copy_clip_start(copy_map, entry, trim_start);
17559 vm_map_copy_clip_end(copy_map, entry, trim_end);
17560 /* dispose of entry */
17561 copy_map->size -= entry->vme_end - entry->vme_start;
17562 vm_map_copy_entry_unlink(copy_map, entry);
17563 if (entry->is_sub_map) {
17564 vm_map_deallocate(VME_SUBMAP(entry));
17565 } else {
17566 vm_object_deallocate(VME_OBJECT(entry));
17567 }
17568 vm_map_copy_entry_dispose(copy_map, entry);
17569 entry = VM_MAP_ENTRY_NULL;
17570 }
17571
17572 /* restore copy_map's original page_shift */
17573 copy_map->cpy_hdr.page_shift = copy_page_shift;
17574 }
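/*
 * Worked example for vm_map_copy_trim() (illustrative, values assumed):
 * for a copy_map whose first entry starts at 0x10000, a call such as
 *
 *	vm_map_copy_trim(copy_map, 14, 0, 0x4000);	// 14 == 16K page shift
 *
 * rebases the trim range onto the entries (here [0x10000, 0x14000)),
 * clips any entry straddling those bounds using the 16K page size,
 * unlinks and releases the entries inside the range, and shrinks
 * copy_map->size accordingly.  The copy_map's original page_shift is
 * restored before returning.
 */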
17575
17576 /*
17577 * Make any necessary adjustments to "copy_map" to allow it to be
17578 * mapped into "target_map".
17579 * If no changes were necessary, "target_copy_map" points to the
17580 * untouched "copy_map".
17581 * If changes are necessary, changes will be made to "target_copy_map".
17582 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17583 * copy the original "copy_map" to it before applying the changes.
17584 * The caller should discard "target_copy_map" if it's not the same as
17585 * the original "copy_map".
17586 */
17587 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
17588 kern_return_t
17589 vm_map_copy_adjust_to_target(
17590 vm_map_copy_t src_copy_map,
17591 vm_map_offset_t offset,
17592 vm_map_size_t size,
17593 vm_map_t target_map,
17594 boolean_t copy,
17595 vm_map_copy_t *target_copy_map_p,
17596 vm_map_offset_t *overmap_start_p,
17597 vm_map_offset_t *overmap_end_p,
17598 vm_map_offset_t *trimmed_start_p)
17599 {
17600 vm_map_copy_t copy_map, target_copy_map;
17601 vm_map_size_t target_size;
17602 vm_map_size_t src_copy_map_size;
17603 vm_map_size_t overmap_start, overmap_end;
17604 int misalignments;
17605 vm_map_entry_t entry, target_entry;
17606 vm_map_offset_t addr_adjustment;
17607 vm_map_offset_t new_start, new_end;
17608 int copy_page_mask, target_page_mask;
17609 uint16_t copy_page_shift, target_page_shift;
17610 vm_map_offset_t trimmed_end;
17611
17612 /*
17613 * Assert that the vm_map_copy is coming from the right
17614 * zone and hasn't been forged
17615 */
17616 vm_map_copy_require(src_copy_map);
17617 assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17618
17619 /*
17620 * Start working with "src_copy_map" but we'll switch
17621 * to "target_copy_map" as soon as we start making adjustments.
17622 */
17623 copy_map = src_copy_map;
17624 src_copy_map_size = src_copy_map->size;
17625
17626 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17627 copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
17628 target_page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
17629 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17630
17631 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
17632
17633 target_copy_map = *target_copy_map_p;
17634 if (target_copy_map != VM_MAP_COPY_NULL) {
17635 vm_map_copy_require(target_copy_map);
17636 }
17637
17638 if (offset + size > copy_map->size) {
17639 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
17640 return KERN_INVALID_ARGUMENT;
17641 }
17642
17643 /* trim the end */
17644 trimmed_end = 0;
17645 new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
17646 if (new_end < copy_map->size) {
17647 trimmed_end = src_copy_map_size - new_end;
17648 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
17649 /* get "target_copy_map" if needed and adjust it */
17650 vm_map_copy_adjust_get_target_copy_map(copy_map,
17651 &target_copy_map);
17652 copy_map = target_copy_map;
17653 vm_map_copy_trim(target_copy_map, target_page_shift,
17654 new_end, copy_map->size);
17655 }
17656
17657 /* trim the start */
17658 new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
17659 if (new_start != 0) {
17660 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
17661 /* get "target_copy_map" if needed and adjust it */
17662 vm_map_copy_adjust_get_target_copy_map(copy_map,
17663 &target_copy_map);
17664 copy_map = target_copy_map;
17665 vm_map_copy_trim(target_copy_map, target_page_shift,
17666 0, new_start);
17667 }
17668 *trimmed_start_p = new_start;
17669
17670 /* target_size starts with what's left after trimming */
17671 target_size = copy_map->size;
17672 assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
17673 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17674 (uint64_t)target_size, (uint64_t)src_copy_map_size,
17675 (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
17676
17677 /* check for misalignments but don't adjust yet */
17678 misalignments = 0;
17679 overmap_start = 0;
17680 overmap_end = 0;
17681 if (copy_page_shift < target_page_shift) {
17682 /*
17683 * Remapping from 4K to 16K: check the VM object alignments
17684 * throughout the range.
17685 * If the start and end of the range are mis-aligned, we can
17686 * over-map to re-align, and adjust the "overmap" start/end
17687 * and "target_size" of the range accordingly.
17688 * If there is any mis-alignment within the range:
17689 * if "copy":
17690 * we can do immediate-copy instead of copy-on-write,
17691 * else:
17692 * no way to remap and share; fail.
17693 */
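/*
 * Worked example (illustrative, offsets assumed): with a 16K
 * target_page_mask (0x3fff), a single-entry copy_map whose
 * VME_OFFSET is 0x1000 and whose size is 0x2000 is misaligned at
 * both ends.  When sharing (!copy), the loop below records one
 * "overmap_start" and one "overmap_end"; the adjustment pass
 * further down then widens the mapping to cover object offsets
 * [0, 0x4000) so it can be expressed in whole 16K pages.  With
 * "copy", the same offsets would count as "misalignments" and
 * force a physical copy instead.
 */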
17694 for (entry = vm_map_copy_first_entry(copy_map);
17695 entry != vm_map_copy_to_entry(copy_map);
17696 entry = entry->vme_next) {
17697 vm_object_offset_t object_offset_start, object_offset_end;
17698
17699 object_offset_start = VME_OFFSET(entry);
17700 object_offset_end = object_offset_start;
17701 object_offset_end += entry->vme_end - entry->vme_start;
17702 if (object_offset_start & target_page_mask) {
17703 if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
17704 overmap_start++;
17705 } else {
17706 misalignments++;
17707 }
17708 }
17709 if (object_offset_end & target_page_mask) {
17710 if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
17711 overmap_end++;
17712 } else {
17713 misalignments++;
17714 }
17715 }
17716 }
17717 }
17718 entry = VM_MAP_ENTRY_NULL;
17719
17720 /* decide how to deal with misalignments */
17721 assert(overmap_start <= 1);
17722 assert(overmap_end <= 1);
17723 if (!overmap_start && !overmap_end && !misalignments) {
17724 /* copy_map is properly aligned for target_map ... */
17725 if (*trimmed_start_p) {
17726 /* ... but we trimmed it, so still need to adjust */
17727 } else {
17728 /* ... and we didn't trim anything: we're done */
17729 if (target_copy_map == VM_MAP_COPY_NULL) {
17730 target_copy_map = copy_map;
17731 }
17732 *target_copy_map_p = target_copy_map;
17733 *overmap_start_p = 0;
17734 *overmap_end_p = 0;
17735 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17736 return KERN_SUCCESS;
17737 }
17738 } else if (misalignments && !copy) {
17739 /* can't "share" if misaligned */
17740 DEBUG4K_ADJUST("unsupported sharing\n");
17741 #if MACH_ASSERT
17742 if (debug4k_panic_on_misaligned_sharing) {
17743 panic("DEBUG4k %s:%d unsupported sharing", __FUNCTION__, __LINE__);
17744 }
17745 #endif /* MACH_ASSERT */
17746 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
17747 return KERN_NOT_SUPPORTED;
17748 } else {
17749 /* can't virtual-copy if misaligned (but can physical-copy) */
17750 DEBUG4K_ADJUST("mis-aligned copying\n");
17751 }
17752
17753 /* get a "target_copy_map" if needed and switch to it */
17754 vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
17755 copy_map = target_copy_map;
17756
17757 if (misalignments && copy) {
17758 vm_map_size_t target_copy_map_size;
17759
17760 /*
17761 * Can't do copy-on-write with misaligned mappings.
17762 * Replace the mappings with a physical copy of the original
17763 * mappings' contents.
17764 */
17765 target_copy_map_size = target_copy_map->size;
17766 kern_return_t kr = vm_map_copy_to_physcopy(target_copy_map, target_map);
17767 if (kr != KERN_SUCCESS) {
17768 return kr;
17769 }
17770 *target_copy_map_p = target_copy_map;
17771 *overmap_start_p = 0;
17772 *overmap_end_p = target_copy_map->size - target_copy_map_size;
17773 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17774 return KERN_SUCCESS;
17775 }
17776
17777 /* apply the adjustments */
17778 misalignments = 0;
17779 overmap_start = 0;
17780 overmap_end = 0;
17781 /* remove copy_map->offset, so that everything starts at offset 0 */
17782 addr_adjustment = copy_map->offset;
17783 /* also remove whatever we trimmed from the start */
17784 addr_adjustment += *trimmed_start_p;
17785 for (target_entry = vm_map_copy_first_entry(target_copy_map);
17786 target_entry != vm_map_copy_to_entry(target_copy_map);
17787 target_entry = target_entry->vme_next) {
17788 vm_object_offset_t object_offset_start, object_offset_end;
17789
17790 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17791 object_offset_start = VME_OFFSET(target_entry);
17792 if (object_offset_start & target_page_mask) {
17793 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17794 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17795 /*
17796 * start of 1st entry is mis-aligned:
17797 * re-adjust by over-mapping.
17798 */
17799 overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
17800 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
17801 VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
17802 } else {
17803 misalignments++;
17804 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17805 assert(copy);
17806 }
17807 }
17808
17809 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17810 target_size += overmap_start;
17811 } else {
17812 target_entry->vme_start += overmap_start;
17813 }
17814 target_entry->vme_end += overmap_start;
17815
17816 object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
17817 if (object_offset_end & target_page_mask) {
17818 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17819 if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
17820 /*
17821 * end of last entry is mis-aligned: re-adjust by over-mapping.
17822 */
17823 overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
17824 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
17825 target_entry->vme_end += overmap_end;
17826 target_size += overmap_end;
17827 } else {
17828 misalignments++;
17829 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17830 assert(copy);
17831 }
17832 }
17833 target_entry->vme_start -= addr_adjustment;
17834 target_entry->vme_end -= addr_adjustment;
17835 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17836 }
17837
17838 target_copy_map->size = target_size;
17839 target_copy_map->offset += overmap_start;
17840 target_copy_map->offset -= addr_adjustment;
17841 target_copy_map->cpy_hdr.page_shift = target_page_shift;
17842
17843 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17844 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17845 assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
17846 assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
17847
17848 *target_copy_map_p = target_copy_map;
17849 *overmap_start_p = overmap_start;
17850 *overmap_end_p = overmap_end;
17851
17852 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17853 return KERN_SUCCESS;
17854 }
17855
17856 kern_return_t
17857 vm_map_range_physical_size(
17858 vm_map_t map,
17859 vm_map_address_t start,
17860 mach_vm_size_t size,
17861 mach_vm_size_t * phys_size)
17862 {
17863 kern_return_t kr;
17864 vm_map_copy_t copy_map, target_copy_map;
17865 vm_map_offset_t adjusted_start, adjusted_end;
17866 vm_map_size_t adjusted_size;
17867 vm_prot_t cur_prot, max_prot;
17868 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17869 vm_map_kernel_flags_t vmk_flags;
17870
17871 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
17872 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
17873 adjusted_size = adjusted_end - adjusted_start;
17874 *phys_size = adjusted_size;
17875 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
17876 return KERN_SUCCESS;
17877 }
17878 if (start == 0) {
17879 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
17880 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
17881 adjusted_size = adjusted_end - adjusted_start;
17882 *phys_size = adjusted_size;
17883 return KERN_SUCCESS;
17884 }
17885 if (adjusted_size == 0) {
17886 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
17887 *phys_size = 0;
17888 return KERN_SUCCESS;
17889 }
17890
17891 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
17892 vmk_flags.vmkf_copy_pageable = TRUE;
17893 vmk_flags.vmkf_copy_same_map = TRUE;
17894 assert(adjusted_size != 0);
17895 cur_prot = VM_PROT_NONE; /* legacy mode */
17896 max_prot = VM_PROT_NONE; /* legacy mode */
17897 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
17898 FALSE /* copy */,
17899 &copy_map,
17900 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
17901 vmk_flags);
17902 if (kr != KERN_SUCCESS) {
17903 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17904 //assert(0);
17905 *phys_size = 0;
17906 return kr;
17907 }
17908 assert(copy_map != VM_MAP_COPY_NULL);
17909 target_copy_map = copy_map;
17910 DEBUG4K_ADJUST("adjusting...\n");
17911 kr = vm_map_copy_adjust_to_target(
17912 copy_map,
17913 start - adjusted_start, /* offset */
17914 size, /* size */
17915 kernel_map,
17916 FALSE, /* copy */
17917 &target_copy_map,
17918 &overmap_start,
17919 &overmap_end,
17920 &trimmed_start);
17921 if (kr == KERN_SUCCESS) {
17922 if (target_copy_map->size != *phys_size) {
17923 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
17924 }
17925 *phys_size = target_copy_map->size;
17926 } else {
17927 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17928 //assert(0);
17929 *phys_size = 0;
17930 }
17931 vm_map_copy_discard(copy_map);
17932 copy_map = VM_MAP_COPY_NULL;
17933
17934 return kr;
17935 }
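/*
 * Illustrative usage sketch (not part of the build, values assumed): a
 * caller with a 4K-page task map asking how much address space the range
 * [0x6000, 0x8000) will occupy once expressed at the kernel's 16K page
 * size.  "task_map_4k" is a placeholder for the caller's vm_map_t.
 *
 *	mach_vm_size_t phys_size;
 *	kern_return_t kr;
 *
 *	kr = vm_map_range_physical_size(task_map_4k, 0x6000, 0x2000,
 *	    &phys_size);
 *	if (kr == KERN_SUCCESS) {
 *		// phys_size is the 16K-rounded footprint; if the backing
 *		// object offsets are only 4K-aligned it can come back
 *		// larger than the 4K-rounded size (e.g. 0x4000).
 *	}
 */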
17936
17937
17938 kern_return_t
17939 memory_entry_check_for_adjustment(
17940 vm_map_t src_map,
17941 ipc_port_t port,
17942 vm_map_offset_t *overmap_start,
17943 vm_map_offset_t *overmap_end)
17944 {
17945 kern_return_t kr = KERN_SUCCESS;
17946 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
17947
17948 assert(port);
17949 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
17950
17951 vm_named_entry_t named_entry;
17952
17953 named_entry = mach_memory_entry_from_port(port);
17954 copy_map = named_entry->backing.copy;
17955 target_copy_map = copy_map;
17956
17957 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
17958 vm_map_offset_t trimmed_start;
17959
17960 trimmed_start = 0;
17961 DEBUG4K_ADJUST("adjusting...\n");
17962 kr = vm_map_copy_adjust_to_target(
17963 copy_map,
17964 0, /* offset */
17965 copy_map->size, /* size */
17966 src_map,
17967 FALSE, /* copy */
17968 &target_copy_map,
17969 overmap_start,
17970 overmap_end,
17971 &trimmed_start);
17972 assert(trimmed_start == 0);
17973 }
17974
17975 return kr;
17976 }
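/*
 * Illustrative sketch (not part of the build): a caller holding a
 * named-entry port asking whether mapping it for a 4K-page map will
 * require over-mapping at either end.  "map4k" and "entry_port" are
 * placeholders; the out values are only meaningful when "map4k" uses a
 * smaller page size than PAGE_SIZE, since that is the only case in which
 * an adjustment is attempted above.
 *
 *	vm_map_offset_t overmap_start = 0, overmap_end = 0;
 *	kern_return_t kr;
 *
 *	kr = memory_entry_check_for_adjustment(map4k, entry_port,
 *	    &overmap_start, &overmap_end);
 *	// non-zero overmap_start/overmap_end indicate how far the mapping
 *	// would extend past the named entry's bounds to stay page-aligned
 */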
17977
17978
17979 /*
17980 * Routine: vm_remap
17981 *
17982 * Map a portion of a task's address space.
17983 * The mapped region must not overlap more than
17984 * one VM memory object. Protections and inheritance
17985 * attributes remain the same as in the original task
17986 * and are returned through the IN/OUT parameters.
17987 * Source and target tasks can be identical.
17988 * Other attributes are the same as for vm_map().
17989 */
17990 kern_return_t
17991 vm_map_remap(
17992 vm_map_t target_map,
17993 vm_map_address_t *address,
17994 vm_map_size_t size,
17995 vm_map_offset_t mask,
17996 int flags,
17997 vm_map_kernel_flags_t vmk_flags,
17998 vm_tag_t tag,
17999 vm_map_t src_map,
18000 vm_map_offset_t memory_address,
18001 boolean_t copy,
18002 vm_prot_t *cur_protection, /* IN/OUT */
18003 vm_prot_t *max_protection, /* IN/OUT */
18004 vm_inherit_t inheritance)
18005 {
18006 kern_return_t result;
18007 vm_map_entry_t entry;
18008 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
18009 vm_map_entry_t new_entry;
18010 vm_map_copy_t copy_map;
18011 vm_map_offset_t offset_in_mapping;
18012 vm_map_size_t target_size = 0;
18013 vm_map_size_t src_page_mask, target_page_mask;
18014 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
18015 vm_map_offset_t initial_memory_address;
18016 vm_map_size_t initial_size;
18017
18018 if (target_map == VM_MAP_NULL) {
18019 return KERN_INVALID_ARGUMENT;
18020 }
18021
18022 initial_memory_address = memory_address;
18023 initial_size = size;
18024 src_page_mask = VM_MAP_PAGE_MASK(src_map);
18025 target_page_mask = VM_MAP_PAGE_MASK(target_map);
18026
18027 switch (inheritance) {
18028 case VM_INHERIT_NONE:
18029 case VM_INHERIT_COPY:
18030 case VM_INHERIT_SHARE:
18031 if (size != 0 && src_map != VM_MAP_NULL) {
18032 break;
18033 }
18034 OS_FALLTHROUGH;
18035 default:
18036 return KERN_INVALID_ARGUMENT;
18037 }
18038
18039 if (src_page_mask != target_page_mask) {
18040 if (copy) {
18041 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18042 } else {
18043 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18044 }
18045 }
18046
18047 /*
18048 * If the user is requesting that we return the address of the
18049 * first byte of the data (rather than the base of the page),
18050 * then we use different rounding semantics: specifically,
18051 * we assume that (memory_address, size) describes a region
18052 * all of whose pages we must cover, rather than a base to be truncated
18053 * down and a size to be added to that base. So we figure out
18054 * the highest page that the requested region includes and make
18055 * sure that the size will cover it.
18056 *
18057 * The key example we're worried about is of the form:
18058 *
18059 * memory_address = 0x1ff0, size = 0x20
18060 *
18061 * With the old semantics, we round down the memory_address to 0x1000
18062 * and round up the size to 0x1000, resulting in our covering *only*
18063 * page 0x1000. With the new semantics, we'd realize that the region covers
18064 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
18065 * 0x1000 and page 0x2000 in the region we remap.
18066 */
18067 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18068 vm_map_offset_t range_start, range_end;
18069
18070 range_start = vm_map_trunc_page(memory_address, src_page_mask);
18071 range_end = vm_map_round_page(memory_address + size, src_page_mask);
18072 memory_address = range_start;
18073 size = range_end - range_start;
18074 offset_in_mapping = initial_memory_address - memory_address;
18075 } else {
18076 /*
18077 * IMPORTANT:
18078 * This legacy code path is broken: for the range mentioned
18079 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
18080 * two 4k pages, it yields [ memory_address = 0x1000,
18081 * size = 0x1000 ], which covers only the first 4k page.
18082 * BUT some code unfortunately depends on this bug, so we
18083 * can't fix it without breaking something.
18084 * New code should automatically opt into the new behavior
18085 * by using the VM_FLAGS_RETURN_DATA_ADDR flag.
18086 */
18087 offset_in_mapping = 0;
18088 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
18089 size = vm_map_round_page(size, src_page_mask);
18090 initial_memory_address = memory_address;
18091 initial_size = size;
18092 }
18093
18094
18095 if (size == 0) {
18096 return KERN_INVALID_ARGUMENT;
18097 }
18098
18099 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
18100 /* must be copy-on-write to be "media resilient" */
18101 if (!copy) {
18102 return KERN_INVALID_ARGUMENT;
18103 }
18104 }
18105
18106 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
18107 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
18108
18109 assert(size != 0);
18110 result = vm_map_copy_extract(src_map,
18111 memory_address,
18112 size,
18113 copy, &copy_map,
18114 cur_protection, /* IN/OUT */
18115 max_protection, /* IN/OUT */
18116 inheritance,
18117 vmk_flags);
18118 if (result != KERN_SUCCESS) {
18119 return result;
18120 }
18121 assert(copy_map != VM_MAP_COPY_NULL);
18122
18123 overmap_start = 0;
18124 overmap_end = 0;
18125 trimmed_start = 0;
18126 target_size = size;
18127 if (src_page_mask != target_page_mask) {
18128 vm_map_copy_t target_copy_map;
18129
18130 target_copy_map = copy_map; /* can modify "copy_map" itself */
18131 DEBUG4K_ADJUST("adjusting...\n");
18132 result = vm_map_copy_adjust_to_target(
18133 copy_map,
18134 offset_in_mapping, /* offset */
18135 initial_size,
18136 target_map,
18137 copy,
18138 &target_copy_map,
18139 &overmap_start,
18140 &overmap_end,
18141 &trimmed_start);
18142 if (result != KERN_SUCCESS) {
18143 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
18144 vm_map_copy_discard(copy_map);
18145 return result;
18146 }
18147 if (trimmed_start == 0) {
18148 /* nothing trimmed: no adjustment needed */
18149 } else if (trimmed_start >= offset_in_mapping) {
18150 /* trimmed more than offset_in_mapping: nothing left */
18151 assert(overmap_start == 0);
18152 assert(overmap_end == 0);
18153 offset_in_mapping = 0;
18154 } else {
18155 /* trimmed some of offset_in_mapping: adjust */
18156 assert(overmap_start == 0);
18157 assert(overmap_end == 0);
18158 offset_in_mapping -= trimmed_start;
18159 }
18160 offset_in_mapping += overmap_start;
18161 target_size = target_copy_map->size;
18162 }
18163
18164 /*
18165 * Allocate/check a range of free virtual address
18166 * space for the target
18167 */
18168 *address = vm_map_trunc_page(*address, target_page_mask);
18169 vm_map_lock(target_map);
18170 target_size = vm_map_round_page(target_size, target_page_mask);
18171 result = vm_map_remap_range_allocate(target_map, address,
18172 target_size,
18173 mask, flags, vmk_flags, tag,
18174 &insp_entry);
18175
18176 for (entry = vm_map_copy_first_entry(copy_map);
18177 entry != vm_map_copy_to_entry(copy_map);
18178 entry = new_entry) {
18179 new_entry = entry->vme_next;
18180 vm_map_copy_entry_unlink(copy_map, entry);
18181 if (result == KERN_SUCCESS) {
18182 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18183 /* no codesigning -> read-only access */
18184 entry->max_protection = VM_PROT_READ;
18185 entry->protection = VM_PROT_READ;
18186 entry->vme_resilient_codesign = TRUE;
18187 }
18188 entry->vme_start += *address;
18189 entry->vme_end += *address;
18190 assert(!entry->map_aligned);
18191 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
18192 !entry->is_sub_map &&
18193 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
18194 VME_OBJECT(entry)->internal)) {
18195 entry->vme_resilient_media = TRUE;
18196 }
18197 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
18198 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
18199 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
18200 vm_map_store_entry_link(target_map, insp_entry, entry,
18201 vmk_flags);
18202 insp_entry = entry;
18203 } else {
18204 if (!entry->is_sub_map) {
18205 vm_object_deallocate(VME_OBJECT(entry));
18206 } else {
18207 vm_map_deallocate(VME_SUBMAP(entry));
18208 }
18209 vm_map_copy_entry_dispose(copy_map, entry);
18210 }
18211 }
18212
18213 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18214 *cur_protection = VM_PROT_READ;
18215 *max_protection = VM_PROT_READ;
18216 }
18217
18218 if (result == KERN_SUCCESS) {
18219 target_map->size += target_size;
18220 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18221
18222 }
18223 vm_map_unlock(target_map);
18224
18225 if (result == KERN_SUCCESS && target_map->wiring_required) {
18226 result = vm_map_wire_kernel(target_map, *address,
18227 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18228 TRUE);
18229 }
18230
18231 /*
18232 * If requested, return the address of the data pointed to by the
18233 * request, rather than the base of the resulting page.
18234 */
18235 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18236 *address += offset_in_mapping;
18237 }
18238
18239 if (src_page_mask != target_page_mask) {
18240 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18241 }
18242 vm_map_copy_discard(copy_map);
18243 copy_map = VM_MAP_COPY_NULL;
18244
18245 return result;
18246 }
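/*
 * Illustrative usage sketch (not part of the build): sharing a region of
 * "src_map" into "dst_map" at a kernel-chosen address, asking for the
 * data address (rather than the page base) to be returned.  All names are
 * placeholders; the protections come back through the IN/OUT parameters
 * (VM_PROT_NONE selects the legacy "compute from source" behavior).
 *
 *	vm_map_address_t target_addr = 0;
 *	vm_prot_t cur_prot = VM_PROT_NONE;
 *	vm_prot_t max_prot = VM_PROT_NONE;
 *	kern_return_t kr;
 *
 *	kr = vm_map_remap(dst_map, &target_addr, size, 0,
 *	    VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
 *	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
 *	    src_map, src_addr, FALSE,	// copy == FALSE: share
 *	    &cur_prot, &max_prot, VM_INHERIT_SHARE);
 */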
18247
18248 /*
18249 * Routine: vm_map_remap_range_allocate
18250 *
18251 * Description:
18252 * Allocate a range in the specified virtual address map.
18253 * Returns the address and the map entry just before the allocated
18254 * range.
18255 *
18256 * Map must be locked.
18257 */
18258
18259 static kern_return_t
18260 vm_map_remap_range_allocate(
18261 vm_map_t map,
18262 vm_map_address_t *address, /* IN/OUT */
18263 vm_map_size_t size,
18264 vm_map_offset_t mask,
18265 int flags,
18266 vm_map_kernel_flags_t vmk_flags,
18267 __unused vm_tag_t tag,
18268 vm_map_entry_t *map_entry) /* OUT */
18269 {
18270 vm_map_entry_t entry;
18271 vm_map_offset_t start;
18272 vm_map_offset_t end;
18273 vm_map_offset_t desired_empty_end;
18274 kern_return_t kr;
18275 vm_map_entry_t hole_entry;
18276
18277 StartAgain:;
18278
18279 start = *address;
18280
18281 if (flags & VM_FLAGS_ANYWHERE) {
18282 if (flags & VM_FLAGS_RANDOM_ADDR) {
18283 /*
18284 * Get a random start address.
18285 */
18286 kr = vm_map_random_address_for_size(map, address, size);
18287 if (kr != KERN_SUCCESS) {
18288 return kr;
18289 }
18290 start = *address;
18291 }
18292
18293 /*
18294 * Calculate the first possible address.
18295 */
18296
18297 if (start < map->min_offset) {
18298 start = map->min_offset;
18299 }
18300 if (start > map->max_offset) {
18301 return KERN_NO_SPACE;
18302 }
18303
18304 /*
18305 * Look for the first possible address;
18306 * if there's already something at this
18307 * address, we have to start after it.
18308 */
18309
18310 if (map->disable_vmentry_reuse == TRUE) {
18311 VM_MAP_HIGHEST_ENTRY(map, entry, start);
18312 } else {
18313 if (map->holelistenabled) {
18314 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
18315
18316 if (hole_entry == NULL) {
18317 /*
18318 * No more space in the map?
18319 */
18320 return KERN_NO_SPACE;
18321 } else {
18322 boolean_t found_hole = FALSE;
18323
18324 do {
18325 if (hole_entry->vme_start >= start) {
18326 start = hole_entry->vme_start;
18327 found_hole = TRUE;
18328 break;
18329 }
18330
18331 if (hole_entry->vme_end > start) {
18332 found_hole = TRUE;
18333 break;
18334 }
18335 hole_entry = hole_entry->vme_next;
18336 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
18337
18338 if (found_hole == FALSE) {
18339 return KERN_NO_SPACE;
18340 }
18341
18342 entry = hole_entry;
18343 }
18344 } else {
18345 assert(first_free_is_valid(map));
18346 if (start == map->min_offset) {
18347 if ((entry = map->first_free) != vm_map_to_entry(map)) {
18348 start = entry->vme_end;
18349 }
18350 } else {
18351 vm_map_entry_t tmp_entry;
18352 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
18353 start = tmp_entry->vme_end;
18354 }
18355 entry = tmp_entry;
18356 }
18357 }
18358 start = vm_map_round_page(start,
18359 VM_MAP_PAGE_MASK(map));
18360 }
18361
18362 /*
18363 * In any case, the "entry" always precedes
18364 * the proposed new region throughout the
18365 * loop:
18366 */
18367
18368 while (TRUE) {
18369 vm_map_entry_t next;
18370
18371 /*
18372 * Find the end of the proposed new region.
18373 * Be sure we didn't go beyond the end, or
18374 * wrap around the address.
18375 */
18376
18377 end = ((start + mask) & ~mask);
18378 end = vm_map_round_page(end,
18379 VM_MAP_PAGE_MASK(map));
18380 if (end < start) {
18381 return KERN_NO_SPACE;
18382 }
18383 start = end;
18384 end += size;
18385
18386 /* We want an entire page of empty space, but don't increase the allocation size. */
18387 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
18388
18389 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
18390 if (map->wait_for_space) {
18391 if (size <= (map->max_offset -
18392 map->min_offset)) {
18393 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
18394 vm_map_unlock(map);
18395 thread_block(THREAD_CONTINUE_NULL);
18396 vm_map_lock(map);
18397 goto StartAgain;
18398 }
18399 }
18400
18401 return KERN_NO_SPACE;
18402 }
18403
18404 next = entry->vme_next;
18405
18406 if (map->holelistenabled) {
18407 if (entry->vme_end >= desired_empty_end) {
18408 break;
18409 }
18410 } else {
18411 /*
18412 * If there are no more entries, we must win.
18413 *
18414 * OR
18415 *
18416 * If there is another entry, it must be
18417 * after the end of the potential new region.
18418 */
18419
18420 if (next == vm_map_to_entry(map)) {
18421 break;
18422 }
18423
18424 if (next->vme_start >= desired_empty_end) {
18425 break;
18426 }
18427 }
18428
18429 /*
18430 * Didn't fit -- move to the next entry.
18431 */
18432
18433 entry = next;
18434
18435 if (map->holelistenabled) {
18436 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
18437 /*
18438 * Wrapped around
18439 */
18440 return KERN_NO_SPACE;
18441 }
18442 start = entry->vme_start;
18443 } else {
18444 start = entry->vme_end;
18445 }
18446 }
18447
18448 if (map->holelistenabled) {
18449 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
18450 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
18451 }
18452 }
18453
18454 *address = start;
18455 } else {
18456 vm_map_entry_t temp_entry;
18457
18458 /*
18459 * Verify that:
18460 * the address doesn't itself violate
18461 * the mask requirement.
18462 */
18463
18464 if ((start & mask) != 0) {
18465 return KERN_NO_SPACE;
18466 }
18467
18468
18469 /*
18470 * ... the address is within bounds
18471 */
18472
18473 end = start + size;
18474
18475 if ((start < map->min_offset) ||
18476 (end > map->max_offset) ||
18477 (start >= end)) {
18478 return KERN_INVALID_ADDRESS;
18479 }
18480
18481 /*
18482 * If we're asked to overwrite whatever was mapped in that
18483 * range, first deallocate that range.
18484 */
18485 if (flags & VM_FLAGS_OVERWRITE) {
18486 vm_map_t zap_map;
18487 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
18488
18489 /*
18490 * We use a "zap_map" to avoid having to unlock
18491 * the "map" in vm_map_delete(), which would compromise
18492 * the atomicity of the "deallocate" and then "remap"
18493 * combination.
18494 */
18495 zap_map = vm_map_create_options(PMAP_NULL, start, end,
18496 VM_MAP_CREATE_ZAP_OPTIONS(map));
18497 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
18498
18499 if (vmk_flags.vmkf_overwrite_immutable) {
18500 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
18501 }
18502 kr = vm_map_delete(map, start, end,
18503 remove_flags,
18504 zap_map);
18505 if (kr == KERN_SUCCESS) {
18506 vm_map_destroy(zap_map,
18507 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18508 zap_map = VM_MAP_NULL;
18509 }
18510 }
18511
18512 /*
18513 * ... the starting address isn't allocated
18514 */
18515
18516 if (vm_map_lookup_entry(map, start, &temp_entry)) {
18517 return KERN_NO_SPACE;
18518 }
18519
18520 entry = temp_entry;
18521
18522 /*
18523 * ... the next region doesn't overlap the
18524 * end point.
18525 */
18526
18527 if ((entry->vme_next != vm_map_to_entry(map)) &&
18528 (entry->vme_next->vme_start < end)) {
18529 return KERN_NO_SPACE;
18530 }
18531 }
18532 *map_entry = entry;
18533 return KERN_SUCCESS;
18534 }
18535
18536 /*
18537 * vm_map_switch:
18538 *
18539 * Set the address map for the current thread to the specified map
18540 */
18541
18542 vm_map_t
18543 vm_map_switch(
18544 vm_map_t map)
18545 {
18546 int mycpu;
18547 thread_t thread = current_thread();
18548 vm_map_t oldmap = thread->map;
18549
18550 mp_disable_preemption();
18551 mycpu = cpu_number();
18552
18553 /*
18554 * Deactivate the current map and activate the requested map
18555 */
18556 PMAP_SWITCH_USER(thread, map, mycpu);
18557
18558 mp_enable_preemption();
18559 return oldmap;
18560 }
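/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): the expected usage pattern is to take a reference on the
 * target map, switch to it, do the work, then restore the map returned
 * by the first vm_map_switch() call, which is exactly the idiom used by
 * vm_map_write_user() and vm_map_read_user() below.
 */
#if 0
static void
example_with_map_switched(vm_map_t target_map)
{
	vm_map_t oldmap;

	vm_map_reference(target_map);
	oldmap = vm_map_switch(target_map);
	/* ... operate while "target_map" is the current address map ... */
	vm_map_switch(oldmap);
	vm_map_deallocate(target_map);
}
#endif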
18561
18562
18563 /*
18564 * Routine: vm_map_write_user
18565 *
18566 * Description:
18567 * Copy out data from a kernel space into space in the
18568 * destination map. The space must already exist in the
18569 * destination map.
18570 * NOTE: This routine should only be called by threads
18571 * which can block on a page fault, i.e. kernel-mode user
18572 * threads.
18573 *
18574 */
18575 kern_return_t
18576 vm_map_write_user(
18577 vm_map_t map,
18578 void *src_p,
18579 vm_map_address_t dst_addr,
18580 vm_size_t size)
18581 {
18582 kern_return_t kr = KERN_SUCCESS;
18583
18584 if (current_map() == map) {
18585 if (copyout(src_p, dst_addr, size)) {
18586 kr = KERN_INVALID_ADDRESS;
18587 }
18588 } else {
18589 vm_map_t oldmap;
18590
18591 /* take on the identity of the target map while doing */
18592 /* the transfer */
18593
18594 vm_map_reference(map);
18595 oldmap = vm_map_switch(map);
18596 if (copyout(src_p, dst_addr, size)) {
18597 kr = KERN_INVALID_ADDRESS;
18598 }
18599 vm_map_switch(oldmap);
18600 vm_map_deallocate(map);
18601 }
18602 return kr;
18603 }
18604
18605 /*
18606 * Routine: vm_map_read_user
18607 *
18608 * Description:
18609 * Copy in data from a user space source map into the
18610 * kernel map. The space must already exist in the
18611 * kernel map.
18612 * NOTE: This routine should only be called by threads
18613 * which can block on a page fault, i.e. kernel-mode user
18614 * threads.
18615 *
18616 */
18617 kern_return_t
18618 vm_map_read_user(
18619 vm_map_t map,
18620 vm_map_address_t src_addr,
18621 void *dst_p,
18622 vm_size_t size)
18623 {
18624 kern_return_t kr = KERN_SUCCESS;
18625
18626 if (current_map() == map) {
18627 if (copyin(src_addr, dst_p, size)) {
18628 kr = KERN_INVALID_ADDRESS;
18629 }
18630 } else {
18631 vm_map_t oldmap;
18632
18633 /* take on the identity of the target map while doing */
18634 /* the transfer */
18635
18636 vm_map_reference(map);
18637 oldmap = vm_map_switch(map);
18638 if (copyin(src_addr, dst_p, size)) {
18639 kr = KERN_INVALID_ADDRESS;
18640 }
18641 vm_map_switch(oldmap);
18642 vm_map_deallocate(map);
18643 }
18644 return kr;
18645 }
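/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): a hypothetical helper that publishes a small kernel structure
 * into a user task's map with vm_map_write_user(). The destination
 * address "u_dst" and the structure layout are assumptions for
 * illustration; the destination space must already exist in "user_map"
 * and the calling thread must be able to block on a page fault, per the
 * notes above.
 */
#if 0
static kern_return_t
example_publish_info(vm_map_t user_map, vm_map_address_t u_dst)
{
	struct {
		uint64_t generation;
		uint64_t timestamp;
	} info = { 1, 0 };

	/* copies out through the target map, switching identity if needed */
	return vm_map_write_user(user_map, &info, u_dst, sizeof(info));
}
#endif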
18646
18647
18648 /*
18649 * vm_map_check_protection:
18650 *
18651 * Assert that the target map allows the specified
18652 * privilege on the entire address region given.
18653 * The entire region must be allocated.
18654 */
18655 boolean_t
18656 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18657 vm_map_offset_t end, vm_prot_t protection)
18658 {
18659 vm_map_entry_t entry;
18660 vm_map_entry_t tmp_entry;
18661
18662 vm_map_lock(map);
18663
18664 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18665 vm_map_unlock(map);
18666 return FALSE;
18667 }
18668
18669 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18670 vm_map_unlock(map);
18671 return FALSE;
18672 }
18673
18674 entry = tmp_entry;
18675
18676 while (start < end) {
18677 if (entry == vm_map_to_entry(map)) {
18678 vm_map_unlock(map);
18679 return FALSE;
18680 }
18681
18682 /*
18683 * No holes allowed!
18684 */
18685
18686 if (start < entry->vme_start) {
18687 vm_map_unlock(map);
18688 return FALSE;
18689 }
18690
18691 /*
18692 * Check protection associated with entry.
18693 */
18694
18695 if ((entry->protection & protection) != protection) {
18696 vm_map_unlock(map);
18697 return FALSE;
18698 }
18699
18700 /* go to next entry */
18701
18702 start = entry->vme_end;
18703 entry = entry->vme_next;
18704 }
18705 vm_map_unlock(map);
18706 return TRUE;
18707 }
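/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): guard a hypothetical operation on [start, start + size) by
 * first verifying that the target map grants read/write access over the
 * whole range. vm_map_check_protection() takes and drops the map lock
 * itself, so the answer can become stale as soon as it returns.
 */
#if 0
static kern_return_t
example_require_rw(vm_map_t map, vm_map_offset_t start, vm_map_size_t size)
{
	if (!vm_map_check_protection(map, start, start + size,
	    VM_PROT_READ | VM_PROT_WRITE)) {
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}
#endif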
18708
18709 kern_return_t
18710 vm_map_purgable_control(
18711 vm_map_t map,
18712 vm_map_offset_t address,
18713 vm_purgable_t control,
18714 int *state)
18715 {
18716 vm_map_entry_t entry;
18717 vm_object_t object;
18718 kern_return_t kr;
18719 boolean_t was_nonvolatile;
18720
18721 /*
18722 * Vet all the input parameters and current type and state of the
18723 * underlying object. Return with an error if anything is amiss.
18724 */
18725 if (map == VM_MAP_NULL) {
18726 return KERN_INVALID_ARGUMENT;
18727 }
18728
18729 if (control != VM_PURGABLE_SET_STATE &&
18730 control != VM_PURGABLE_GET_STATE &&
18731 control != VM_PURGABLE_PURGE_ALL &&
18732 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
18733 return KERN_INVALID_ARGUMENT;
18734 }
18735
18736 if (control == VM_PURGABLE_PURGE_ALL) {
18737 vm_purgeable_object_purge_all();
18738 return KERN_SUCCESS;
18739 }
18740
18741 if ((control == VM_PURGABLE_SET_STATE ||
18742 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
18743 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
18744 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
18745 return KERN_INVALID_ARGUMENT;
18746 }
18747
18748 vm_map_lock_read(map);
18749
18750 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
18751 /*
18752 * Must pass a valid non-submap address.
18753 */
18754 vm_map_unlock_read(map);
18755 return KERN_INVALID_ADDRESS;
18756 }
18757
18758 if ((entry->protection & VM_PROT_WRITE) == 0 &&
18759 control != VM_PURGABLE_GET_STATE) {
18760 /*
18761 * Can't apply purgable controls to something you can't write.
18762 */
18763 vm_map_unlock_read(map);
18764 return KERN_PROTECTION_FAILURE;
18765 }
18766
18767 object = VME_OBJECT(entry);
18768 if (object == VM_OBJECT_NULL ||
18769 object->purgable == VM_PURGABLE_DENY) {
18770 /*
18771 * Object must already be present and be purgeable.
18772 */
18773 vm_map_unlock_read(map);
18774 return KERN_INVALID_ARGUMENT;
18775 }
18776
18777 vm_object_lock(object);
18778
18779 #if 00
18780 if (VME_OFFSET(entry) != 0 ||
18781 entry->vme_end - entry->vme_start != object->vo_size) {
18782 /*
18783 * Can only apply purgable controls to the whole (existing)
18784 * object at once.
18785 */
18786 vm_map_unlock_read(map);
18787 vm_object_unlock(object);
18788 return KERN_INVALID_ARGUMENT;
18789 }
18790 #endif
18791
18792 assert(!entry->is_sub_map);
18793 assert(!entry->use_pmap); /* purgeable has its own accounting */
18794
18795 vm_map_unlock_read(map);
18796
18797 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
18798
18799 kr = vm_object_purgable_control(object, control, state);
18800
18801 if (was_nonvolatile &&
18802 object->purgable != VM_PURGABLE_NONVOLATILE &&
18803 map->pmap == kernel_pmap) {
18804 #if DEBUG
18805 object->vo_purgeable_volatilizer = kernel_task;
18806 #endif /* DEBUG */
18807 }
18808
18809 vm_object_unlock(object);
18810
18811 return kr;
18812 }
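/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): mark a purgeable mapping volatile and later check whether the
 * system emptied it. "address" is assumed to fall within an entry backed
 * by a purgeable object, as the routine above requires.
 */
#if 0
static void
example_purgeable_cycle(vm_map_t map, vm_map_offset_t address)
{
	int state;

	/* allow the system to reclaim the object under memory pressure */
	state = VM_PURGABLE_VOLATILE;
	(void) vm_map_purgable_control(map, address,
	    VM_PURGABLE_SET_STATE, &state);

	/* ... later: find out whether the contents were purged ... */
	(void) vm_map_purgable_control(map, address,
	    VM_PURGABLE_GET_STATE, &state);
	if (state == VM_PURGABLE_EMPTY) {
		/* contents are gone; the caller must regenerate them */
	}
}
#endif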
18813
18814 void
18815 vm_map_footprint_query_page_info(
18816 vm_map_t map,
18817 vm_map_entry_t map_entry,
18818 vm_map_offset_t curr_s_offset,
18819 int *disposition_p)
18820 {
18821 int pmap_disp;
18822 vm_object_t object;
18823 int disposition;
18824 int effective_page_size;
18825
18826 vm_map_lock_assert_held(map);
18827 assert(!map->has_corpse_footprint);
18828 assert(curr_s_offset >= map_entry->vme_start);
18829 assert(curr_s_offset < map_entry->vme_end);
18830
18831 object = VME_OBJECT(map_entry);
18832 if (object == VM_OBJECT_NULL) {
18833 *disposition_p = 0;
18834 return;
18835 }
18836
18837 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18838
18839 pmap_disp = 0;
18840 if (object == VM_OBJECT_NULL) {
18841 /* nothing mapped here: no need to ask */
18842 *disposition_p = 0;
18843 return;
18844 } else if (map_entry->is_sub_map &&
18845 !map_entry->use_pmap) {
18846 /* nested pmap: no footprint */
18847 *disposition_p = 0;
18848 return;
18849 }
18850
18851 /*
18852 * Query the pmap.
18853 */
18854 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18855
18856 /*
18857 * Compute this page's disposition.
18858 */
18859 disposition = 0;
18860
18861 /* deal with "alternate accounting" first */
18862 if (!map_entry->is_sub_map &&
18863 object->vo_no_footprint) {
18864 /* does not count in footprint */
18865 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18866 } else if (!map_entry->is_sub_map &&
18867 (object->purgable == VM_PURGABLE_NONVOLATILE ||
18868 (object->purgable == VM_PURGABLE_DENY &&
18869 object->vo_ledger_tag)) &&
18870 VM_OBJECT_OWNER(object) != NULL &&
18871 VM_OBJECT_OWNER(object)->map == map) {
18872 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18873 if ((((curr_s_offset
18874 - map_entry->vme_start
18875 + VME_OFFSET(map_entry))
18876 / effective_page_size) <
18877 (object->resident_page_count +
18878 vm_compressor_pager_get_count(object->pager)))) {
18879 /*
18880 * Non-volatile purgeable object owned
18881 * by this task: report the first
18882 * "#resident + #compressed" pages as
18883 * "resident" (to show that they
18884 * contribute to the footprint) but not
18885 * "dirty" (to avoid double-counting
18886 * with the fake "non-volatile" region
18887 * we'll report at the end of the
18888 * address space to account for all
18889 * (mapped or not) non-volatile memory
18890 * owned by this task).
18891 */
18892 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18893 }
18894 } else if (!map_entry->is_sub_map &&
18895 (object->purgable == VM_PURGABLE_VOLATILE ||
18896 object->purgable == VM_PURGABLE_EMPTY) &&
18897 VM_OBJECT_OWNER(object) != NULL &&
18898 VM_OBJECT_OWNER(object)->map == map) {
18899 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18900 if ((((curr_s_offset
18901 - map_entry->vme_start
18902 + VME_OFFSET(map_entry))
18903 / effective_page_size) <
18904 object->wired_page_count)) {
18905 /*
18906 * Volatile|empty purgeable object owned
18907 * by this task: report the first
18908 * "#wired" pages as "resident" (to
18909 * show that they contribute to the
18910 * footprint) but not "dirty" (to avoid
18911 * double-counting with the fake
18912 * "non-volatile" region we'll report
18913 * at the end of the address space to
18914 * account for all (mapped or not)
18915 * non-volatile memory owned by this
18916 * task).
18917 */
18918 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18919 }
18920 } else if (!map_entry->is_sub_map &&
18921 map_entry->iokit_acct &&
18922 object->internal &&
18923 object->purgable == VM_PURGABLE_DENY) {
18924 /*
18925 * Non-purgeable IOKit memory: phys_footprint
18926 * includes the entire virtual mapping.
18927 */
18928 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18929 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18930 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18931 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18932 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18933 /* alternate accounting */
18934 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18935 if (map->pmap->footprint_was_suspended) {
18936 /*
18937 * The assertion below can fail if dyld
18938 * suspended footprint accounting
18939 * while doing some adjustments to
18940 * this page; the mapping would say
18941 * "use pmap accounting" but the page
18942 * would be marked "alternate
18943 * accounting".
18944 */
18945 } else
18946 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18947 {
18948 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18949 }
18950 disposition = 0;
18951 } else {
18952 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18953 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18954 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18955 disposition |= VM_PAGE_QUERY_PAGE_REF;
18956 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18957 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18958 } else {
18959 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18960 }
18961 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18962 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18963 }
18964 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18965 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18966 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18967 }
18968 }
18969
18970 *disposition_p = disposition;
18971 }
18972
18973 kern_return_t
18974 vm_map_page_query_internal(
18975 vm_map_t target_map,
18976 vm_map_offset_t offset,
18977 int *disposition,
18978 int *ref_count)
18979 {
18980 kern_return_t kr;
18981 vm_page_info_basic_data_t info;
18982 mach_msg_type_number_t count;
18983
18984 count = VM_PAGE_INFO_BASIC_COUNT;
18985 kr = vm_map_page_info(target_map,
18986 offset,
18987 VM_PAGE_INFO_BASIC,
18988 (vm_page_info_t) &info,
18989 &count);
18990 if (kr == KERN_SUCCESS) {
18991 *disposition = info.disposition;
18992 *ref_count = info.ref_count;
18993 } else {
18994 *disposition = 0;
18995 *ref_count = 0;
18996 }
18997
18998 return kr;
18999 }
19000
19001 kern_return_t
19002 vm_map_page_info(
19003 vm_map_t map,
19004 vm_map_offset_t offset,
19005 vm_page_info_flavor_t flavor,
19006 vm_page_info_t info,
19007 mach_msg_type_number_t *count)
19008 {
19009 return vm_map_page_range_info_internal(map,
19010 offset, /* start of range */
19011 (offset + 1), /* this will get rounded in the call to the page boundary */
19012 (int)-1, /* effective_page_shift: unspecified */
19013 flavor,
19014 info,
19015 count);
19016 }
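/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): use vm_map_page_query_internal() to ask whether the page
 * covering "va" in "map" is resident and dirty. The disposition bits
 * tested here are the same VM_PAGE_QUERY_PAGE_* flags assembled by the
 * routines above.
 */
#if 0
static boolean_t
example_page_is_dirty(vm_map_t map, vm_map_offset_t va)
{
	int disposition = 0;
	int ref_count = 0;

	if (vm_map_page_query_internal(map, va,
	    &disposition, &ref_count) != KERN_SUCCESS) {
		return FALSE;
	}
	return ((disposition & VM_PAGE_QUERY_PAGE_PRESENT) &&
	    (disposition & VM_PAGE_QUERY_PAGE_DIRTY)) ? TRUE : FALSE;
}
#endif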
19017
19018 kern_return_t
19019 vm_map_page_range_info_internal(
19020 vm_map_t map,
19021 vm_map_offset_t start_offset,
19022 vm_map_offset_t end_offset,
19023 int effective_page_shift,
19024 vm_page_info_flavor_t flavor,
19025 vm_page_info_t info,
19026 mach_msg_type_number_t *count)
19027 {
19028 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
19029 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
19030 vm_page_t m = VM_PAGE_NULL;
19031 kern_return_t retval = KERN_SUCCESS;
19032 int disposition = 0;
19033 int ref_count = 0;
19034 int depth = 0, info_idx = 0;
19035 vm_page_info_basic_t basic_info = 0;
19036 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
19037 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
19038 boolean_t do_region_footprint;
19039 ledger_amount_t ledger_resident, ledger_compressed;
19040 int effective_page_size;
19041 vm_map_offset_t effective_page_mask;
19042
19043 switch (flavor) {
19044 case VM_PAGE_INFO_BASIC:
19045 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
19046 /*
19047 * The "vm_page_info_basic_data" structure was not
19048 * properly padded, so allow the size to be off by
19049 * one to maintain backwards binary compatibility...
19050 */
19051 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
19052 return KERN_INVALID_ARGUMENT;
19053 }
19054 }
19055 break;
19056 default:
19057 return KERN_INVALID_ARGUMENT;
19058 }
19059
19060 if (effective_page_shift == -1) {
19061 effective_page_shift = vm_self_region_page_shift_safely(map);
19062 if (effective_page_shift == -1) {
19063 return KERN_INVALID_ARGUMENT;
19064 }
19065 }
19066 effective_page_size = (1 << effective_page_shift);
19067 effective_page_mask = effective_page_size - 1;
19068
19069 do_region_footprint = task_self_region_footprint();
19070 disposition = 0;
19071 ref_count = 0;
19072 depth = 0;
19073 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
19074 retval = KERN_SUCCESS;
19075
19076 offset_in_page = start_offset & effective_page_mask;
19077 start = vm_map_trunc_page(start_offset, effective_page_mask);
19078 end = vm_map_round_page(end_offset, effective_page_mask);
19079
19080 if (end < start) {
19081 return KERN_INVALID_ARGUMENT;
19082 }
19083
19084 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
19085
19086 vm_map_lock_read(map);
19087
19088 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
19089
19090 for (curr_s_offset = start; curr_s_offset < end;) {
19091 /*
19092 * New lookup needs reset of these variables.
19093 */
19094 curr_object = object = VM_OBJECT_NULL;
19095 offset_in_object = 0;
19096 ref_count = 0;
19097 depth = 0;
19098
19099 if (do_region_footprint &&
19100 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
19101 /*
19102 * Request for "footprint" info about a page beyond
19103 * the end of address space: this must be for
19104 * the fake region vm_map_region_recurse_64()
19105 * reported to account for non-volatile purgeable
19106 * memory owned by this task.
19107 */
19108 disposition = 0;
19109
19110 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
19111 (unsigned) ledger_compressed) {
19112 /*
19113 * We haven't reported all the "non-volatile
19114 * compressed" pages yet, so report this fake
19115 * page as "compressed".
19116 */
19117 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19118 } else {
19119 /*
19120 * We've reported all the non-volatile
19121 * compressed pages but not all the non-volatile
19122 * pages, so report this fake page as
19123 * "resident dirty".
19124 */
19125 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19126 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19127 disposition |= VM_PAGE_QUERY_PAGE_REF;
19128 }
19129 switch (flavor) {
19130 case VM_PAGE_INFO_BASIC:
19131 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19132 basic_info->disposition = disposition;
19133 basic_info->ref_count = 1;
19134 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19135 basic_info->offset = 0;
19136 basic_info->depth = 0;
19137
19138 info_idx++;
19139 break;
19140 }
19141 curr_s_offset += effective_page_size;
19142 continue;
19143 }
19144
19145 /*
19146 * First, find the map entry covering "curr_s_offset", going down
19147 * submaps if necessary.
19148 */
19149 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
19150 /* no entry -> no object -> no page */
19151
19152 if (curr_s_offset < vm_map_min(map)) {
19153 /*
19154 * Illegal address that falls below map min.
19155 */
19156 curr_e_offset = MIN(end, vm_map_min(map));
19157 } else if (curr_s_offset >= vm_map_max(map)) {
19158 /*
19159 * Illegal address that falls on/after map max.
19160 */
19161 curr_e_offset = end;
19162 } else if (map_entry == vm_map_to_entry(map)) {
19163 /*
19164 * Hit a hole.
19165 */
19166 if (map_entry->vme_next == vm_map_to_entry(map)) {
19167 /*
19168 * Empty map.
19169 */
19170 curr_e_offset = MIN(map->max_offset, end);
19171 } else {
19172 /*
19173 * Hole at start of the map.
19174 */
19175 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19176 }
19177 } else {
19178 if (map_entry->vme_next == vm_map_to_entry(map)) {
19179 /*
19180 * Hole at the end of the map.
19181 */
19182 curr_e_offset = MIN(map->max_offset, end);
19183 } else {
19184 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19185 }
19186 }
19187
19188 assert(curr_e_offset >= curr_s_offset);
19189
19190 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19191
19192 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19193
19194 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19195
19196 curr_s_offset = curr_e_offset;
19197
19198 info_idx += num_pages;
19199
19200 continue;
19201 }
19202
19203 /* compute offset from this map entry's start */
19204 offset_in_object = curr_s_offset - map_entry->vme_start;
19205
19206 /* compute offset into this map entry's object (or submap) */
19207 offset_in_object += VME_OFFSET(map_entry);
19208
19209 if (map_entry->is_sub_map) {
19210 vm_map_t sub_map = VM_MAP_NULL;
19211 vm_page_info_t submap_info = 0;
19212 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
19213
19214 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
19215
19216 submap_s_offset = offset_in_object;
19217 submap_e_offset = submap_s_offset + range_len;
19218
19219 sub_map = VME_SUBMAP(map_entry);
19220
19221 vm_map_reference(sub_map);
19222 vm_map_unlock_read(map);
19223
19224 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19225
19226 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
19227 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
19228
19229 retval = vm_map_page_range_info_internal(sub_map,
19230 submap_s_offset,
19231 submap_e_offset,
19232 effective_page_shift,
19233 VM_PAGE_INFO_BASIC,
19234 (vm_page_info_t) submap_info,
19235 count);
19236
19237 assert(retval == KERN_SUCCESS);
19238
19239 vm_map_lock_read(map);
19240 vm_map_deallocate(sub_map);
19241
19242 /* Move the "info" index by the number of pages we inspected.*/
19243 info_idx += range_len >> effective_page_shift;
19244
19245 /* Move our current offset by the size of the range we inspected.*/
19246 curr_s_offset += range_len;
19247
19248 continue;
19249 }
19250
19251 object = VME_OBJECT(map_entry);
19252
19253 if (object == VM_OBJECT_NULL) {
19254 /*
19255 * We don't have an object here and, hence,
19256 * no pages to inspect. We'll fill up the
19257 * info structure appropriately.
19258 */
19259
19260 curr_e_offset = MIN(map_entry->vme_end, end);
19261
19262 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19263
19264 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19265
19266 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19267
19268 curr_s_offset = curr_e_offset;
19269
19270 info_idx += num_pages;
19271
19272 continue;
19273 }
19274
19275 if (do_region_footprint) {
19276 disposition = 0;
19277 if (map->has_corpse_footprint) {
19278 /*
19279 * Query the page info data we saved
19280 * while forking the corpse.
19281 */
19282 vm_map_corpse_footprint_query_page_info(
19283 map,
19284 curr_s_offset,
19285 &disposition);
19286 } else {
19287 /*
19288 * Query the live pmap for footprint info
19289 * about this page.
19290 */
19291 vm_map_footprint_query_page_info(
19292 map,
19293 map_entry,
19294 curr_s_offset,
19295 &disposition);
19296 }
19297 switch (flavor) {
19298 case VM_PAGE_INFO_BASIC:
19299 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19300 basic_info->disposition = disposition;
19301 basic_info->ref_count = 1;
19302 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19303 basic_info->offset = 0;
19304 basic_info->depth = 0;
19305
19306 info_idx++;
19307 break;
19308 }
19309 curr_s_offset += effective_page_size;
19310 continue;
19311 }
19312
19313 vm_object_reference(object);
19314 /*
19315 * Shared mode -- so we can allow other readers
19316 * to grab the lock too.
19317 */
19318 vm_object_lock_shared(object);
19319
19320 curr_e_offset = MIN(map_entry->vme_end, end);
19321
19322 vm_map_unlock_read(map);
19323
19324 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19325
19326 curr_object = object;
19327
19328 for (; curr_s_offset < curr_e_offset;) {
19329 if (object == curr_object) {
19330 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19331 } else {
19332 ref_count = curr_object->ref_count;
19333 }
19334
19335 curr_offset_in_object = offset_in_object;
19336
19337 for (;;) {
19338 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19339
19340 if (m != VM_PAGE_NULL) {
19341 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19342 break;
19343 } else {
19344 if (curr_object->internal &&
19345 curr_object->alive &&
19346 !curr_object->terminating &&
19347 curr_object->pager_ready) {
19348 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19349 == VM_EXTERNAL_STATE_EXISTS) {
19350 /* the pager has that page */
19351 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19352 break;
19353 }
19354 }
19355
19356 /*
19357 * Go down the VM object shadow chain until we find the page
19358 * we're looking for.
19359 */
19360
19361 if (curr_object->shadow != VM_OBJECT_NULL) {
19362 vm_object_t shadow = VM_OBJECT_NULL;
19363
19364 curr_offset_in_object += curr_object->vo_shadow_offset;
19365 shadow = curr_object->shadow;
19366
19367 vm_object_lock_shared(shadow);
19368 vm_object_unlock(curr_object);
19369
19370 curr_object = shadow;
19371 depth++;
19372 continue;
19373 } else {
19374 break;
19375 }
19376 }
19377 }
19378
19379 /* The ref_count is not strictly accurate: it measures the number */
19380 /* of entities holding a ref on the object; they may not be mapping */
19381 /* the object or may not be mapping the section holding the */
19382 /* target page, but it is still a ballpark number and, though an */
19383 /* overcount, it picks up the copy-on-write cases */
19384
19385 /* We could also get a picture of page sharing from pmap_attributes */
19386 /* but this would undercount, as only faulted-in mappings would */
19387 /* show up. */
19388
19389 if ((curr_object == object) && curr_object->shadow) {
19390 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19391 }
19392
19393 if (!curr_object->internal) {
19394 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19395 }
19396
19397 if (m != VM_PAGE_NULL) {
19398 if (m->vmp_fictitious) {
19399 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19400 } else {
19401 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19402 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19403 }
19404
19405 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19406 disposition |= VM_PAGE_QUERY_PAGE_REF;
19407 }
19408
19409 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19410 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19411 }
19412
19413 /*
19414 * XXX TODO4K:
19415 * when this routine deals with 4k
19416 * pages, check the appropriate CS bit
19417 * here.
19418 */
19419 if (m->vmp_cs_validated) {
19420 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19421 }
19422 if (m->vmp_cs_tainted) {
19423 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19424 }
19425 if (m->vmp_cs_nx) {
19426 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19427 }
19428 if (m->vmp_reusable || curr_object->all_reusable) {
19429 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19430 }
19431 }
19432 }
19433
19434 switch (flavor) {
19435 case VM_PAGE_INFO_BASIC:
19436 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19437 basic_info->disposition = disposition;
19438 basic_info->ref_count = ref_count;
19439 basic_info->object_id = (vm_object_id_t) (uintptr_t)
19440 VM_KERNEL_ADDRPERM(curr_object);
19441 basic_info->offset =
19442 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19443 basic_info->depth = depth;
19444
19445 info_idx++;
19446 break;
19447 }
19448
19449 disposition = 0;
19450 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19451
19452 /*
19453 * Move to next offset in the range and in our object.
19454 */
19455 curr_s_offset += effective_page_size;
19456 offset_in_object += effective_page_size;
19457 curr_offset_in_object = offset_in_object;
19458
19459 if (curr_object != object) {
19460 vm_object_unlock(curr_object);
19461
19462 curr_object = object;
19463
19464 vm_object_lock_shared(curr_object);
19465 } else {
19466 vm_object_lock_yield_shared(curr_object);
19467 }
19468 }
19469
19470 vm_object_unlock(curr_object);
19471 vm_object_deallocate(curr_object);
19472
19473 vm_map_lock_read(map);
19474 }
19475
19476 vm_map_unlock_read(map);
19477 return retval;
19478 }
19479
19480 /*
19481 * vm_map_msync
19482 *
19483 * Synchronises the specified memory range with its backing store
19484 * image by either flushing or cleaning the contents to the appropriate
19485 * memory manager, engaging in a memory object synchronize dialog with
19486 * the manager. The client doesn't return until the manager issues
19487 * the m_o_s_completed message. MIG magically converts the user task
19488 * parameter to the task's address map.
19489 *
19490 * interpretation of sync_flags
19491 * VM_SYNC_INVALIDATE - discard pages, only return precious
19492 * pages to manager.
19493 *
19494 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19495 * - discard pages, write dirty or precious
19496 * pages back to memory manager.
19497 *
19498 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19499 * - write dirty or precious pages back to
19500 * the memory manager.
19501 *
19502 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19503 * is a hole in the region, and we would
19504 * have returned KERN_SUCCESS, return
19505 * KERN_INVALID_ADDRESS instead.
19506 *
19507 * NOTE
19508 * The memory object attributes have not yet been implemented, so this
19509 * function will have to deal with the invalidate attribute.
19510 *
19511 * RETURNS
19512 * KERN_INVALID_TASK Bad task parameter
19513 * KERN_INVALID_ARGUMENT both sync and async were specified.
19514 * KERN_SUCCESS The usual.
19515 * KERN_INVALID_ADDRESS There was a hole in the region.
19516 */
19517
19518 kern_return_t
19519 vm_map_msync(
19520 vm_map_t map,
19521 vm_map_address_t address,
19522 vm_map_size_t size,
19523 vm_sync_t sync_flags)
19524 {
19525 vm_map_entry_t entry;
19526 vm_map_size_t amount_left;
19527 vm_object_offset_t offset;
19528 vm_object_offset_t start_offset, end_offset;
19529 boolean_t do_sync_req;
19530 boolean_t had_hole = FALSE;
19531 vm_map_offset_t pmap_offset;
19532
19533 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19534 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19535 return KERN_INVALID_ARGUMENT;
19536 }
19537
19538 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19539 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19540 }
19541
19542 /*
19543 * align address and size on page boundaries
19544 */
19545 size = (vm_map_round_page(address + size,
19546 VM_MAP_PAGE_MASK(map)) -
19547 vm_map_trunc_page(address,
19548 VM_MAP_PAGE_MASK(map)));
19549 address = vm_map_trunc_page(address,
19550 VM_MAP_PAGE_MASK(map));
19551
19552 if (map == VM_MAP_NULL) {
19553 return KERN_INVALID_TASK;
19554 }
19555
19556 if (size == 0) {
19557 return KERN_SUCCESS;
19558 }
19559
19560 amount_left = size;
19561
19562 while (amount_left > 0) {
19563 vm_object_size_t flush_size;
19564 vm_object_t object;
19565
19566 vm_map_lock(map);
19567 if (!vm_map_lookup_entry(map,
19568 address,
19569 &entry)) {
19570 vm_map_size_t skip;
19571
19572 /*
19573 * hole in the address map.
19574 */
19575 had_hole = TRUE;
19576
19577 if (sync_flags & VM_SYNC_KILLPAGES) {
19578 /*
19579 * For VM_SYNC_KILLPAGES, there should be
19580 * no holes in the range, since we couldn't
19581 * prevent someone else from allocating in
19582 * that hole and we wouldn't want to "kill"
19583 * their pages.
19584 */
19585 vm_map_unlock(map);
19586 break;
19587 }
19588
19589 /*
19590 * Check for empty map.
19591 */
19592 if (entry == vm_map_to_entry(map) &&
19593 entry->vme_next == entry) {
19594 vm_map_unlock(map);
19595 break;
19596 }
19597 /*
19598 * Check that we don't wrap and that
19599 * we have at least one real map entry.
19600 */
19601 if ((map->hdr.nentries == 0) ||
19602 (entry->vme_next->vme_start < address)) {
19603 vm_map_unlock(map);
19604 break;
19605 }
19606 /*
19607 * Move up to the next entry if needed
19608 */
19609 skip = (entry->vme_next->vme_start - address);
19610 if (skip >= amount_left) {
19611 amount_left = 0;
19612 } else {
19613 amount_left -= skip;
19614 }
19615 address = entry->vme_next->vme_start;
19616 vm_map_unlock(map);
19617 continue;
19618 }
19619
19620 offset = address - entry->vme_start;
19621 pmap_offset = address;
19622
19623 /*
19624 * do we have more to flush than is contained in this
19625 * entry ?
19626 */
19627 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19628 flush_size = entry->vme_end -
19629 (entry->vme_start + offset);
19630 } else {
19631 flush_size = amount_left;
19632 }
19633 amount_left -= flush_size;
19634 address += flush_size;
19635
19636 if (entry->is_sub_map == TRUE) {
19637 vm_map_t local_map;
19638 vm_map_offset_t local_offset;
19639
19640 local_map = VME_SUBMAP(entry);
19641 local_offset = VME_OFFSET(entry);
19642 vm_map_reference(local_map);
19643 vm_map_unlock(map);
19644 if (vm_map_msync(
19645 local_map,
19646 local_offset,
19647 flush_size,
19648 sync_flags) == KERN_INVALID_ADDRESS) {
19649 had_hole = TRUE;
19650 }
19651 vm_map_deallocate(local_map);
19652 continue;
19653 }
19654 object = VME_OBJECT(entry);
19655
19656 /*
19657 * We can't sync this object if the object has not been
19658 * created yet
19659 */
19660 if (object == VM_OBJECT_NULL) {
19661 vm_map_unlock(map);
19662 continue;
19663 }
19664 offset += VME_OFFSET(entry);
19665
19666 vm_object_lock(object);
19667
19668 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19669 int kill_pages = 0;
19670 boolean_t reusable_pages = FALSE;
19671
19672 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19673 /*
19674 * This is a destructive operation and so we
19675 * err on the side of limiting the range of
19676 * the operation.
19677 */
19678 start_offset = vm_object_round_page(offset);
19679 end_offset = vm_object_trunc_page(offset + flush_size);
19680
19681 if (end_offset <= start_offset) {
19682 vm_object_unlock(object);
19683 vm_map_unlock(map);
19684 continue;
19685 }
19686
19687 pmap_offset += start_offset - offset;
19688 } else {
19689 start_offset = offset;
19690 end_offset = offset + flush_size;
19691 }
19692
19693 if (sync_flags & VM_SYNC_KILLPAGES) {
19694 if (((object->ref_count == 1) ||
19695 ((object->copy_strategy !=
19696 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19697 (object->copy == VM_OBJECT_NULL))) &&
19698 (object->shadow == VM_OBJECT_NULL)) {
19699 if (object->ref_count != 1) {
19700 vm_page_stats_reusable.free_shared++;
19701 }
19702 kill_pages = 1;
19703 } else {
19704 kill_pages = -1;
19705 }
19706 }
19707 if (kill_pages != -1) {
19708 vm_object_deactivate_pages(
19709 object,
19710 start_offset,
19711 (vm_object_size_t) (end_offset - start_offset),
19712 kill_pages,
19713 reusable_pages,
19714 map->pmap,
19715 pmap_offset);
19716 }
19717 vm_object_unlock(object);
19718 vm_map_unlock(map);
19719 continue;
19720 }
19721 /*
19722 * We can't sync this object if there isn't a pager.
19723 * Don't bother to sync internal objects, since there can't
19724 * be any "permanent" storage for these objects anyway.
19725 */
19726 if ((object->pager == MEMORY_OBJECT_NULL) ||
19727 (object->internal) || (object->private)) {
19728 vm_object_unlock(object);
19729 vm_map_unlock(map);
19730 continue;
19731 }
19732 /*
19733 * keep reference on the object until syncing is done
19734 */
19735 vm_object_reference_locked(object);
19736 vm_object_unlock(object);
19737
19738 vm_map_unlock(map);
19739
19740 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19741 start_offset = vm_object_trunc_page(offset);
19742 end_offset = vm_object_round_page(offset + flush_size);
19743 } else {
19744 start_offset = offset;
19745 end_offset = offset + flush_size;
19746 }
19747
19748 do_sync_req = vm_object_sync(object,
19749 start_offset,
19750 (end_offset - start_offset),
19751 sync_flags & VM_SYNC_INVALIDATE,
19752 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19753 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19754 sync_flags & VM_SYNC_SYNCHRONOUS);
19755
19756 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19757 /*
19758 * clear out the clustering and read-ahead hints
19759 */
19760 vm_object_lock(object);
19761
19762 object->pages_created = 0;
19763 object->pages_used = 0;
19764 object->sequential = 0;
19765 object->last_alloc = 0;
19766
19767 vm_object_unlock(object);
19768 }
19769 vm_object_deallocate(object);
19770 } /* while */
19771
19772 /* for proper msync() behaviour */
19773 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19774 return KERN_INVALID_ADDRESS;
19775 }
19776
19777 return KERN_SUCCESS;
19778 }/* vm_msync */
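/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): the sync_flags combinations described in the block comment
 * above, expressed as calls. "map", "addr" and "len" are assumed to
 * describe an existing pager-backed mapping.
 */
#if 0
static void
example_msync_usage(vm_map_t map, vm_map_address_t addr, vm_map_size_t len)
{
	/* synchronously push dirty or precious pages to the pager */
	(void) vm_map_msync(map, addr, len, VM_SYNC_SYNCHRONOUS);

	/* write dirty/precious pages back, then discard the resident copies */
	(void) vm_map_msync(map, addr, len,
	    VM_SYNC_INVALIDATE | VM_SYNC_SYNCHRONOUS);

	/* as above, but fail with KERN_INVALID_ADDRESS if the range has a hole */
	(void) vm_map_msync(map, addr, len,
	    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif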
19779
19780 void
19781 vm_named_entry_associate_vm_object(
19782 vm_named_entry_t named_entry,
19783 vm_object_t object,
19784 vm_object_offset_t offset,
19785 vm_object_size_t size,
19786 vm_prot_t prot)
19787 {
19788 vm_map_copy_t copy;
19789 vm_map_entry_t copy_entry;
19790
19791 assert(!named_entry->is_sub_map);
19792 assert(!named_entry->is_copy);
19793 assert(!named_entry->is_object);
19794 assert(!named_entry->internal);
19795 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
19796
19797 copy = vm_map_copy_allocate();
19798 copy->type = VM_MAP_COPY_ENTRY_LIST;
19799 copy->offset = offset;
19800 copy->size = size;
19801 copy->cpy_hdr.page_shift = (uint16_t)PAGE_SHIFT;
19802 vm_map_store_init(&copy->cpy_hdr);
19803
19804 copy_entry = vm_map_copy_entry_create(copy, FALSE);
19805 copy_entry->protection = prot;
19806 copy_entry->max_protection = prot;
19807 copy_entry->use_pmap = TRUE;
19808 copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
19809 copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
19810 VME_OBJECT_SET(copy_entry, object);
19811 VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
19812 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);
19813
19814 named_entry->backing.copy = copy;
19815 named_entry->is_object = TRUE;
19816 if (object->internal) {
19817 named_entry->internal = TRUE;
19818 }
19819
19820 DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n",
19821 named_entry, copy, object, offset, size, prot);
19822 }
19823
19824 vm_object_t
19825 vm_named_entry_to_vm_object(
19826 vm_named_entry_t named_entry)
19827 {
19828 vm_map_copy_t copy;
19829 vm_map_entry_t copy_entry;
19830 vm_object_t object;
19831
19832 assert(!named_entry->is_sub_map);
19833 assert(!named_entry->is_copy);
19834 assert(named_entry->is_object);
19835 copy = named_entry->backing.copy;
19836 assert(copy != VM_MAP_COPY_NULL);
19837 assert(copy->cpy_hdr.nentries == 1);
19838 copy_entry = vm_map_copy_first_entry(copy);
19839 assert(!copy_entry->is_sub_map);
19840 object = VME_OBJECT(copy_entry);
19841
19842 DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);
19843
19844 return object;
19845 }
19846
19847 /*
19848 * Routine: convert_port_entry_to_map
19849 * Purpose:
19850 * Convert from a port specifying an entry or a task
19851 * to a map. Doesn't consume the port ref; produces a map ref,
19852 * which may be null. Unlike convert_port_to_map, the
19853 * port may be either task-backed or named-entry-backed.
19854 * Conditions:
19855 * Nothing locked.
19856 */
19857
19858 vm_map_t
19859 convert_port_entry_to_map(
19860 ipc_port_t port)
19861 {
19862 vm_map_t map = VM_MAP_NULL;
19863 vm_named_entry_t named_entry;
19864
19865 if (!IP_VALID(port)) {
19866 return VM_MAP_NULL;
19867 }
19868
19869 if (ip_kotype(port) != IKOT_NAMED_ENTRY) {
19870 return convert_port_to_map(port);
19871 }
19872
19873 named_entry = mach_memory_entry_from_port(port);
19874
19875 if ((named_entry->is_sub_map) &&
19876 (named_entry->protection & VM_PROT_WRITE)) {
19877 map = named_entry->backing.map;
19878 if (map->pmap != PMAP_NULL) {
19879 if (map->pmap == kernel_pmap) {
19880 panic("userspace has access "
19881 "to a kernel map %p", map);
19882 }
19883 pmap_require(map->pmap);
19884 }
19885 vm_map_reference(map);
19886 }
19887
19888 return map;
19889 }
19890
19891 /*
19892 * Export routines to other components for the things we access locally through
19893 * macros.
19894 */
19895 #undef current_map
19896 vm_map_t
19897 current_map(void)
19898 {
19899 return current_map_fast();
19900 }
19901
19902 /*
19903 * vm_map_reference:
19904 *
19905 * Takes a reference on the specified map.
19906 */
19907 void
19908 vm_map_reference(
19909 vm_map_t map)
19910 {
19911 if (__probable(map != VM_MAP_NULL)) {
19912 vm_map_require(map);
19913 os_ref_retain_raw(&map->map_refcnt, &map_refgrp);
19914 }
19915 }
19916
19917 /*
19918 * vm_map_deallocate:
19919 *
19920 * Removes a reference from the specified map,
19921 * destroying it if no references remain.
19922 * The map should not be locked.
19923 */
19924 void
19925 vm_map_deallocate(
19926 vm_map_t map)
19927 {
19928 if (__probable(map != VM_MAP_NULL)) {
19929 vm_map_require(map);
19930 if (os_ref_release_raw(&map->map_refcnt, &map_refgrp) == 0) {
19931 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19932 }
19933 }
19934 }
19935
19936 void
19937 vm_map_inspect_deallocate(
19938 vm_map_inspect_t map)
19939 {
19940 vm_map_deallocate((vm_map_t)map);
19941 }
19942
19943 void
19944 vm_map_read_deallocate(
19945 vm_map_read_t map)
19946 {
19947 vm_map_deallocate((vm_map_t)map);
19948 }
19949
19950
19951 void
19952 vm_map_disable_NX(vm_map_t map)
19953 {
19954 if (map == NULL) {
19955 return;
19956 }
19957 if (map->pmap == NULL) {
19958 return;
19959 }
19960
19961 pmap_disable_NX(map->pmap);
19962 }
19963
19964 void
19965 vm_map_disallow_data_exec(vm_map_t map)
19966 {
19967 if (map == NULL) {
19968 return;
19969 }
19970
19971 map->map_disallow_data_exec = TRUE;
19972 }
19973
19974 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19975 * more descriptive.
19976 */
19977 void
19978 vm_map_set_32bit(vm_map_t map)
19979 {
19980 #if defined(__arm__) || defined(__arm64__)
19981 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
19982 #else
19983 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
19984 #endif
19985 }
19986
19987
19988 void
19989 vm_map_set_64bit(vm_map_t map)
19990 {
19991 #if defined(__arm__) || defined(__arm64__)
19992 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
19993 #else
19994 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
19995 #endif
19996 }
19997
19998 /*
19999 * Expand the maximum size of an existing map to the maximum supported.
20000 */
20001 void
20002 vm_map_set_jumbo(vm_map_t map)
20003 {
20004 #if defined (__arm64__) && !defined(CONFIG_ARROW)
20005 vm_map_set_max_addr(map, ~0);
20006 #else /* arm64 */
20007 (void) map;
20008 #endif
20009 }
20010
20011 /*
20012 * This map has a JIT entitlement
20013 */
20014 void
20015 vm_map_set_jit_entitled(vm_map_t map)
20016 {
20017 #if defined (__arm64__)
20018 pmap_set_jit_entitled(map->pmap);
20019 #else /* arm64 */
20020 (void) map;
20021 #endif
20022 }
20023
20024 /*
20025 * Expand the maximum size of an existing map.
20026 */
20027 void
20028 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
20029 {
20030 #if defined(__arm64__)
20031 vm_map_offset_t max_supported_offset = 0;
20032 vm_map_offset_t old_max_offset = map->max_offset;
20033 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
20034
20035 new_max_offset = trunc_page(new_max_offset);
20036
20037 /* The address space cannot be shrunk using this routine. */
20038 if (old_max_offset >= new_max_offset) {
20039 return;
20040 }
20041
20042 if (max_supported_offset < new_max_offset) {
20043 new_max_offset = max_supported_offset;
20044 }
20045
20046 map->max_offset = new_max_offset;
20047
20048 if (map->holes_list->prev->vme_end == old_max_offset) {
20049 /*
20050 * There is already a hole at the end of the map; simply make it bigger.
20051 */
20052 map->holes_list->prev->vme_end = map->max_offset;
20053 } else {
20054 /*
20055 * There is no hole at the end, so we need to create a new hole
20056 * for the new empty space we're creating.
20057 */
20058 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
20059 new_hole->start = old_max_offset;
20060 new_hole->end = map->max_offset;
20061 new_hole->prev = map->holes_list->prev;
20062 new_hole->next = (struct vm_map_entry *)map->holes_list;
20063 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
20064 map->holes_list->prev = (struct vm_map_entry *)new_hole;
20065 }
20066 #else
20067 (void)map;
20068 (void)new_max_offset;
20069 #endif
20070 }
20071
20072 vm_map_offset_t
20073 vm_compute_max_offset(boolean_t is64)
20074 {
20075 #if defined(__arm__) || defined(__arm64__)
20076 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
20077 #else
20078 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
20079 #endif
20080 }
20081
20082 void
20083 vm_map_get_max_aslr_slide_section(
20084 vm_map_t map __unused,
20085 int64_t *max_sections,
20086 int64_t *section_size)
20087 {
20088 #if defined(__arm64__)
20089 *max_sections = 3;
20090 *section_size = ARM_TT_TWIG_SIZE;
20091 #else
20092 *max_sections = 1;
20093 *section_size = 0;
20094 #endif
20095 }
20096
20097 uint64_t
20098 vm_map_get_max_aslr_slide_pages(vm_map_t map)
20099 {
20100 #if defined(__arm64__)
20101 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
20102 * limited embedded address space; this is also meant to minimize pmap
20103 * memory usage on 16KB page systems.
20104 */
20105 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
20106 #else
20107 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20108 #endif
20109 }
20110
20111 uint64_t
20112 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
20113 {
20114 #if defined(__arm64__)
20115 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
20116 * of independent entropy on 16KB page systems.
20117 */
20118 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
20119 #else
20120 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20121 #endif
20122 }
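/*
 * Worked example of the arithmetic above (editorial note): on an arm64
 * system with 16KB pages, VM_MAP_PAGE_SHIFT(map) is 14, so the main
 * slide is capped at 1 << (24 - 14) = 1024 pages (16MB) and the loader
 * slide at 1 << (22 - 14) = 256 pages (4MB).
 */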
20123
20124 #ifndef __arm__
20125 boolean_t
20126 vm_map_is_64bit(
20127 vm_map_t map)
20128 {
20129 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
20130 }
20131 #endif
20132
20133 boolean_t
20134 vm_map_has_hard_pagezero(
20135 vm_map_t map,
20136 vm_map_offset_t pagezero_size)
20137 {
20138 /*
20139 * XXX FBDP
20140 * We should lock the VM map (for read) here but we can get away
20141 * with it for now because there can't really be any race condition:
20142 * the VM map's min_offset is changed only when the VM map is created
20143 * and when the zero page is established (when the binary gets loaded),
20144 * and this routine gets called only when the task terminates and the
20145 * VM map is being torn down, and when a new map is created via
20146 * load_machfile()/execve().
20147 */
20148 return map->min_offset >= pagezero_size;
20149 }
20150
20151 /*
20152 * Raise a VM map's maximum offset.
20153 */
20154 kern_return_t
20155 vm_map_raise_max_offset(
20156 vm_map_t map,
20157 vm_map_offset_t new_max_offset)
20158 {
20159 kern_return_t ret;
20160
20161 vm_map_lock(map);
20162 ret = KERN_INVALID_ADDRESS;
20163
20164 if (new_max_offset >= map->max_offset) {
20165 if (!vm_map_is_64bit(map)) {
20166 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20167 map->max_offset = new_max_offset;
20168 ret = KERN_SUCCESS;
20169 }
20170 } else {
20171 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20172 map->max_offset = new_max_offset;
20173 ret = KERN_SUCCESS;
20174 }
20175 }
20176 }
20177
20178 vm_map_unlock(map);
20179 return ret;
20180 }
20181
20182
20183 /*
20184 * Raise a VM map's minimum offset.
20185 * To strictly enforce "page zero" reservation.
20186 */
20187 kern_return_t
20188 vm_map_raise_min_offset(
20189 vm_map_t map,
20190 vm_map_offset_t new_min_offset)
20191 {
20192 vm_map_entry_t first_entry;
20193
20194 new_min_offset = vm_map_round_page(new_min_offset,
20195 VM_MAP_PAGE_MASK(map));
20196
20197 vm_map_lock(map);
20198
20199 if (new_min_offset < map->min_offset) {
20200 /*
20201 * Can't move min_offset backwards, as that would expose
20202 * a part of the address space that was previously, and for
20203 * possibly good reasons, inaccessible.
20204 */
20205 vm_map_unlock(map);
20206 return KERN_INVALID_ADDRESS;
20207 }
20208 if (new_min_offset >= map->max_offset) {
20209 /* can't go beyond the end of the address space */
20210 vm_map_unlock(map);
20211 return KERN_INVALID_ADDRESS;
20212 }
20213
20214 first_entry = vm_map_first_entry(map);
20215 if (first_entry != vm_map_to_entry(map) &&
20216 first_entry->vme_start < new_min_offset) {
20217 /*
20218 * Some memory was already allocated below the new
20219 * minimum offset. It's too late to change it now...
20220 */
20221 vm_map_unlock(map);
20222 return KERN_NO_SPACE;
20223 }
20224
20225 map->min_offset = new_min_offset;
20226
20227 assert(map->holes_list);
20228 map->holes_list->start = new_min_offset;
20229 assert(new_min_offset < map->holes_list->end);
20230
20231 vm_map_unlock(map);
20232
20233 return KERN_SUCCESS;
20234 }
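/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): reserve a hard page-zero region by raising the map's minimum
 * offset before any user mappings exist. The single-page size here is an
 * assumption for illustration; the real reservation size comes from the
 * executable's load commands.
 */
#if 0
static kern_return_t
example_reserve_pagezero(vm_map_t map)
{
	return vm_map_raise_min_offset(map, (vm_map_offset_t)PAGE_SIZE);
}
#endif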
20235
20236 /*
20237 * Set the limit on the maximum amount of address space and user wired memory allowed for this map.
20238 * This is basically a copy of the RLIMIT_AS and RLIMIT_MEMLOCK rlimit value maintained by the BSD
20239 * side of the kernel. The limits are checked on the Mach VM side, so we keep a copy so we don't
20240 * have to reach over to the BSD data structures.
20241 */
20242
20243 uint64_t vm_map_set_size_limit_count = 0;
20244 kern_return_t
20245 vm_map_set_size_limit(vm_map_t map, uint64_t new_size_limit)
20246 {
20247 kern_return_t kr;
20248
20249 vm_map_lock(map);
20250 if (new_size_limit < map->size) {
20251 /* new limit should not be lower than its current size */
20252 DTRACE_VM2(vm_map_set_size_limit_fail,
20253 vm_map_size_t, map->size,
20254 uint64_t, new_size_limit);
20255 kr = KERN_FAILURE;
20256 } else if (new_size_limit == map->size_limit) {
20257 /* no change */
20258 kr = KERN_SUCCESS;
20259 } else {
20260 /* set new limit */
20261 DTRACE_VM2(vm_map_set_size_limit,
20262 vm_map_size_t, map->size,
20263 uint64_t, new_size_limit);
20264 if (new_size_limit != RLIM_INFINITY) {
20265 vm_map_set_size_limit_count++;
20266 }
20267 map->size_limit = new_size_limit;
20268 kr = KERN_SUCCESS;
20269 }
20270 vm_map_unlock(map);
20271 return kr;
20272 }
20273
20274 uint64_t vm_map_set_data_limit_count = 0;
20275 kern_return_t
20276 vm_map_set_data_limit(vm_map_t map, uint64_t new_data_limit)
20277 {
20278 kern_return_t kr;
20279
20280 vm_map_lock(map);
20281 if (new_data_limit < map->size) {
20282 /* new limit should not be lower than its current size */
20283 DTRACE_VM2(vm_map_set_data_limit_fail,
20284 vm_map_size_t, map->size,
20285 uint64_t, new_data_limit);
20286 kr = KERN_FAILURE;
20287 } else if (new_data_limit == map->data_limit) {
20288 /* no change */
20289 kr = KERN_SUCCESS;
20290 } else {
20291 /* set new limit */
20292 DTRACE_VM2(vm_map_set_data_limit,
20293 vm_map_size_t, map->size,
20294 uint64_t, new_data_limit);
20295 if (new_data_limit != RLIM_INFINITY) {
20296 vm_map_set_data_limit_count++;
20297 }
20298 map->data_limit = new_data_limit;
20299 kr = KERN_SUCCESS;
20300 }
20301 vm_map_unlock(map);
20302 return kr;
20303 }
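/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): mirror an RLIMIT_AS-style update into the VM map, treating
 * RLIM_INFINITY as "no limit". The caller and the origin of "new_limit"
 * are assumptions; in practice the value would come from the BSD
 * resource-limit code mentioned in the comment above.
 */
#if 0
static void
example_apply_as_limit(vm_map_t map, uint64_t new_limit)
{
	if (vm_map_set_size_limit(map, new_limit) != KERN_SUCCESS) {
		/* the requested limit was below the map's current size */
	}
}
#endif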
20304
20305 void
20306 vm_map_set_user_wire_limit(vm_map_t map,
20307 vm_size_t limit)
20308 {
20309 vm_map_lock(map);
20310 map->user_wire_limit = limit;
20311 vm_map_unlock(map);
20312 }
20313
20314
20315 void
20316 vm_map_switch_protect(vm_map_t map,
20317 boolean_t val)
20318 {
20319 vm_map_lock(map);
20320 map->switch_protect = val;
20321 vm_map_unlock(map);
20322 }
20323
20324 extern int cs_process_enforcement_enable;
20325 boolean_t
20326 vm_map_cs_enforcement(
20327 vm_map_t map)
20328 {
20329 if (cs_process_enforcement_enable) {
20330 return TRUE;
20331 }
20332 return map->cs_enforcement;
20333 }
20334
20335 kern_return_t
20336 vm_map_cs_wx_enable(
20337 vm_map_t map)
20338 {
20339 return pmap_cs_allow_invalid(vm_map_pmap(map));
20340 }
20341
20342 void
20343 vm_map_cs_debugged_set(
20344 vm_map_t map,
20345 boolean_t val)
20346 {
20347 vm_map_lock(map);
20348 map->cs_debugged = val;
20349 vm_map_unlock(map);
20350 }
20351
20352 void
20353 vm_map_cs_enforcement_set(
20354 vm_map_t map,
20355 boolean_t val)
20356 {
20357 vm_map_lock(map);
20358 map->cs_enforcement = val;
20359 pmap_set_vm_map_cs_enforced(map->pmap, val);
20360 vm_map_unlock(map);
20361 }
20362
20363 /*
20364 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20365 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20366 * bump both counters.
20367 */
20368 void
20369 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20370 {
20371 pmap_t pmap = vm_map_pmap(map);
20372
20373 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20374 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20375 }
20376
20377 void
20378 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20379 {
20380 pmap_t pmap = vm_map_pmap(map);
20381
20382 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20383 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20384 }
20385
20386 /* Add (generate) code signature for memory range */
20387 #if CONFIG_DYNAMIC_CODE_SIGNING
20388 kern_return_t
20389 vm_map_sign(vm_map_t map,
20390 vm_map_offset_t start,
20391 vm_map_offset_t end)
20392 {
20393 vm_map_entry_t entry;
20394 vm_page_t m;
20395 vm_object_t object;
20396
20397 /*
20398 * Vet all the input parameters and current type and state of the
20399 * underlying object. Return with an error if anything is amiss.
20400 */
20401 if (map == VM_MAP_NULL) {
20402 return KERN_INVALID_ARGUMENT;
20403 }
20404
20405 vm_map_lock_read(map);
20406
20407 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
20408 /*
20409 * Must pass a valid non-submap address.
20410 */
20411 vm_map_unlock_read(map);
20412 return KERN_INVALID_ADDRESS;
20413 }
20414
20415 if ((entry->vme_start > start) || (entry->vme_end < end)) {
20416 /*
20417 * Map entry doesn't cover the requested range. Not handling
20418 * this situation currently.
20419 */
20420 vm_map_unlock_read(map);
20421 return KERN_INVALID_ARGUMENT;
20422 }
20423
20424 object = VME_OBJECT(entry);
20425 if (object == VM_OBJECT_NULL) {
20426 /*
20427 * Object must already be present or we can't sign.
20428 */
20429 vm_map_unlock_read(map);
20430 return KERN_INVALID_ARGUMENT;
20431 }
20432
20433 vm_object_lock(object);
20434 vm_map_unlock_read(map);
20435
20436 while (start < end) {
20437 uint32_t refmod;
20438
20439 m = vm_page_lookup(object,
20440 start - entry->vme_start + VME_OFFSET(entry));
20441 if (m == VM_PAGE_NULL) {
20442 /* should we try to fault a page here? we can probably
20443 * demand it exists and is locked for this request */
20444 vm_object_unlock(object);
20445 return KERN_FAILURE;
20446 }
20447 /* deal with special page status */
20448 if (m->vmp_busy ||
20449 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
20450 vm_object_unlock(object);
20451 return KERN_FAILURE;
20452 }
20453
20454 /* Page is OK... now "validate" it */
20455 /* This is the place where we'll call out to create a code
20456 * directory, later */
20457 /* XXX TODO4K: deal with 4k subpages individually? */
20458 m->vmp_cs_validated = VMP_CS_ALL_TRUE;
20459
20460 /* The page is now "clean" for codesigning purposes. That means
20461 * we don't consider it as modified (wpmapped) anymore. But
20462 * we'll disconnect the page so we note any future modification
20463 * attempts. */
20464 m->vmp_wpmapped = FALSE;
20465 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
20466
20467 /* Pull the dirty status from the pmap, since we cleared the
20468 * wpmapped bit */
20469 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
20470 SET_PAGE_DIRTY(m, FALSE);
20471 }
20472
20473 /* On to the next page */
20474 start += PAGE_SIZE;
20475 }
20476 vm_object_unlock(object);
20477
20478 return KERN_SUCCESS;
20479 }
20480 #endif /* CONFIG_DYNAMIC_CODE_SIGNING */
20481
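/*
 * vm_map_partial_reap:
 * walk the map and delete any entry whose VM object is internal and has a
 * single reference (i.e. memory mapped only by this map), adding that
 * object's resident and compressor page counts to the caller-supplied
 * counters.
 */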
20482 kern_return_t
20483 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20484 {
20485 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
20486 vm_map_entry_t next_entry;
20487 kern_return_t kr = KERN_SUCCESS;
20488 vm_map_t zap_map;
20489
20490 vm_map_lock(map);
20491
20492 /*
20493 * We use a "zap_map" to avoid having to unlock
20494 * the "map" in vm_map_delete().
20495 */
20496 zap_map = vm_map_create_options(PMAP_NULL,
20497 map->min_offset,
20498 map->max_offset,
20499 VM_MAP_CREATE_ZAP_OPTIONS(map));
20500 vm_map_set_page_shift(zap_map,
20501 VM_MAP_PAGE_SHIFT(map));
20502
20503 for (entry = vm_map_first_entry(map);
20504 entry != vm_map_to_entry(map);
20505 entry = next_entry) {
20506 next_entry = entry->vme_next;
20507
20508 if (VME_OBJECT(entry) &&
20509 !entry->is_sub_map &&
20510 (VME_OBJECT(entry)->internal == TRUE) &&
20511 (VME_OBJECT(entry)->ref_count == 1)) {
20512 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20513 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20514
20515 (void)vm_map_delete(map,
20516 entry->vme_start,
20517 entry->vme_end,
20518 VM_MAP_REMOVE_SAVE_ENTRIES,
20519 zap_map);
20520 }
20521 }
20522
20523 vm_map_unlock(map);
20524
20525 /*
20526 * Get rid of the "zap_map" and all the map entries that
20527 * it may still contain.
20528 */
20529 if (zap_map != VM_MAP_NULL) {
20530 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20531 zap_map = VM_MAP_NULL;
20532 }
20533
20534 return kr;
20535 }
20536
20537
20538 #if DEVELOPMENT || DEBUG
20539
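/*
 * vm_map_disconnect_page_mappings:
 * optionally un-nest any nested sub-map entries, then remove the pmap
 * mappings for all of this map's entries (skipping physically contiguous
 * objects).  Returns the task's resident page count, sampled from its
 * phys_mem ledger before the disconnect, expressed in this map's page size.
 */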
20540 int
20541 vm_map_disconnect_page_mappings(
20542 vm_map_t map,
20543 boolean_t do_unnest)
20544 {
20545 vm_map_entry_t entry;
20546 ledger_amount_t byte_count = 0;
20547
20548 if (do_unnest == TRUE) {
20549 #ifndef NO_NESTED_PMAP
20550 vm_map_lock(map);
20551
20552 for (entry = vm_map_first_entry(map);
20553 entry != vm_map_to_entry(map);
20554 entry = entry->vme_next) {
20555 if (entry->is_sub_map && entry->use_pmap) {
20556 /*
20557 * Make sure the range between the start of this entry and
20558 * the end of this entry is no longer nested, so that
20559 * we will only remove mappings from the pmap in use by
20560 * this task.
20561 */
20562 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
20563 }
20564 }
20565 vm_map_unlock(map);
20566 #endif /* NO_NESTED_PMAP */
20567 }
20568 vm_map_lock_read(map);
20569
20570 ledger_get_balance(map->pmap->ledger, task_ledgers.phys_mem, &byte_count);
20571
20572 for (entry = vm_map_first_entry(map);
20573 entry != vm_map_to_entry(map);
20574 entry = entry->vme_next) {
20575 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
20576 (VME_OBJECT(entry)->phys_contiguous))) {
20577 continue;
20578 }
20579 if (entry->is_sub_map) {
20580 assert(!entry->use_pmap);
20581 }
20582
20583 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
20584 }
20585 vm_map_unlock_read(map);
20586
20587 return (int) (byte_count / VM_MAP_PAGE_SIZE(map));
20588 }
20589
20590 kern_return_t
20591 vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
20592 {
20593 vm_object_t object = NULL;
20594 vm_object_offset_t offset;
20595 vm_prot_t prot;
20596 boolean_t wired;
20597 vm_map_version_t version;
20598 vm_map_t real_map;
20599 int result = KERN_FAILURE;
20600
20601 vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
20602 vm_map_lock(map);
20603
20604 result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
20605 OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
20606 NULL, &real_map, NULL);
20607 if (object == NULL) {
20608 result = KERN_MEMORY_ERROR;
20609 } else if (object->pager) {
20610 result = vm_compressor_pager_inject_error(object->pager,
20611 offset);
20612 } else {
20613 result = KERN_MEMORY_PRESENT;
20614 }
20615
20616 if (object != NULL) {
20617 vm_object_unlock(object);
20618 }
20619
20620 if (real_map != map) {
20621 vm_map_unlock(real_map);
20622 }
20623 vm_map_unlock(map);
20624
20625 return result;
20626 }
20627
20628 #endif /* DEVELOPMENT || DEBUG */
20629
20630
20631 #if CONFIG_FREEZE
20632
20633
20634 extern struct freezer_context freezer_context_global;
20635 AbsoluteTime c_freezer_last_yield_ts = 0;
20636
20637 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20638 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20639
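/*
 * vm_map_freeze:
 * compress the dirty anonymous memory mapped by "task" into the compressor,
 * up to "dirty_budget" pages.  When the freezer is backed by swap, a first
 * "evaluation" pass estimates the private vs. shared dirty page counts and
 * fails the freeze if too much of the memory is shared; otherwise the task's
 * purgeable memory is purged and a second pass does the actual compression
 * (skipped when "eval_only" is set).
 */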
20640 kern_return_t
20641 vm_map_freeze(
20642 task_t task,
20643 unsigned int *purgeable_count,
20644 unsigned int *wired_count,
20645 unsigned int *clean_count,
20646 unsigned int *dirty_count,
20647 unsigned int dirty_budget,
20648 unsigned int *shared_count,
20649 int *freezer_error_code,
20650 boolean_t eval_only)
20651 {
20652 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
20653 kern_return_t kr = KERN_SUCCESS;
20654 boolean_t evaluation_phase = TRUE;
20655 vm_object_t cur_shared_object = NULL;
20656 int cur_shared_obj_ref_cnt = 0;
20657 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
20658
20659 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
20660
20661 /*
20662 * We need the exclusive lock here so that we can
20663 * block any page faults or lookups while we are
20664 * in the middle of freezing this vm map.
20665 */
20666 vm_map_t map = task->map;
20667
20668 vm_map_lock(map);
20669
20670 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
20671
20672 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20673 if (vm_compressor_low_on_space()) {
20674 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20675 }
20676
20677 if (vm_swap_low_on_space()) {
20678 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20679 }
20680
20681 kr = KERN_NO_SPACE;
20682 goto done;
20683 }
20684
20685 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
20686 /*
20687 * In-memory compressor backing the freezer. No disk.
20688 * So no need to do the evaluation phase.
20689 */
20690 evaluation_phase = FALSE;
20691
20692 if (eval_only == TRUE) {
20693 /*
20694 * We don't support 'eval_only' mode
20695 * in this non-swap config.
20696 */
20697 *freezer_error_code = FREEZER_ERROR_GENERIC;
20698 kr = KERN_INVALID_ARGUMENT;
20699 goto done;
20700 }
20701
20702 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20703 clock_get_uptime(&c_freezer_last_yield_ts);
20704 }
20705 again:
20706
20707 for (entry2 = vm_map_first_entry(map);
20708 entry2 != vm_map_to_entry(map);
20709 entry2 = entry2->vme_next) {
20710 vm_object_t src_object = VME_OBJECT(entry2);
20711
20712 if (src_object &&
20713 !entry2->is_sub_map &&
20714 !src_object->phys_contiguous) {
20715 /* If eligible, scan the entry, moving eligible pages over to our parent object */
20716
20717 if (src_object->internal == TRUE) {
20718 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
20719 /*
20720 * We skip purgeable objects during evaluation phase only.
20721 * If we decide to freeze this process, we'll explicitly
20722 * purge these objects before we go around again with
20723 * 'evaluation_phase' set to FALSE.
20724 */
20725
20726 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
20727 /*
20728 * We want to purge objects that may not belong to this task but are mapped
20729 * in this task alone. Since we already purged this task's purgeable memory
20730 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
20731 * on this task's purgeable objects. Hence the check for only volatile objects.
20732 */
20733 if (evaluation_phase == FALSE &&
20734 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
20735 (src_object->ref_count == 1)) {
20736 vm_object_lock(src_object);
20737 vm_object_purge(src_object, 0);
20738 vm_object_unlock(src_object);
20739 }
20740 continue;
20741 }
20742
20743 /*
20744 * Pages belonging to this object could be swapped to disk.
20745 * Make sure it's not a shared object because we could end
20746 * up just bringing it back in again.
20747 *
20748 * We try to optimize somewhat by checking for objects that are mapped
20749 * more than once within our own map. But we don't do full searches;
20750 * we just look at the entries following our current entry.
20751 */
20752
20753 if (src_object->ref_count > 1) {
20754 if (src_object != cur_shared_object) {
20755 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20756 dirty_shared_count += obj_pages_snapshot;
20757
20758 cur_shared_object = src_object;
20759 cur_shared_obj_ref_cnt = 1;
20760 continue;
20761 } else {
20762 cur_shared_obj_ref_cnt++;
20763 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
20764 /*
20765 * Fall through to below and treat this object as private.
20766 * So deduct its pages from our shared total and add it to the
20767 * private total.
20768 */
20769
20770 dirty_shared_count -= obj_pages_snapshot;
20771 dirty_private_count += obj_pages_snapshot;
20772 } else {
20773 continue;
20774 }
20775 }
20776 }
20777
20778
20779 if (src_object->ref_count == 1) {
20780 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20781 }
20782
20783 if (evaluation_phase == TRUE) {
20784 continue;
20785 }
20786 }
20787
20788 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
20789 *wired_count += src_object->wired_page_count;
20790
20791 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20792 if (vm_compressor_low_on_space()) {
20793 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20794 }
20795
20796 if (vm_swap_low_on_space()) {
20797 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20798 }
20799
20800 kr = KERN_NO_SPACE;
20801 break;
20802 }
20803 if (paged_out_count >= dirty_budget) {
20804 break;
20805 }
20806 dirty_budget -= paged_out_count;
20807 }
20808 }
20809 }
20810
20811 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
20812 if (evaluation_phase) {
20813 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
20814
20815 if (dirty_shared_count > shared_pages_threshold) {
20816 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
20817 kr = KERN_FAILURE;
20818 goto done;
20819 }
20820
20821 if (dirty_shared_count &&
20822 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
20823 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
20824 kr = KERN_FAILURE;
20825 goto done;
20826 }
20827
20828 evaluation_phase = FALSE;
20829 dirty_shared_count = dirty_private_count = 0;
20830
20831 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20832 clock_get_uptime(&c_freezer_last_yield_ts);
20833
20834 if (eval_only) {
20835 kr = KERN_SUCCESS;
20836 goto done;
20837 }
20838
20839 vm_purgeable_purge_task_owned(task);
20840
20841 goto again;
20842 } else {
20843 kr = KERN_SUCCESS;
20844 }
20845
20846 done:
20847 vm_map_unlock(map);
20848
20849 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
20850 vm_object_compressed_freezer_done();
20851 }
20852 return kr;
20853 }
20854
20855 #endif /* CONFIG_FREEZE */
20856
20857 /*
20858 * vm_map_entry_should_cow_for_true_share:
20859 *
20860 * Determines if the map entry should be clipped and set up for copy-on-write
20861 * to avoid applying "true_share" to a large VM object when only a subset is
20862 * targeted.
20863 *
20864 * For now, we target only the map entries created for the Objective C
20865 * Garbage Collector, which initially have the following properties:
20866 * - alias == VM_MEMORY_MALLOC
20867 * - wired_count == 0
20868 * - !needs_copy
20869 * and a VM object with:
20870 * - internal
20871 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20872 * - !true_share
20873 * - vo_size == ANON_CHUNK_SIZE
20874 *
20875 * Only non-kernel map entries.
20876 */
20877 boolean_t
20878 vm_map_entry_should_cow_for_true_share(
20879 vm_map_entry_t entry)
20880 {
20881 vm_object_t object;
20882
20883 if (entry->is_sub_map) {
20884 /* entry does not point at a VM object */
20885 return FALSE;
20886 }
20887
20888 if (entry->needs_copy) {
20889 /* already set for copy_on_write: done! */
20890 return FALSE;
20891 }
20892
20893 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20894 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
20895 /* not a malloc heap or Obj-C Garbage Collector heap */
20896 return FALSE;
20897 }
20898
20899 if (entry->wired_count) {
20900 /* wired: can't change the map entry... */
20901 vm_counters.should_cow_but_wired++;
20902 return FALSE;
20903 }
20904
20905 object = VME_OBJECT(entry);
20906
20907 if (object == VM_OBJECT_NULL) {
20908 /* no object yet... */
20909 return FALSE;
20910 }
20911
20912 if (!object->internal) {
20913 /* not an internal object */
20914 return FALSE;
20915 }
20916
20917 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20918 /* not the default copy strategy */
20919 return FALSE;
20920 }
20921
20922 if (object->true_share) {
20923 /* already true_share: too late to avoid it */
20924 return FALSE;
20925 }
20926
20927 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
20928 object->vo_size != ANON_CHUNK_SIZE) {
20929 /* ... not an object created for the ObjC Garbage Collector */
20930 return FALSE;
20931 }
20932
20933 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
20934 object->vo_size != 2048 * 4096) {
20935 /* ... not a "MALLOC_SMALL" heap */
20936 return FALSE;
20937 }
20938
20939 /*
20940 * All the criteria match: we have a large object being targeted for "true_share".
20941 * To limit the adverse side-effects linked with "true_share", tell the caller to
20942 * try and avoid setting up the entire object for "true_share" by clipping the
20943 * targeted range and setting it up for copy-on-write.
20944 */
20945 return TRUE;
20946 }
20947
20948 vm_map_offset_t
20949 vm_map_round_page_mask(
20950 vm_map_offset_t offset,
20951 vm_map_offset_t mask)
20952 {
20953 return VM_MAP_ROUND_PAGE(offset, mask);
20954 }
20955
20956 vm_map_offset_t
20957 vm_map_trunc_page_mask(
20958 vm_map_offset_t offset,
20959 vm_map_offset_t mask)
20960 {
20961 return VM_MAP_TRUNC_PAGE(offset, mask);
20962 }
20963
20964 boolean_t
20965 vm_map_page_aligned(
20966 vm_map_offset_t offset,
20967 vm_map_offset_t mask)
20968 {
20969 return ((offset) & mask) == 0;
20970 }
20971
20972 int
20973 vm_map_page_shift(
20974 vm_map_t map)
20975 {
20976 return VM_MAP_PAGE_SHIFT(map);
20977 }
20978
20979 int
20980 vm_map_page_size(
20981 vm_map_t map)
20982 {
20983 return VM_MAP_PAGE_SIZE(map);
20984 }
20985
20986 vm_map_offset_t
20987 vm_map_page_mask(
20988 vm_map_t map)
20989 {
20990 return VM_MAP_PAGE_MASK(map);
20991 }
20992
20993 kern_return_t
20994 vm_map_set_page_shift(
20995 vm_map_t map,
20996 int pageshift)
20997 {
20998 if (map->hdr.nentries != 0) {
20999 /* too late to change page size */
21000 return KERN_FAILURE;
21001 }
21002
21003 map->hdr.page_shift = (uint16_t)pageshift;
21004
21005 return KERN_SUCCESS;
21006 }
21007
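/*
 * vm_map_query_volatile:
 * compute the virtual, resident, compressed and pmap-accounted sizes of the
 * writable volatile (or empty) purgeable objects mapped in "map".  The map
 * must be locked by the caller and is still locked on return.
 */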
21008 kern_return_t
21009 vm_map_query_volatile(
21010 vm_map_t map,
21011 mach_vm_size_t *volatile_virtual_size_p,
21012 mach_vm_size_t *volatile_resident_size_p,
21013 mach_vm_size_t *volatile_compressed_size_p,
21014 mach_vm_size_t *volatile_pmap_size_p,
21015 mach_vm_size_t *volatile_compressed_pmap_size_p)
21016 {
21017 mach_vm_size_t volatile_virtual_size;
21018 mach_vm_size_t volatile_resident_count;
21019 mach_vm_size_t volatile_compressed_count;
21020 mach_vm_size_t volatile_pmap_count;
21021 mach_vm_size_t volatile_compressed_pmap_count;
21022 mach_vm_size_t resident_count;
21023 vm_map_entry_t entry;
21024 vm_object_t object;
21025
21026 /* map should be locked by caller */
21027
21028 volatile_virtual_size = 0;
21029 volatile_resident_count = 0;
21030 volatile_compressed_count = 0;
21031 volatile_pmap_count = 0;
21032 volatile_compressed_pmap_count = 0;
21033
21034 for (entry = vm_map_first_entry(map);
21035 entry != vm_map_to_entry(map);
21036 entry = entry->vme_next) {
21037 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
21038
21039 if (entry->is_sub_map) {
21040 continue;
21041 }
21042 if (!(entry->protection & VM_PROT_WRITE)) {
21043 continue;
21044 }
21045 object = VME_OBJECT(entry);
21046 if (object == VM_OBJECT_NULL) {
21047 continue;
21048 }
21049 if (object->purgable != VM_PURGABLE_VOLATILE &&
21050 object->purgable != VM_PURGABLE_EMPTY) {
21051 continue;
21052 }
21053 if (VME_OFFSET(entry)) {
21054 /*
21055 * If the map entry has been split and the object now
21056 * appears several times in the VM map, we don't want
21057 * to count the object's resident_page_count more than
21058 * once. We count it only for the first one, starting
21059 * at offset 0, and ignore the other VM map entries.
21060 */
21061 continue;
21062 }
21063 resident_count = object->resident_page_count;
21064 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
21065 resident_count = 0;
21066 } else {
21067 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
21068 }
21069
21070 volatile_virtual_size += entry->vme_end - entry->vme_start;
21071 volatile_resident_count += resident_count;
21072 if (object->pager) {
21073 volatile_compressed_count +=
21074 vm_compressor_pager_get_count(object->pager);
21075 }
21076 pmap_compressed_bytes = 0;
21077 pmap_resident_bytes =
21078 pmap_query_resident(map->pmap,
21079 entry->vme_start,
21080 entry->vme_end,
21081 &pmap_compressed_bytes);
21082 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
21083 volatile_compressed_pmap_count += (pmap_compressed_bytes
21084 / PAGE_SIZE);
21085 }
21086
21087 /* map is still locked on return */
21088
21089 *volatile_virtual_size_p = volatile_virtual_size;
21090 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
21091 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
21092 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
21093 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
21094
21095 return KERN_SUCCESS;
21096 }
21097
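/*
 * vm_map_sizes:
 * report the map's total virtual size, the total amount of unallocated
 * virtual address space, and the largest contiguous free gap.  "psize",
 * "pfree" and "plargest_free" may each be NULL if that value isn't needed.
 */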
21098 void
21099 vm_map_sizes(vm_map_t map,
21100 vm_map_size_t * psize,
21101 vm_map_size_t * pfree,
21102 vm_map_size_t * plargest_free)
21103 {
21104 vm_map_entry_t entry;
21105 vm_map_offset_t prev;
21106 vm_map_size_t free, total_free, largest_free;
21107 boolean_t end;
21108
21109 if (!map) {
21110 *psize = *pfree = *plargest_free = 0;
21111 return;
21112 }
21113 total_free = largest_free = 0;
21114
21115 vm_map_lock_read(map);
21116 if (psize) {
21117 *psize = map->max_offset - map->min_offset;
21118 }
21119
21120 prev = map->min_offset;
21121 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
21122 end = (entry == vm_map_to_entry(map));
21123
21124 if (end) {
21125 free = entry->vme_end - prev;
21126 } else {
21127 free = entry->vme_start - prev;
21128 }
21129
21130 total_free += free;
21131 if (free > largest_free) {
21132 largest_free = free;
21133 }
21134
21135 if (end) {
21136 break;
21137 }
21138 prev = entry->vme_end;
21139 }
21140 vm_map_unlock_read(map);
21141 if (pfree) {
21142 *pfree = total_free;
21143 }
21144 if (plargest_free) {
21145 *plargest_free = largest_free;
21146 }
21147 }
21148
21149 #if VM_SCAN_FOR_SHADOW_CHAIN
21150 int vm_map_shadow_max(vm_map_t map);
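/*
 * vm_map_shadow_max:
 * return the length of the longest shadow object chain found among the
 * VM objects mapped in "map".
 */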
21151 int
21152 vm_map_shadow_max(
21153 vm_map_t map)
21154 {
21155 int shadows, shadows_max;
21156 vm_map_entry_t entry;
21157 vm_object_t object, next_object;
21158
21159 if (map == NULL) {
21160 return 0;
21161 }
21162
21163 shadows_max = 0;
21164
21165 vm_map_lock_read(map);
21166
21167 for (entry = vm_map_first_entry(map);
21168 entry != vm_map_to_entry(map);
21169 entry = entry->vme_next) {
21170 if (entry->is_sub_map) {
21171 continue;
21172 }
21173 object = VME_OBJECT(entry);
21174 if (object == NULL) {
21175 continue;
21176 }
21177 vm_object_lock_shared(object);
21178 for (shadows = 0;
21179 object->shadow != NULL;
21180 shadows++, object = next_object) {
21181 next_object = object->shadow;
21182 vm_object_lock_shared(next_object);
21183 vm_object_unlock(object);
21184 }
21185 vm_object_unlock(object);
21186 if (shadows > shadows_max) {
21187 shadows_max = shadows;
21188 }
21189 }
21190
21191 vm_map_unlock_read(map);
21192
21193 return shadows_max;
21194 }
21195 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21196
21197 void
21198 vm_commit_pagezero_status(vm_map_t lmap)
21199 {
21200 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
21201 }
21202
21203 #if XNU_TARGET_OS_OSX
21204 void
21205 vm_map_set_high_start(
21206 vm_map_t map,
21207 vm_map_offset_t high_start)
21208 {
21209 map->vmmap_high_start = high_start;
21210 }
21211 #endif /* XNU_TARGET_OS_OSX */
21212
21213
21214 /*
21215 * FORKED CORPSE FOOTPRINT
21216 *
21217 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21218 * empty since it never ran and never got to fault in any pages.
21219 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21220 * a forked corpse would therefore return very little information.
21221 *
21222 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21223 * to vm_map_fork() to collect footprint information from the original VM map
21224 * and its pmap, and store it in the forked corpse's VM map. That information
21225 * is stored in place of the VM map's "hole list" since we'll never need to
21226 * look for holes in the corpse's map.
21227 *
21228 * The corpse's footprint info looks like this:
21229 *
21230 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21231 * as follows:
21232 * +---------------------------------------+
21233 * header-> | cf_size |
21234 * +-------------------+-------------------+
21235 * | cf_last_region | cf_last_zeroes |
21236 * +-------------------+-------------------+
21237 * region1-> | cfr_vaddr |
21238 * +-------------------+-------------------+
21239 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21240 * +---------------------------------------+
21241 * | d4 | d5 | ... |
21242 * +---------------------------------------+
21243 * | ... |
21244 * +-------------------+-------------------+
21245 * | dy | dz | na | na | cfr_vaddr... | <-region2
21246 * +-------------------+-------------------+
21247 * | cfr_vaddr (ctd) | cfr_num_pages |
21248 * +---------------------------------------+
21249 * | d0 | d1 ... |
21250 * +---------------------------------------+
21251 * ...
21252 * +---------------------------------------+
21253 * last region-> | cfr_vaddr |
21254 * +---------------------------------------+
21255 * + cfr_num_pages | d0 | d1 | d2 | d3 |
21256 * +---------------------------------------+
21257 * ...
21258 * +---------------------------------------+
21259 * | dx | dy | dz | na | na | na | na | na |
21260 * +---------------------------------------+
21261 *
21262 * where:
21263 * cf_size: total size of the buffer (rounded to page size)
21264 * cf_last_region: offset in the buffer of the last "region" sub-header
21265 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21266 * of last region
21267 * cfr_vaddr: virtual address of the start of the covered "region"
21268 * cfr_num_pages: number of pages in the covered "region"
21269 * d*: disposition of the page at that virtual address
21270 * Regions in the buffer are word-aligned.
21271 *
21272 * We estimate the size of the buffer based on the number of memory regions
21273 * and the virtual size of the address space. While copying each memory region
21274 * during vm_map_fork(), we also collect the footprint info for that region
21275 * and store it in the buffer, packing it as much as possible (coalescing
21276 * contiguous memory regions to avoid having too many region headers and
21277 * avoiding long streaks of "zero" page dispositions by splitting footprint
21278 * "regions", so the number of regions in the footprint buffer might not match
21279 * the number of memory regions in the address space.
21280 *
21281 * We also have to copy the original task's "nonvolatile" ledgers since that's
21282 * part of the footprint and will need to be reported to any tool asking for
21283 * the footprint information of the forked corpse.
21284 */
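/*
 * Illustrative example (not actual data): a region starting at vaddr
 * 0x100000000 and covering 6 pages with dispositions d0..d5 is stored as
 *
 *   cfr_vaddr         = 0x100000000
 *   cfr_num_pages     = 6
 *   cfr_disposition[] = { d0, d1, d2, d3, d4, d5 }
 *
 * followed by up to 3 bytes of padding so that the next region header is
 * aligned on a word (sizeof (int)) boundary.  Within a region, the
 * disposition of the page at virtual address "va" lives at index
 * (va - cfr_vaddr) / effective_page_size.
 */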
21285
21286 uint64_t vm_map_corpse_footprint_count = 0;
21287 uint64_t vm_map_corpse_footprint_size_avg = 0;
21288 uint64_t vm_map_corpse_footprint_size_max = 0;
21289 uint64_t vm_map_corpse_footprint_full = 0;
21290 uint64_t vm_map_corpse_footprint_no_buf = 0;
21291
21292 struct vm_map_corpse_footprint_header {
21293 vm_size_t cf_size; /* allocated buffer size */
21294 uint32_t cf_last_region; /* offset of last region in buffer */
21295 union {
21296 uint32_t cfu_last_zeroes; /* during creation:
21297 * number of "zero" dispositions at
21298 * end of last region */
21299 uint32_t cfu_hint_region; /* during lookup:
21300 * offset of last looked up region */
21301 #define cf_last_zeroes cfu.cfu_last_zeroes
21302 #define cf_hint_region cfu.cfu_hint_region
21303 } cfu;
21304 };
21305 typedef uint8_t cf_disp_t;
21306 struct vm_map_corpse_footprint_region {
21307 vm_map_offset_t cfr_vaddr; /* region start virtual address */
21308 uint32_t cfr_num_pages; /* number of pages in this "region" */
21309 cf_disp_t cfr_disposition[0]; /* disposition of each page */
21310 } __attribute__((packed));
21311
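/*
 * Page dispositions are stored as a single byte ("cf_disp_t") to keep the
 * footprint buffer compact.  VM_PAGE_QUERY_PAGE_REUSABLE doesn't fit in
 * 8 bits, so the helpers below park it in the VM_PAGE_QUERY_PAGE_FICTITIOUS
 * bit on the way in and restore it on the way out; any other bits beyond
 * the low 8 are dropped by the cast.
 */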
21312 static cf_disp_t
21313 vm_page_disposition_to_cf_disp(
21314 int disposition)
21315 {
21316 assert(sizeof(cf_disp_t) == 1);
21317 /* relocate bits that don't fit in a "uint8_t" */
21318 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21319 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21320 }
21321 /* cast gets rid of extra bits */
21322 return (cf_disp_t) disposition;
21323 }
21324
21325 static int
21326 vm_page_cf_disp_to_disposition(
21327 cf_disp_t cf_disp)
21328 {
21329 int disposition;
21330
21331 assert(sizeof(cf_disp_t) == 1);
21332 disposition = (int) cf_disp;
21333 /* move relocated bits back in place */
21334 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21335 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21336 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21337 }
21338 return disposition;
21339 }
21340
21341 /*
21342 * vm_map_corpse_footprint_new_region:
21343 * closes the current footprint "region" and creates a new one
21344 *
21345 * Returns NULL if there's not enough space in the buffer for a new region.
21346 */
21347 static struct vm_map_corpse_footprint_region *
21348 vm_map_corpse_footprint_new_region(
21349 struct vm_map_corpse_footprint_header *footprint_header)
21350 {
21351 uintptr_t footprint_edge;
21352 uint32_t new_region_offset;
21353 struct vm_map_corpse_footprint_region *footprint_region;
21354 struct vm_map_corpse_footprint_region *new_footprint_region;
21355
21356 footprint_edge = ((uintptr_t)footprint_header +
21357 footprint_header->cf_size);
21358 footprint_region = ((struct vm_map_corpse_footprint_region *)
21359 ((char *)footprint_header +
21360 footprint_header->cf_last_region));
21361 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
21362 footprint_edge);
21363
21364 /* get rid of trailing zeroes in the last region */
21365 assert(footprint_region->cfr_num_pages >=
21366 footprint_header->cf_last_zeroes);
21367 footprint_region->cfr_num_pages -=
21368 footprint_header->cf_last_zeroes;
21369 footprint_header->cf_last_zeroes = 0;
21370
21371 /* reuse this region if it's now empty */
21372 if (footprint_region->cfr_num_pages == 0) {
21373 return footprint_region;
21374 }
21375
21376 /* compute offset of new region */
21377 new_region_offset = footprint_header->cf_last_region;
21378 new_region_offset += sizeof(*footprint_region);
21379 new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21380 new_region_offset = roundup(new_region_offset, sizeof(int));
21381
21382 /* check if we're going over the edge */
21383 if (((uintptr_t)footprint_header +
21384 new_region_offset +
21385 sizeof(*footprint_region)) >=
21386 footprint_edge) {
21387 /* over the edge: no new region */
21388 return NULL;
21389 }
21390
21391 /* adjust offset of last region in header */
21392 footprint_header->cf_last_region = new_region_offset;
21393
21394 new_footprint_region = (struct vm_map_corpse_footprint_region *)
21395 ((char *)footprint_header +
21396 footprint_header->cf_last_region);
21397 new_footprint_region->cfr_vaddr = 0;
21398 new_footprint_region->cfr_num_pages = 0;
21399 /* caller needs to initialize new region */
21400
21401 return new_footprint_region;
21402 }
21403
21404 /*
21405 * vm_map_corpse_footprint_collect:
21406 * collect footprint information for "old_entry" in "old_map" and
21407 * stores it in "new_map"'s vmmap_footprint_info.
21408 */
21409 kern_return_t
21410 vm_map_corpse_footprint_collect(
21411 vm_map_t old_map,
21412 vm_map_entry_t old_entry,
21413 vm_map_t new_map)
21414 {
21415 vm_map_offset_t va;
21416 kern_return_t kr;
21417 struct vm_map_corpse_footprint_header *footprint_header;
21418 struct vm_map_corpse_footprint_region *footprint_region;
21419 struct vm_map_corpse_footprint_region *new_footprint_region;
21420 cf_disp_t *next_disp_p;
21421 uintptr_t footprint_edge;
21422 uint32_t num_pages_tmp;
21423 int effective_page_size;
21424
21425 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
21426
21427 va = old_entry->vme_start;
21428
21429 vm_map_lock_assert_exclusive(old_map);
21430 vm_map_lock_assert_exclusive(new_map);
21431
21432 assert(new_map->has_corpse_footprint);
21433 assert(!old_map->has_corpse_footprint);
21434 if (!new_map->has_corpse_footprint ||
21435 old_map->has_corpse_footprint) {
21436 /*
21437 * This can only transfer footprint info from a
21438 * map with a live pmap to a map with a corpse footprint.
21439 */
21440 return KERN_NOT_SUPPORTED;
21441 }
21442
21443 if (new_map->vmmap_corpse_footprint == NULL) {
21444 vm_offset_t buf;
21445 vm_size_t buf_size;
21446
21447 buf = 0;
21448 buf_size = (sizeof(*footprint_header) +
21449 (old_map->hdr.nentries
21450 *
21451 (sizeof(*footprint_region) +
21452 +3)) /* potential alignment for each region */
21453 +
21454 ((old_map->size / effective_page_size)
21455 *
21456 sizeof(cf_disp_t))); /* disposition for each page */
21457 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21458 buf_size = round_page(buf_size);
21459
21460 /* limit buffer to 1 page to validate overflow detection */
21461 // buf_size = PAGE_SIZE;
21462
21463 /* limit size to a somewhat sane amount */
21464 #if XNU_TARGET_OS_OSX
21465 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
21466 #else /* XNU_TARGET_OS_OSX */
21467 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
21468 #endif /* XNU_TARGET_OS_OSX */
21469 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
21470 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
21471 }
21472
21473 /*
21474 * Allocate the pageable buffer (with a trailing guard page).
21475 * It will be zero-filled on demand.
21476 */
21477 kr = kernel_memory_allocate(kernel_map,
21478 &buf,
21479 (buf_size
21480 + PAGE_SIZE), /* trailing guard page */
21481 0, /* mask */
21482 KMA_PAGEABLE | KMA_GUARD_LAST,
21483 VM_KERN_MEMORY_DIAG);
21484 if (kr != KERN_SUCCESS) {
21485 vm_map_corpse_footprint_no_buf++;
21486 return kr;
21487 }
21488
21489 /* initialize header and 1st region */
21490 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
21491 new_map->vmmap_corpse_footprint = footprint_header;
21492
21493 footprint_header->cf_size = buf_size;
21494 footprint_header->cf_last_region =
21495 sizeof(*footprint_header);
21496 footprint_header->cf_last_zeroes = 0;
21497
21498 footprint_region = (struct vm_map_corpse_footprint_region *)
21499 ((char *)footprint_header +
21500 footprint_header->cf_last_region);
21501 footprint_region->cfr_vaddr = 0;
21502 footprint_region->cfr_num_pages = 0;
21503 } else {
21504 /* retrieve header and last region */
21505 footprint_header = (struct vm_map_corpse_footprint_header *)
21506 new_map->vmmap_corpse_footprint;
21507 footprint_region = (struct vm_map_corpse_footprint_region *)
21508 ((char *)footprint_header +
21509 footprint_header->cf_last_region);
21510 }
21511 footprint_edge = ((uintptr_t)footprint_header +
21512 footprint_header->cf_size);
21513
21514 if ((footprint_region->cfr_vaddr +
21515 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
21516 effective_page_size))
21517 != old_entry->vme_start) {
21518 uint64_t num_pages_delta, num_pages_delta_size;
21519 uint32_t region_offset_delta_size;
21520
21521 /*
21522 * Not the next contiguous virtual address:
21523 * start a new region or store "zero" dispositions for
21524 * the missing pages?
21525 */
21526 /* size of gap in actual page dispositions */
21527 num_pages_delta = ((old_entry->vme_start -
21528 footprint_region->cfr_vaddr) / effective_page_size)
21529 - footprint_region->cfr_num_pages;
21530 num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
21531 /* size of gap as a new footprint region header */
21532 region_offset_delta_size =
21533 (sizeof(*footprint_region) +
21534 roundup(((footprint_region->cfr_num_pages -
21535 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
21536 sizeof(int)) -
21537 ((footprint_region->cfr_num_pages -
21538 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
21539 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
21540 if (region_offset_delta_size < num_pages_delta_size ||
21541 os_add3_overflow(footprint_region->cfr_num_pages,
21542 (uint32_t) num_pages_delta,
21543 1,
21544 &num_pages_tmp)) {
21545 /*
21546 * Storing data for this gap would take more space
21547 * than inserting a new footprint region header:
21548 * let's start a new region and save space. If it's a
21549 * tie, let's avoid using a new region, since that
21550 * would require more region hops to find the right
21551 * range during lookups.
21552 *
21553 * If the current region's cfr_num_pages would overflow
21554 * if we added "zero" page dispositions for the gap,
21555 * no choice but to start a new region.
21556 */
21557 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21558 new_footprint_region =
21559 vm_map_corpse_footprint_new_region(footprint_header);
21560 /* check that we're not going over the edge */
21561 if (new_footprint_region == NULL) {
21562 goto over_the_edge;
21563 }
21564 footprint_region = new_footprint_region;
21565 /* initialize new region as empty */
21566 footprint_region->cfr_vaddr = old_entry->vme_start;
21567 footprint_region->cfr_num_pages = 0;
21568 } else {
21569 /*
21570 * Store "zero" page dispositions for the missing
21571 * pages.
21572 */
21573 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21574 for (; num_pages_delta > 0; num_pages_delta--) {
21575 next_disp_p = (cf_disp_t *)
21576 ((uintptr_t) footprint_region +
21577 sizeof(*footprint_region));
21578 next_disp_p += footprint_region->cfr_num_pages;
21579 /* check that we're not going over the edge */
21580 if ((uintptr_t)next_disp_p >= footprint_edge) {
21581 goto over_the_edge;
21582 }
21583 /* store "zero" disposition for this gap page */
21584 footprint_region->cfr_num_pages++;
21585 *next_disp_p = (cf_disp_t) 0;
21586 footprint_header->cf_last_zeroes++;
21587 }
21588 }
21589 }
21590
21591 for (va = old_entry->vme_start;
21592 va < old_entry->vme_end;
21593 va += effective_page_size) {
21594 int disposition;
21595 cf_disp_t cf_disp;
21596
21597 vm_map_footprint_query_page_info(old_map,
21598 old_entry,
21599 va,
21600 &disposition);
21601 cf_disp = vm_page_disposition_to_cf_disp(disposition);
21602
21603 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21604
21605 if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
21606 /*
21607 * Ignore "zero" dispositions at start of
21608 * region: just move start of region.
21609 */
21610 footprint_region->cfr_vaddr += effective_page_size;
21611 continue;
21612 }
21613
21614 /* would region's cfr_num_pages overflow? */
21615 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
21616 &num_pages_tmp)) {
21617 /* overflow: create a new region */
21618 new_footprint_region =
21619 vm_map_corpse_footprint_new_region(
21620 footprint_header);
21621 if (new_footprint_region == NULL) {
21622 goto over_the_edge;
21623 }
21624 footprint_region = new_footprint_region;
21625 footprint_region->cfr_vaddr = va;
21626 footprint_region->cfr_num_pages = 0;
21627 }
21628
21629 next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
21630 sizeof(*footprint_region));
21631 next_disp_p += footprint_region->cfr_num_pages;
21632 /* check that we're not going over the edge */
21633 if ((uintptr_t)next_disp_p >= footprint_edge) {
21634 goto over_the_edge;
21635 }
21636 /* store this disposition */
21637 *next_disp_p = cf_disp;
21638 footprint_region->cfr_num_pages++;
21639
21640 if (cf_disp != 0) {
21641 /* non-zero disp: break the current zero streak */
21642 footprint_header->cf_last_zeroes = 0;
21643 /* done */
21644 continue;
21645 }
21646
21647 /* zero disp: add to the current streak of zeroes */
21648 footprint_header->cf_last_zeroes++;
21649 if ((footprint_header->cf_last_zeroes +
21650 roundup(((footprint_region->cfr_num_pages -
21651 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
21652 (sizeof(int) - 1),
21653 sizeof(int))) <
21654 (sizeof(*footprint_header))) {
21655 /*
21656 * There are not enough trailing "zero" dispositions
21657 * (+ the extra padding we would need for the previous
21658 * region); creating a new region would not save space
21659 * at this point, so let's keep this "zero" disposition
21660 * in this region and reconsider later.
21661 */
21662 continue;
21663 }
21664 /*
21665 * Create a new region to avoid having too many consecutive
21666 * "zero" dispositions.
21667 */
21668 new_footprint_region =
21669 vm_map_corpse_footprint_new_region(footprint_header);
21670 if (new_footprint_region == NULL) {
21671 goto over_the_edge;
21672 }
21673 footprint_region = new_footprint_region;
21674 /* initialize the new region as empty ... */
21675 footprint_region->cfr_num_pages = 0;
21676 /* ... and skip this "zero" disp */
21677 footprint_region->cfr_vaddr = va + effective_page_size;
21678 }
21679
21680 return KERN_SUCCESS;
21681
21682 over_the_edge:
21683 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21684 vm_map_corpse_footprint_full++;
21685 return KERN_RESOURCE_SHORTAGE;
21686 }
21687
21688 /*
21689 * vm_map_corpse_footprint_collect_done:
21690 * completes the footprint collection by getting rid of any remaining
21691 * trailing "zero" dispositions and trimming the unused part of the
21692 * kernel buffer
21693 */
21694 void
21695 vm_map_corpse_footprint_collect_done(
21696 vm_map_t new_map)
21697 {
21698 struct vm_map_corpse_footprint_header *footprint_header;
21699 struct vm_map_corpse_footprint_region *footprint_region;
21700 vm_size_t buf_size, actual_size;
21701 kern_return_t kr;
21702
21703 assert(new_map->has_corpse_footprint);
21704 if (!new_map->has_corpse_footprint ||
21705 new_map->vmmap_corpse_footprint == NULL) {
21706 return;
21707 }
21708
21709 footprint_header = (struct vm_map_corpse_footprint_header *)
21710 new_map->vmmap_corpse_footprint;
21711 buf_size = footprint_header->cf_size;
21712
21713 footprint_region = (struct vm_map_corpse_footprint_region *)
21714 ((char *)footprint_header +
21715 footprint_header->cf_last_region);
21716
21717 /* get rid of trailing zeroes in last region */
21718 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
21719 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
21720 footprint_header->cf_last_zeroes = 0;
21721
21722 actual_size = (vm_size_t)(footprint_header->cf_last_region +
21723 sizeof(*footprint_region) +
21724 (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));
21725
21726 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
21727 vm_map_corpse_footprint_size_avg =
21728 (((vm_map_corpse_footprint_size_avg *
21729 vm_map_corpse_footprint_count) +
21730 actual_size) /
21731 (vm_map_corpse_footprint_count + 1));
21732 vm_map_corpse_footprint_count++;
21733 if (actual_size > vm_map_corpse_footprint_size_max) {
21734 vm_map_corpse_footprint_size_max = actual_size;
21735 }
21736
21737 actual_size = round_page(actual_size);
21738 if (buf_size > actual_size) {
21739 kr = vm_deallocate(kernel_map,
21740 ((vm_address_t)footprint_header +
21741 actual_size +
21742 PAGE_SIZE), /* trailing guard page */
21743 (buf_size - actual_size));
21744 assertf(kr == KERN_SUCCESS,
21745 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21746 footprint_header,
21747 (uint64_t) buf_size,
21748 (uint64_t) actual_size,
21749 kr);
21750 kr = vm_protect(kernel_map,
21751 ((vm_address_t)footprint_header +
21752 actual_size),
21753 PAGE_SIZE,
21754 FALSE, /* set_maximum */
21755 VM_PROT_NONE);
21756 assertf(kr == KERN_SUCCESS,
21757 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21758 footprint_header,
21759 (uint64_t) buf_size,
21760 (uint64_t) actual_size,
21761 kr);
21762 }
21763
21764 footprint_header->cf_size = actual_size;
21765 }
21766
21767 /*
21768 * vm_map_corpse_footprint_query_page_info:
21769 * retrieves the disposition of the page at virtual address "vaddr"
21770 * in the forked corpse's VM map
21771 *
21772 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
21773 */
21774 kern_return_t
21775 vm_map_corpse_footprint_query_page_info(
21776 vm_map_t map,
21777 vm_map_offset_t va,
21778 int *disposition_p)
21779 {
21780 struct vm_map_corpse_footprint_header *footprint_header;
21781 struct vm_map_corpse_footprint_region *footprint_region;
21782 uint32_t footprint_region_offset;
21783 vm_map_offset_t region_start, region_end;
21784 int disp_idx;
21785 kern_return_t kr;
21786 int effective_page_size;
21787 cf_disp_t cf_disp;
21788
21789 if (!map->has_corpse_footprint) {
21790 *disposition_p = 0;
21791 kr = KERN_INVALID_ARGUMENT;
21792 goto done;
21793 }
21794
21795 footprint_header = map->vmmap_corpse_footprint;
21796 if (footprint_header == NULL) {
21797 *disposition_p = 0;
21798 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21799 kr = KERN_INVALID_ARGUMENT;
21800 goto done;
21801 }
21802
21803 /* start looking at the hint ("cf_hint_region") */
21804 footprint_region_offset = footprint_header->cf_hint_region;
21805
21806 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
21807
21808 lookup_again:
21809 if (footprint_region_offset < sizeof(*footprint_header)) {
21810 /* hint too low: start from 1st region */
21811 footprint_region_offset = sizeof(*footprint_header);
21812 }
21813 if (footprint_region_offset >= footprint_header->cf_last_region) {
21814 /* hint too high: re-start from 1st region */
21815 footprint_region_offset = sizeof(*footprint_header);
21816 }
21817 footprint_region = (struct vm_map_corpse_footprint_region *)
21818 ((char *)footprint_header + footprint_region_offset);
21819 region_start = footprint_region->cfr_vaddr;
21820 region_end = (region_start +
21821 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21822 effective_page_size));
21823 if (va < region_start &&
21824 footprint_region_offset != sizeof(*footprint_header)) {
21825 /* our range starts before the hint region */
21826
21827 /* reset the hint (in a racy way...) */
21828 footprint_header->cf_hint_region = sizeof(*footprint_header);
21829 /* lookup "va" again from 1st region */
21830 footprint_region_offset = sizeof(*footprint_header);
21831 goto lookup_again;
21832 }
21833
21834 while (va >= region_end) {
21835 if (footprint_region_offset >= footprint_header->cf_last_region) {
21836 break;
21837 }
21838 /* skip the region's header */
21839 footprint_region_offset += sizeof(*footprint_region);
21840 /* skip the region's page dispositions */
21841 footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21842 /* align to next word boundary */
21843 footprint_region_offset =
21844 roundup(footprint_region_offset,
21845 sizeof(int));
21846 footprint_region = (struct vm_map_corpse_footprint_region *)
21847 ((char *)footprint_header + footprint_region_offset);
21848 region_start = footprint_region->cfr_vaddr;
21849 region_end = (region_start +
21850 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21851 effective_page_size));
21852 }
21853 if (va < region_start || va >= region_end) {
21854 /* page not found */
21855 *disposition_p = 0;
21856 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21857 kr = KERN_SUCCESS;
21858 goto done;
21859 }
21860
21861 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
21862 footprint_header->cf_hint_region = footprint_region_offset;
21863
21864 /* get page disposition for "va" in this region */
21865 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
21866 cf_disp = footprint_region->cfr_disposition[disp_idx];
21867 *disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
21868 kr = KERN_SUCCESS;
21869 done:
21870 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21871 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
21872 DTRACE_VM4(footprint_query_page_info,
21873 vm_map_t, map,
21874 vm_map_offset_t, va,
21875 int, *disposition_p,
21876 kern_return_t, kr);
21877
21878 return kr;
21879 }
21880
21881 void
21882 vm_map_corpse_footprint_destroy(
21883 vm_map_t map)
21884 {
21885 if (map->has_corpse_footprint &&
21886 map->vmmap_corpse_footprint != 0) {
21887 struct vm_map_corpse_footprint_header *footprint_header;
21888 vm_size_t buf_size;
21889 kern_return_t kr;
21890
21891 footprint_header = map->vmmap_corpse_footprint;
21892 buf_size = footprint_header->cf_size;
21893 kr = vm_deallocate(kernel_map,
21894 (vm_offset_t) map->vmmap_corpse_footprint,
21895 ((vm_size_t) buf_size
21896 + PAGE_SIZE)); /* trailing guard page */
21897 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
21898 map->vmmap_corpse_footprint = 0;
21899 map->has_corpse_footprint = FALSE;
21900 }
21901 }
21902
21903 /*
21904 * vm_map_copy_footprint_ledgers:
21905 * copies any ledger that's relevant to the memory footprint of "old_task"
21906 * into the forked corpse's task ("new_task")
21907 */
21908 void
21909 vm_map_copy_footprint_ledgers(
21910 task_t old_task,
21911 task_t new_task)
21912 {
21913 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
21914 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
21915 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
21916 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
21917 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
21918 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
21919 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
21920 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
21921 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
21922 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
21923 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
21924 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
21925 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
21926 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
21927 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
21928 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
21929 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
21930 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
21931 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
21932 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
21933 }
21934
21935 /*
21936 * vm_map_copy_ledger:
21937 * copy a single ledger from "old_task" to "new_task"
21938 */
21939 void
21940 vm_map_copy_ledger(
21941 task_t old_task,
21942 task_t new_task,
21943 int ledger_entry)
21944 {
21945 ledger_amount_t old_balance, new_balance, delta;
21946
21947 assert(new_task->map->has_corpse_footprint);
21948 if (!new_task->map->has_corpse_footprint) {
21949 return;
21950 }
21951
21952 /* turn off sanity checks for the ledger we're about to mess with */
21953 ledger_disable_panic_on_negative(new_task->ledger,
21954 ledger_entry);
21955
21956 /* adjust "new_task" to match "old_task" */
21957 ledger_get_balance(old_task->ledger,
21958 ledger_entry,
21959 &old_balance);
21960 ledger_get_balance(new_task->ledger,
21961 ledger_entry,
21962 &new_balance);
21963 if (new_balance == old_balance) {
21964 /* new == old: done */
21965 } else if (new_balance > old_balance) {
21966 /* new > old ==> new -= new - old */
21967 delta = new_balance - old_balance;
21968 ledger_debit(new_task->ledger,
21969 ledger_entry,
21970 delta);
21971 } else {
21972 /* new < old ==> new += old - new */
21973 delta = old_balance - new_balance;
21974 ledger_credit(new_task->ledger,
21975 ledger_entry,
21976 delta);
21977 }
21978 }
21979
21980 /*
21981 * vm_map_get_pmap:
21982 * returns the pmap associated with the vm_map
21983 */
21984 pmap_t
21985 vm_map_get_pmap(vm_map_t map)
21986 {
21987 return vm_map_pmap(map);
21988 }
21989
21990 #if MACH_ASSERT
21991
21992 extern int pmap_ledgers_panic;
21993 extern int pmap_ledgers_panic_leeway;
21994
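/*
 * LEDGER_DRIFT(x) declares, for ledger "x", counters of how many checked
 * pmaps ended up with a positive ("over") or negative ("under") balance,
 * plus the cumulative and worst-case drift observed.  They are updated by
 * LEDGER_CHECK_BALANCE() in vm_map_pmap_check_ledgers() below.
 */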
21995 #define LEDGER_DRIFT(__LEDGER) \
21996 int __LEDGER##_over; \
21997 ledger_amount_t __LEDGER##_over_total; \
21998 ledger_amount_t __LEDGER##_over_max; \
21999 int __LEDGER##_under; \
22000 ledger_amount_t __LEDGER##_under_total; \
22001 ledger_amount_t __LEDGER##_under_max
22002
22003 struct {
22004 uint64_t num_pmaps_checked;
22005
22006 LEDGER_DRIFT(phys_footprint);
22007 LEDGER_DRIFT(internal);
22008 LEDGER_DRIFT(internal_compressed);
22009 LEDGER_DRIFT(external);
22010 LEDGER_DRIFT(reusable);
22011 LEDGER_DRIFT(iokit_mapped);
22012 LEDGER_DRIFT(alternate_accounting);
22013 LEDGER_DRIFT(alternate_accounting_compressed);
22014 LEDGER_DRIFT(page_table);
22015 LEDGER_DRIFT(purgeable_volatile);
22016 LEDGER_DRIFT(purgeable_nonvolatile);
22017 LEDGER_DRIFT(purgeable_volatile_compressed);
22018 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
22019 LEDGER_DRIFT(tagged_nofootprint);
22020 LEDGER_DRIFT(tagged_footprint);
22021 LEDGER_DRIFT(tagged_nofootprint_compressed);
22022 LEDGER_DRIFT(tagged_footprint_compressed);
22023 LEDGER_DRIFT(network_volatile);
22024 LEDGER_DRIFT(network_nonvolatile);
22025 LEDGER_DRIFT(network_volatile_compressed);
22026 LEDGER_DRIFT(network_nonvolatile_compressed);
22027 LEDGER_DRIFT(media_nofootprint);
22028 LEDGER_DRIFT(media_footprint);
22029 LEDGER_DRIFT(media_nofootprint_compressed);
22030 LEDGER_DRIFT(media_footprint_compressed);
22031 LEDGER_DRIFT(graphics_nofootprint);
22032 LEDGER_DRIFT(graphics_footprint);
22033 LEDGER_DRIFT(graphics_nofootprint_compressed);
22034 LEDGER_DRIFT(graphics_footprint_compressed);
22035 LEDGER_DRIFT(neural_nofootprint);
22036 LEDGER_DRIFT(neural_footprint);
22037 LEDGER_DRIFT(neural_nofootprint_compressed);
22038 LEDGER_DRIFT(neural_footprint_compressed);
22039 } pmap_ledgers_drift;
22040
22041 void
22042 vm_map_pmap_check_ledgers(
22043 pmap_t pmap,
22044 ledger_t ledger,
22045 int pid,
22046 char *procname)
22047 {
22048 ledger_amount_t bal;
22049 boolean_t do_panic;
22050
22051 do_panic = FALSE;
22052
22053 pmap_ledgers_drift.num_pmaps_checked++;
22054
22055 #define LEDGER_CHECK_BALANCE(__LEDGER) \
22056 MACRO_BEGIN \
22057 int panic_on_negative = TRUE; \
22058 ledger_get_balance(ledger, \
22059 task_ledgers.__LEDGER, \
22060 &bal); \
22061 ledger_get_panic_on_negative(ledger, \
22062 task_ledgers.__LEDGER, \
22063 &panic_on_negative); \
22064 if (bal != 0) { \
22065 if (panic_on_negative || \
22066 (pmap_ledgers_panic && \
22067 pmap_ledgers_panic_leeway > 0 && \
22068 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
22069 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
22070 do_panic = TRUE; \
22071 } \
22072 printf("LEDGER BALANCE proc %d (%s) " \
22073 "\"%s\" = %lld\n", \
22074 pid, procname, #__LEDGER, bal); \
22075 if (bal > 0) { \
22076 pmap_ledgers_drift.__LEDGER##_over++; \
22077 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
22078 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
22079 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
22080 } \
22081 } else if (bal < 0) { \
22082 pmap_ledgers_drift.__LEDGER##_under++; \
22083 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
22084 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
22085 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
22086 } \
22087 } \
22088 } \
22089 MACRO_END
22090
22091 LEDGER_CHECK_BALANCE(phys_footprint);
22092 LEDGER_CHECK_BALANCE(internal);
22093 LEDGER_CHECK_BALANCE(internal_compressed);
22094 LEDGER_CHECK_BALANCE(external);
22095 LEDGER_CHECK_BALANCE(reusable);
22096 LEDGER_CHECK_BALANCE(iokit_mapped);
22097 LEDGER_CHECK_BALANCE(alternate_accounting);
22098 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
22099 LEDGER_CHECK_BALANCE(page_table);
22100 LEDGER_CHECK_BALANCE(purgeable_volatile);
22101 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
22102 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
22103 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
22104 LEDGER_CHECK_BALANCE(tagged_nofootprint);
22105 LEDGER_CHECK_BALANCE(tagged_footprint);
22106 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
22107 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
22108 LEDGER_CHECK_BALANCE(network_volatile);
22109 LEDGER_CHECK_BALANCE(network_nonvolatile);
22110 LEDGER_CHECK_BALANCE(network_volatile_compressed);
22111 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
22112 LEDGER_CHECK_BALANCE(media_nofootprint);
22113 LEDGER_CHECK_BALANCE(media_footprint);
22114 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
22115 LEDGER_CHECK_BALANCE(media_footprint_compressed);
22116 LEDGER_CHECK_BALANCE(graphics_nofootprint);
22117 LEDGER_CHECK_BALANCE(graphics_footprint);
22118 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
22119 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
22120 LEDGER_CHECK_BALANCE(neural_nofootprint);
22121 LEDGER_CHECK_BALANCE(neural_footprint);
22122 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
22123 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
22124
22125 if (do_panic) {
22126 if (pmap_ledgers_panic) {
22127 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers",
22128 pmap, pid, procname);
22129 } else {
22130 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
22131 pmap, pid, procname);
22132 }
22133 }
22134 }
22135 #endif /* MACH_ASSERT */
22136