1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <mach_assert.h>
67
68 #include <vm/vm_options.h>
69
70 #include <libkern/OSAtomic.h>
71
72 #include <mach/kern_return.h>
73 #include <mach/port.h>
74 #include <mach/vm_attributes.h>
75 #include <mach/vm_param.h>
76 #include <mach/vm_behavior.h>
77 #include <mach/vm_statistics.h>
78 #include <mach/memory_object.h>
79 #include <mach/mach_vm.h>
80 #include <machine/cpu_capabilities.h>
81 #include <mach/sdt.h>
82
83 #include <kern/assert.h>
84 #include <kern/backtrace.h>
85 #include <kern/counter.h>
86 #include <kern/exc_guard.h>
87 #include <kern/kalloc.h>
88 #include <kern/zalloc_internal.h>
89
90 #include <vm/cpm.h>
91 #include <vm/vm_compressor.h>
92 #include <vm/vm_compressor_pager.h>
93 #include <vm/vm_init.h>
94 #include <vm/vm_fault.h>
95 #include <vm/vm_map.h>
96 #include <vm/vm_object.h>
97 #include <vm/vm_page.h>
98 #include <vm/vm_pageout.h>
99 #include <vm/pmap.h>
100 #include <vm/vm_kern.h>
101 #include <ipc/ipc_port.h>
102 #include <kern/sched_prim.h>
103 #include <kern/misc_protos.h>
104
105 #include <mach/vm_map_server.h>
106 #include <mach/mach_host_server.h>
107 #include <vm/vm_protos.h>
108 #include <vm/vm_purgeable_internal.h>
109
110 #include <vm/vm_protos.h>
111 #include <vm/vm_shared_region.h>
112 #include <vm/vm_map_store.h>
113
114 #include <san/kasan.h>
115
116 #include <sys/resource.h>
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
119 #include <sys/reboot.h>
120 #include <sys/kdebug_triage.h>
121
122 #if __LP64__
123 #define HAVE_VM_MAP_RESERVED_ENTRY_ZONE 0
124 #else
125 #define HAVE_VM_MAP_RESERVED_ENTRY_ZONE 1
126 #endif
127
128 #include <libkern/section_keywords.h>
129 #if DEVELOPMENT || DEBUG
130 extern int proc_selfcsflags(void);
131 int panic_on_unsigned_execute = 0;
132 int panic_on_mlock_failure = 0;
133 #endif /* DEVELOPMENT || DEBUG */
134
135 #if MACH_ASSERT
136 int debug4k_filter = 0;
137 char debug4k_proc_name[1024] = "";
138 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
139 int debug4k_panic_on_misaligned_sharing = 0;
140 const char *debug4k_category_name[] = {
141 "error", /* 0 */
142 "life", /* 1 */
143 "load", /* 2 */
144 "fault", /* 3 */
145 "copy", /* 4 */
146 "share", /* 5 */
147 "adjust", /* 6 */
148 "pmap", /* 7 */
149 "mementry", /* 8 */
150 "iokit", /* 9 */
151 "upl", /* 10 */
152 "exc", /* 11 */
153 "vfs" /* 12 */
154 };
155 #endif /* MACH_ASSERT */
156 int debug4k_no_cow_copyin = 0;
157
158
159 #if __arm64__
160 extern const int fourk_binary_compatibility_unsafe;
161 extern const int fourk_binary_compatibility_allow_wx;
162 #endif /* __arm64__ */
163 extern int proc_selfpid(void);
164 extern char *proc_name_address(void *p);
165
166 #if VM_MAP_DEBUG_APPLE_PROTECT
167 int vm_map_debug_apple_protect = 0;
168 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
169 #if VM_MAP_DEBUG_FOURK
170 int vm_map_debug_fourk = 0;
171 #endif /* VM_MAP_DEBUG_FOURK */
172
173 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
174 int vm_map_executable_immutable_verbose = 0;
175
176 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
177
178 extern u_int32_t random(void); /* from <libkern/libkern.h> */
179 /* Internal prototypes
180 */
181
182 static void vm_map_simplify_range(
183 vm_map_t map,
184 vm_map_offset_t start,
185 vm_map_offset_t end); /* forward */
186
187 static boolean_t vm_map_range_check(
188 vm_map_t map,
189 vm_map_offset_t start,
190 vm_map_offset_t end,
191 vm_map_entry_t *entry);
192
193 static vm_map_entry_t _vm_map_entry_create(
194 struct vm_map_header *map_header, boolean_t map_locked);
195
196 static void _vm_map_entry_dispose(
197 struct vm_map_header *map_header,
198 vm_map_entry_t entry);
199
200 static void vm_map_pmap_enter(
201 vm_map_t map,
202 vm_map_offset_t addr,
203 vm_map_offset_t end_addr,
204 vm_object_t object,
205 vm_object_offset_t offset,
206 vm_prot_t protection);
207
208 static void _vm_map_clip_end(
209 struct vm_map_header *map_header,
210 vm_map_entry_t entry,
211 vm_map_offset_t end);
212
213 static void _vm_map_clip_start(
214 struct vm_map_header *map_header,
215 vm_map_entry_t entry,
216 vm_map_offset_t start);
217
218 static void vm_map_entry_delete(
219 vm_map_t map,
220 vm_map_entry_t entry);
221
222 static kern_return_t vm_map_delete(
223 vm_map_t map,
224 vm_map_offset_t start,
225 vm_map_offset_t end,
226 int flags,
227 vm_map_t zap_map);
228
229 static void vm_map_copy_insert(
230 vm_map_t map,
231 vm_map_entry_t after_where,
232 vm_map_copy_t copy);
233
234 static kern_return_t vm_map_copy_overwrite_unaligned(
235 vm_map_t dst_map,
236 vm_map_entry_t entry,
237 vm_map_copy_t copy,
238 vm_map_address_t start,
239 boolean_t discard_on_success);
240
241 static kern_return_t vm_map_copy_overwrite_aligned(
242 vm_map_t dst_map,
243 vm_map_entry_t tmp_entry,
244 vm_map_copy_t copy,
245 vm_map_offset_t start,
246 pmap_t pmap);
247
248 static kern_return_t vm_map_copyin_kernel_buffer(
249 vm_map_t src_map,
250 vm_map_address_t src_addr,
251 vm_map_size_t len,
252 boolean_t src_destroy,
253 vm_map_copy_t *copy_result); /* OUT */
254
255 static kern_return_t vm_map_copyout_kernel_buffer(
256 vm_map_t map,
257 vm_map_address_t *addr, /* IN/OUT */
258 vm_map_copy_t copy,
259 vm_map_size_t copy_size,
260 boolean_t overwrite,
261 boolean_t consume_on_success);
262
263 static void vm_map_fork_share(
264 vm_map_t old_map,
265 vm_map_entry_t old_entry,
266 vm_map_t new_map);
267
268 static boolean_t vm_map_fork_copy(
269 vm_map_t old_map,
270 vm_map_entry_t *old_entry_p,
271 vm_map_t new_map,
272 int vm_map_copyin_flags);
273
274 static kern_return_t vm_map_wire_nested(
275 vm_map_t map,
276 vm_map_offset_t start,
277 vm_map_offset_t end,
278 vm_prot_t caller_prot,
279 vm_tag_t tag,
280 boolean_t user_wire,
281 pmap_t map_pmap,
282 vm_map_offset_t pmap_addr,
283 ppnum_t *physpage_p);
284
285 static kern_return_t vm_map_unwire_nested(
286 vm_map_t map,
287 vm_map_offset_t start,
288 vm_map_offset_t end,
289 boolean_t user_wire,
290 pmap_t map_pmap,
291 vm_map_offset_t pmap_addr);
292
293 static kern_return_t vm_map_overwrite_submap_recurse(
294 vm_map_t dst_map,
295 vm_map_offset_t dst_addr,
296 vm_map_size_t dst_size);
297
298 static kern_return_t vm_map_copy_overwrite_nested(
299 vm_map_t dst_map,
300 vm_map_offset_t dst_addr,
301 vm_map_copy_t copy,
302 boolean_t interruptible,
303 pmap_t pmap,
304 boolean_t discard_on_success);
305
306 static kern_return_t vm_map_remap_extract(
307 vm_map_t map,
308 vm_map_offset_t addr,
309 vm_map_size_t size,
310 boolean_t copy,
311 struct vm_map_header *map_header,
312 vm_prot_t *cur_protection,
313 vm_prot_t *max_protection,
314 vm_inherit_t inheritance,
315 vm_map_kernel_flags_t vmk_flags);
316
317 static kern_return_t vm_map_remap_range_allocate(
318 vm_map_t map,
319 vm_map_address_t *address,
320 vm_map_size_t size,
321 vm_map_offset_t mask,
322 int flags,
323 vm_map_kernel_flags_t vmk_flags,
324 vm_tag_t tag,
325 vm_map_entry_t *map_entry);
326
327 static void vm_map_region_look_for_page(
328 vm_map_t map,
329 vm_map_offset_t va,
330 vm_object_t object,
331 vm_object_offset_t offset,
332 int max_refcnt,
333 unsigned short depth,
334 vm_region_extended_info_t extended,
335 mach_msg_type_number_t count);
336
337 static int vm_map_region_count_obj_refs(
338 vm_map_entry_t entry,
339 vm_object_t object);
340
341
342 static kern_return_t vm_map_willneed(
343 vm_map_t map,
344 vm_map_offset_t start,
345 vm_map_offset_t end);
346
347 static kern_return_t vm_map_reuse_pages(
348 vm_map_t map,
349 vm_map_offset_t start,
350 vm_map_offset_t end);
351
352 static kern_return_t vm_map_reusable_pages(
353 vm_map_t map,
354 vm_map_offset_t start,
355 vm_map_offset_t end);
356
357 static kern_return_t vm_map_can_reuse(
358 vm_map_t map,
359 vm_map_offset_t start,
360 vm_map_offset_t end);
361
362 #if MACH_ASSERT
363 static kern_return_t vm_map_pageout(
364 vm_map_t map,
365 vm_map_offset_t start,
366 vm_map_offset_t end);
367 #endif /* MACH_ASSERT */
368
369 kern_return_t vm_map_corpse_footprint_collect(
370 vm_map_t old_map,
371 vm_map_entry_t old_entry,
372 vm_map_t new_map);
373 void vm_map_corpse_footprint_collect_done(
374 vm_map_t new_map);
375 void vm_map_corpse_footprint_destroy(
376 vm_map_t map);
377 kern_return_t vm_map_corpse_footprint_query_page_info(
378 vm_map_t map,
379 vm_map_offset_t va,
380 int *disposition_p);
381 void vm_map_footprint_query_page_info(
382 vm_map_t map,
383 vm_map_entry_t map_entry,
384 vm_map_offset_t curr_s_offset,
385 int *disposition_p);
386
387 pid_t find_largest_process_vm_map_entries(void);
388
389 extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
390 mach_exception_data_type_t subcode);
391
392 /*
393 * Macros to copy a vm_map_entry. We must be careful to correctly
394 * manage the wired page count. vm_map_entry_copy() creates a new
395 * map entry to the same memory - the wired count in the new entry
396 * must be set to zero. vm_map_entry_copy_full() creates a new
397 * entry that is identical to the old entry. This preserves the
398 * wire count; it's used for map splitting and zone changing in
399 * vm_map_copyout.
400 */
401
402 static inline void
vm_map_entry_copy_pmap_cs_assoc(vm_map_t map __unused,vm_map_entry_t new __unused,vm_map_entry_t old __unused)403 vm_map_entry_copy_pmap_cs_assoc(
404 vm_map_t map __unused,
405 vm_map_entry_t new __unused,
406 vm_map_entry_t old __unused)
407 {
408 /* when pmap_cs is not enabled, assert as a sanity check */
409 assert(new->pmap_cs_associated == FALSE);
410 }
411
412 /*
413 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
414 * But for security reasons on some platforms, we don't want the
415 * new mapping to be "used for jit", so we reset the flag here.
416 */
417 static inline void
vm_map_entry_copy_code_signing(vm_map_t map,vm_map_entry_t new,vm_map_entry_t old __unused)418 vm_map_entry_copy_code_signing(
419 vm_map_t map,
420 vm_map_entry_t new,
421 vm_map_entry_t old __unused)
422 {
423 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
424 assert(new->used_for_jit == old->used_for_jit);
425 } else {
426 new->used_for_jit = FALSE;
427 }
428 }
429
/*
 * vm_map_entry_copy:
 *
 * Copy OLD into NEW for a "logical" copy of the mapping: a bitwise
 * copy followed by clearing every attribute that must not be
 * inherited by a new mapping of the same memory (sharing, wiring,
 * transition state, JIT/code-signing rights, IOKit accounting, ...).
 * See the comment block above for how this differs from
 * vm_map_entry_copy_full().
 */
static inline void
vm_map_entry_copy(
	vm_map_t map,
	vm_map_entry_t new,
	vm_map_entry_t old)
{
	*new = *old;
	/* the new mapping starts out private, quiescent and unwired */
	new->is_shared = FALSE;
	new->needs_wakeup = FALSE;
	new->in_transition = FALSE;
	new->wired_count = 0;
	new->user_wired_count = 0;
	/* a copy is never "permanent": it may be deleted or overwritten */
	new->permanent = FALSE;
	vm_map_entry_copy_code_signing(map, new, old);
	vm_map_entry_copy_pmap_cs_assoc(map, new, old);
	/* IOKit accounting does not follow the copy: revert to pmap accounting */
	if (new->iokit_acct) {
		assertf(!new->use_pmap, "old %p new %p\n", old, new);
		new->iokit_acct = FALSE;
		new->use_pmap = TRUE;
	}
	/* resilient/atomic/no-copy-on-read attributes are per-mapping */
	new->vme_resilient_codesign = FALSE;
	new->vme_resilient_media = FALSE;
	new->vme_atomic = FALSE;
	new->vme_no_copy_on_read = FALSE;
}
455
/*
 * vm_map_entry_copy_full:
 *
 * Exact (bitwise) copy of a map entry, preserving the wired counts
 * and all flags; used for map splitting and zone changing in
 * vm_map_copyout.
 */
static inline void
vm_map_entry_copy_full(
	vm_map_entry_t new,
	vm_map_entry_t old)
{
	*new = *old;
}
463
464 /*
465 * Normal lock_read_to_write() returns FALSE/0 on failure.
466 * These functions evaluate to zero on success and non-zero value on failure.
467 */
468 __attribute__((always_inline))
469 int
vm_map_lock_read_to_write(vm_map_t map)470 vm_map_lock_read_to_write(vm_map_t map)
471 {
472 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
473 DTRACE_VM(vm_map_lock_upgrade);
474 return 0;
475 }
476 return 1;
477 }
478
479 __attribute__((always_inline))
480 boolean_t
vm_map_try_lock(vm_map_t map)481 vm_map_try_lock(vm_map_t map)
482 {
483 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
484 DTRACE_VM(vm_map_lock_w);
485 return TRUE;
486 }
487 return FALSE;
488 }
489
490 __attribute__((always_inline))
491 boolean_t
vm_map_try_lock_read(vm_map_t map)492 vm_map_try_lock_read(vm_map_t map)
493 {
494 if (lck_rw_try_lock_shared(&(map)->lock)) {
495 DTRACE_VM(vm_map_lock_r);
496 return TRUE;
497 }
498 return FALSE;
499 }
500
501 /*
502 * Routines to get the page size the caller should
503 * use while inspecting the target address space.
504 * Use the "_safely" variant if the caller is dealing with a user-provided
505 * array whose size depends on the page size, to avoid any overflow or
506 * underflow of a user-allocated buffer.
507 */
508 int
vm_self_region_page_shift_safely(vm_map_t target_map)509 vm_self_region_page_shift_safely(
510 vm_map_t target_map)
511 {
512 int effective_page_shift = 0;
513
514 if (PAGE_SIZE == (4096)) {
515 /* x86_64 and 4k watches: always use 4k */
516 return PAGE_SHIFT;
517 }
518 /* did caller provide an explicit page size for this thread to use? */
519 effective_page_shift = thread_self_region_page_shift();
520 if (effective_page_shift) {
521 /* use the explicitly-provided page size */
522 return effective_page_shift;
523 }
524 /* no explicit page size: use the caller's page size... */
525 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
526 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
527 /* page size match: safe to use */
528 return effective_page_shift;
529 }
530 /* page size mismatch */
531 return -1;
532 }
533 int
vm_self_region_page_shift(vm_map_t target_map)534 vm_self_region_page_shift(
535 vm_map_t target_map)
536 {
537 int effective_page_shift;
538
539 effective_page_shift = vm_self_region_page_shift_safely(target_map);
540 if (effective_page_shift == -1) {
541 /* no safe value but OK to guess for caller */
542 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
543 VM_MAP_PAGE_SHIFT(target_map));
544 }
545 return effective_page_shift;
546 }
547
548
549 /*
550 * Decide if we want to allow processes to execute from their data or stack areas.
551 * override_nx() returns true if we do. Data/stack execution can be enabled independently
552 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
553 * or allow_stack_exec to enable data execution for that type of data area for that particular
554 * ABI (or both by or'ing the flags together). These are initialized in the architecture
555 * specific pmap files since the default behavior varies according to architecture. The
556 * main reason it varies is because of the need to provide binary compatibility with old
557 * applications that were written before these restrictions came into being. In the old
558 * days, an app could execute anything it could read, but this has slowly been tightened
559 * up over time. The default behavior is:
560 *
561 * 32-bit PPC apps may execute from both stack and data areas
 * 32-bit Intel apps may execute from data areas but not stack
563 * 64-bit PPC/Intel apps may not execute from either data or stack
564 *
565 * An application on any architecture may override these defaults by explicitly
566 * adding PROT_EXEC permission to the page in question with the mprotect(2)
567 * system call. This code here just determines what happens when an app tries to
568 * execute from a page that lacks execute permission.
569 *
570 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
571 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
572 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
573 * execution from data areas for a particular binary even if the arch normally permits it. As
574 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
575 * to support some complicated use cases, notably browsers with out-of-process plugins that
576 * are not all NX-safe.
577 */
578
579 extern int allow_data_exec, allow_stack_exec;
580
581 int
override_nx(vm_map_t map,uint32_t user_tag)582 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
583 {
584 int current_abi;
585
586 if (map->pmap == kernel_pmap) {
587 return FALSE;
588 }
589
590 /*
591 * Determine if the app is running in 32 or 64 bit mode.
592 */
593
594 if (vm_map_is_64bit(map)) {
595 current_abi = VM_ABI_64;
596 } else {
597 current_abi = VM_ABI_32;
598 }
599
600 /*
601 * Determine if we should allow the execution based on whether it's a
602 * stack or data area and the current architecture.
603 */
604
605 if (user_tag == VM_MEMORY_STACK) {
606 return allow_stack_exec & current_abi;
607 }
608
609 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
610 }
611
612
613 /*
614 * Virtual memory maps provide for the mapping, protection,
615 * and sharing of virtual memory objects. In addition,
616 * this module provides for an efficient virtual copy of
617 * memory from one map to another.
618 *
619 * Synchronization is required prior to most operations.
620 *
621 * Maps consist of an ordered doubly-linked list of simple
622 * entries; a single hint is used to speed up lookups.
623 *
624 * Sharing maps have been deleted from this version of Mach.
625 * All shared objects are now mapped directly into the respective
626 * maps. This requires a change in the copy on write strategy;
627 * the asymmetric (delayed) strategy is used for shared temporary
628 * objects instead of the symmetric (shadow) strategy. All maps
629 * are now "top level" maps (either task map, kernel map or submap
630 * of the kernel map).
631 *
 * Since portions of maps are specified by start/end addresses,
633 * which may not align with existing map entries, all
634 * routines merely "clip" entries to these start/end values.
635 * [That is, an entry is split into two, bordering at a
636 * start or end value.] Note that these clippings may not
637 * always be necessary (as the two resulting entries are then
638 * not changed); however, the clipping is done for convenience.
639 * No attempt is currently made to "glue back together" two
640 * abutting entries.
641 *
642 * The symmetric (shadow) copy strategy implements virtual copy
643 * by copying VM object references from one map to
644 * another, and then marking both regions as copy-on-write.
645 * It is important to note that only one writeable reference
646 * to a VM object region exists in any map when this strategy
647 * is used -- this means that shadow object creation can be
 * delayed until a write operation occurs. The asymmetric (delayed)
649 * strategy allows multiple maps to have writeable references to
650 * the same region of a vm object, and hence cannot delay creating
651 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
652 * Copying of permanent objects is completely different; see
653 * vm_object_copy_strategically() in vm_object.c.
654 */
655
656 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone; /* zone for vm_map structures */
657 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone; /* zone for vm_map_copy structures */
658
659 SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone; /* zone for vm_map_entry structures */
660 SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
661 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
662 SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone;
663 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
664
665 #define VM_MAP_ZONE_NAME "maps"
666 #define VM_MAP_ZFLAGS ( \
667 ZC_NOENCRYPT | \
668 ZC_NOGZALLOC | \
669 ZC_ALLOW_FOREIGN)
670
671 #define VM_MAP_ENTRY_ZONE_NAME "VM map entries"
672 #define VM_MAP_ENTRY_ZFLAGS ( \
673 ZC_NOENCRYPT | \
674 ZC_CACHING | \
675 ZC_NOGZALLOC | \
676 ZC_KASAN_NOQUARANTINE | \
677 ZC_VM_LP64 | \
678 ZC_ALLOW_FOREIGN)
679
680 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
681 #define VM_MAP_ENTRY_RESERVED_ZONE_NAME "Reserved VM map entries"
682 #define VM_MAP_ENTRY_RESERVED_ZFLAGS ( \
683 ZC_NOENCRYPT | \
684 ZC_NOCACHING | \
685 ZC_NOGZALLOC | \
686 ZC_KASAN_NOQUARANTINE | \
687 ZC_VM)
688 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
689
690 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
691 #define VM_MAP_HOLES_ZFLAGS ( \
692 ZC_NOENCRYPT | \
693 ZC_CACHING | \
694 ZC_NOGZALLOC | \
695 ZC_KASAN_NOQUARANTINE | \
696 ZC_VM_LP64 | \
697 ZC_ALLOW_FOREIGN)
698
699 /*
700 * Asserts that a vm_map_copy object is coming from the
701 * vm_map_copy_zone to ensure that it isn't a fake constructed
702 * anywhere else.
703 */
704 static inline void
vm_map_copy_require(struct vm_map_copy * copy)705 vm_map_copy_require(struct vm_map_copy *copy)
706 {
707 zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
708 }
709
710 /*
711 * vm_map_require:
712 *
713 * Ensures that the argument is memory allocated from the genuine
714 * vm map zone. (See zone_id_require_allow_foreign).
715 */
716 void
vm_map_require(vm_map_t map)717 vm_map_require(vm_map_t map)
718 {
719 zone_id_require_allow_foreign(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
720 }
721
722 static __startup_data vm_offset_t map_data;
723 static __startup_data vm_size_t map_data_size;
724 static __startup_data vm_offset_t kentry_data;
725 static __startup_data vm_size_t kentry_data_size;
726 static __startup_data vm_offset_t map_holes_data;
727 static __startup_data vm_size_t map_holes_data_size;
728
729 #if XNU_TARGET_OS_OSX
730 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
731 #else /* XNU_TARGET_OS_OSX */
732 #define NO_COALESCE_LIMIT 0
733 #endif /* XNU_TARGET_OS_OSX */
734
735 /* Skip acquiring locks if we're in the midst of a kernel core dump */
736 unsigned int not_in_kdp = 1;
737
738 unsigned int vm_map_set_cache_attr_count = 0;
739
740 kern_return_t
vm_map_set_cache_attr(vm_map_t map,vm_map_offset_t va)741 vm_map_set_cache_attr(
742 vm_map_t map,
743 vm_map_offset_t va)
744 {
745 vm_map_entry_t map_entry;
746 vm_object_t object;
747 kern_return_t kr = KERN_SUCCESS;
748
749 vm_map_lock_read(map);
750
751 if (!vm_map_lookup_entry(map, va, &map_entry) ||
752 map_entry->is_sub_map) {
753 /*
754 * that memory is not properly mapped
755 */
756 kr = KERN_INVALID_ARGUMENT;
757 goto done;
758 }
759 object = VME_OBJECT(map_entry);
760
761 if (object == VM_OBJECT_NULL) {
762 /*
763 * there should be a VM object here at this point
764 */
765 kr = KERN_INVALID_ARGUMENT;
766 goto done;
767 }
768 vm_object_lock(object);
769 object->set_cache_attr = TRUE;
770 vm_object_unlock(object);
771
772 vm_map_set_cache_attr_count++;
773 done:
774 vm_map_unlock_read(map);
775
776 return kr;
777 }
778
779
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 *
 * The range [start, end) is processed one (clipped) map entry at a time;
 * the map lock is dropped while each replacement mapping is established.
 * Returns KERN_SUCCESS, or KERN_INVALID_ARGUMENT if part of the range is
 * not properly mapped (or not executable, except for the model-decryption
 * flow), or KERN_FAILURE if the protect pager could not be set up.
 */
kern_return_t
vm_map_apple_protected(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_object_offset_t crypto_backing_offset,
	struct pager_crypt_info *crypt_info,
	uint32_t cryptid)
{
	boolean_t map_locked;
	kern_return_t kr;
	vm_map_entry_t map_entry;
	struct vm_map_entry tmp_entry;
	memory_object_t unprotected_mem_obj;
	vm_object_t protected_object;
	vm_map_offset_t map_addr;
	vm_map_offset_t start_aligned, end_aligned;
	vm_object_offset_t crypto_start, crypto_end;
	int vm_flags;
	vm_map_kernel_flags_t vmk_flags;
	boolean_t cache_pager;

	vm_flags = 0;
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

	map_locked = FALSE;
	unprotected_mem_obj = MEMORY_OBJECT_NULL;

	/* align the range to both the native and the map's page size */
	start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
	end_aligned = vm_map_round_page(end, PAGE_MASK_64);
	start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
	end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));

#if __arm64__
	/*
	 * "start" and "end" might be 4K-aligned but not 16K-aligned,
	 * so we might have to loop and establish up to 3 mappings:
	 *
	 * + the first 16K-page, which might overlap with the previous
	 *   4K-aligned mapping,
	 * + the center,
	 * + the last 16K-page, which might overlap with the next
	 *   4K-aligned mapping.
	 * Each of these mapping might be backed by a vnode pager (if
	 * properly page-aligned) or a "fourk_pager", itself backed by a
	 * vnode pager (if 4K-aligned but not page-aligned).
	 */
#endif /* __arm64__ */

	map_addr = start_aligned;
	/* one iteration per (clipped) map entry in the range */
	for (map_addr = start_aligned;
	    map_addr < end;
	    map_addr = tmp_entry.vme_end) {
		vm_map_lock(map);
		map_locked = TRUE;

		/* lookup the protected VM object */
		if (!vm_map_lookup_entry(map,
		    map_addr,
		    &map_entry) ||
		    map_entry->is_sub_map ||
		    VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
			/* that memory is not properly mapped */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		/* ensure mapped memory is mapped as executable, except
		 * for the model decryption flow */
		if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
		    !(map_entry->protection & VM_PROT_EXECUTE)) {
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		/* get the protected object to be decrypted */
		protected_object = VME_OBJECT(map_entry);
		if (protected_object == VM_OBJECT_NULL) {
			/* there should be a VM object here at this point */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}
		/* ensure protected object stays alive while map is unlocked */
		vm_object_reference(protected_object);

		/* limit the map entry to the area we want to cover */
		vm_map_clip_start(map, map_entry, start_aligned);
		vm_map_clip_end(map, map_entry, end_aligned);

		/* snapshot the clipped entry: it goes stale once we unlock */
		tmp_entry = *map_entry;
		map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
		vm_map_unlock(map);
		map_locked = FALSE;

		/*
		 * This map entry might be only partially encrypted
		 * (if not fully "page-aligned").
		 */
		crypto_start = 0;
		crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
		if (tmp_entry.vme_start < start) {
			if (tmp_entry.vme_start != start_aligned) {
				/* NOTE(review): "kr" is set here without a goto and is
				 * later overwritten by vm_map_enter_mem_object() /
				 * the final KERN_SUCCESS -- confirm this is intentional */
				kr = KERN_INVALID_ADDRESS;
			}
			crypto_start += (start - tmp_entry.vme_start);
		}
		if (tmp_entry.vme_end > end) {
			if (tmp_entry.vme_end != end_aligned) {
				/* NOTE(review): same as above -- "kr" not acted upon */
				kr = KERN_INVALID_ADDRESS;
			}
			crypto_end -= (tmp_entry.vme_end - end);
		}

		/*
		 * This "extra backing offset" is needed to get the decryption
		 * routine to use the right key. It adjusts for the possibly
		 * relative offset of an interposed "4K" pager...
		 */
		if (crypto_backing_offset == (vm_object_offset_t) -1) {
			crypto_backing_offset = VME_OFFSET(&tmp_entry);
		}

		cache_pager = TRUE;
#if XNU_TARGET_OS_OSX
		if (vm_map_is_alien(map)) {
			cache_pager = FALSE;
		}
#endif /* XNU_TARGET_OS_OSX */

		/*
		 * Lookup (and create if necessary) the protected memory object
		 * matching that VM object.
		 * If successful, this also grabs a reference on the memory object,
		 * to guarantee that it doesn't go away before we get a chance to map
		 * it.
		 */
		unprotected_mem_obj = apple_protect_pager_setup(
			protected_object,
			VME_OFFSET(&tmp_entry),
			crypto_backing_offset,
			crypt_info,
			crypto_start,
			crypto_end,
			cache_pager);

		/* release extra ref on protected object */
		vm_object_deallocate(protected_object);

		if (unprotected_mem_obj == NULL) {
			kr = KERN_FAILURE;
			goto done;
		}

		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
		/* can overwrite an immutable mapping */
		vmk_flags.vmkf_overwrite_immutable = TRUE;
#if __arm64__
		if (tmp_entry.used_for_jit &&
		    (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
		    PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
		    fourk_binary_compatibility_unsafe &&
		    fourk_binary_compatibility_allow_wx) {
			printf("** FOURK_COMPAT [%d]: "
			    "allowing write+execute at 0x%llx\n",
			    proc_selfpid(), tmp_entry.vme_start);
			vmk_flags.vmkf_map_jit = TRUE;
		}
#endif /* __arm64__ */

		/* map this memory object in place of the current one */
		map_addr = tmp_entry.vme_start;
		kr = vm_map_enter_mem_object(map,
		    &map_addr,
		    (tmp_entry.vme_end -
		    tmp_entry.vme_start),
		    (mach_vm_offset_t) 0,
		    vm_flags,
		    vmk_flags,
		    VM_KERN_MEMORY_NONE,
		    (ipc_port_t)(uintptr_t) unprotected_mem_obj,
		    0,
		    TRUE,
		    tmp_entry.protection,
		    tmp_entry.max_protection,
		    tmp_entry.inheritance);
		assertf(kr == KERN_SUCCESS,
		    "kr = 0x%x\n", kr);
		assertf(map_addr == tmp_entry.vme_start,
		    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
		    (uint64_t)map_addr,
		    (uint64_t) tmp_entry.vme_start,
		    &tmp_entry);

#if VM_MAP_DEBUG_APPLE_PROTECT
		if (vm_map_debug_apple_protect) {
			printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
			    " backing:[object:%p,offset:0x%llx,"
			    "crypto_backing_offset:0x%llx,"
			    "crypto_start:0x%llx,crypto_end:0x%llx]\n",
			    map,
			    (uint64_t) map_addr,
			    (uint64_t) (map_addr + (tmp_entry.vme_end -
			    tmp_entry.vme_start)),
			    unprotected_mem_obj,
			    protected_object,
			    VME_OFFSET(&tmp_entry),
			    crypto_backing_offset,
			    crypto_start,
			    crypto_end);
		}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

		/*
		 * Release the reference obtained by
		 * apple_protect_pager_setup().
		 * The mapping (if it succeeded) is now holding a reference on
		 * the memory object.
		 */
		memory_object_deallocate(unprotected_mem_obj);
		unprotected_mem_obj = MEMORY_OBJECT_NULL;

		/* continue with next map entry */
		crypto_backing_offset += (tmp_entry.vme_end -
		    tmp_entry.vme_start);
		crypto_backing_offset -= crypto_start;
	}
	kr = KERN_SUCCESS;

done:
	if (map_locked) {
		vm_map_unlock(map);
	}
	return kr;
}
#endif /* CONFIG_CODE_DECRYPTION */
1024
1025
/* Lock group and attributes shared by every vm_map's locks. */
LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
/* rw-lock attribute: lock debugging enabled for map rw locks */
LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);

/*
 * malloc_no_cow: boot-arg-overridable policy (see vm_map_init).
 * Defaults off on macOS, on for other targets.
 */
#if XNU_TARGET_OS_OSX
int malloc_no_cow = 0;
#else /* XNU_TARGET_OS_OSX */
int malloc_no_cow = 1;
#endif /* XNU_TARGET_OS_OSX */
/* bitmask of VM_MEMORY_MALLOC* tags that opt out of copy-on-write */
uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
#if DEBUG
/* enables extra map consistency checks; set via boot-arg in vm_map_init */
int vm_check_map_sanity = 0;
#endif
1039
1040 /*
1041 * vm_map_init:
1042 *
1043 * Initialize the vm_map module. Must be called before
1044 * any other vm_map routines.
1045 *
1046 * Map and entry structures are allocated from zones -- we must
1047 * initialize those zones.
1048 *
1049 * There are three zones of interest:
1050 *
1051 * vm_map_zone: used to allocate maps.
1052 * vm_map_entry_zone: used to allocate map entries.
1053 *
1054 * LP32:
1055 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1056 *
1057 * The kernel allocates map entries from a special zone that is initially
1058 * "crammed" with memory. It would be difficult (perhaps impossible) for
1059 * the kernel to allocate more memory to a entry zone when it became
1060 * empty since the very act of allocating memory implies the creation
1061 * of a new entry.
1062 */
1063 __startup_func
1064 void
vm_map_init(void)1065 vm_map_init(void)
1066 {
1067
1068 #if MACH_ASSERT
1069 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1070 sizeof(debug4k_filter));
1071 #endif /* MACH_ASSERT */
1072
1073 vm_map_zone = zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1074 VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
1075
1076 /*
1077 * Don't quarantine because we always need elements available
1078 * Disallow GC on this zone... to aid the GC.
1079 */
1080 vm_map_entry_zone = zone_create_ext(VM_MAP_ENTRY_ZONE_NAME,
1081 sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1082 ZONE_ID_VM_MAP_ENTRY, ^(zone_t z) {
1083 z->z_elems_rsv = (uint16_t)(32 *
1084 (ml_early_cpu_max_number() + 1));
1085 });
1086 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
1087 vm_map_entry_reserved_zone = zone_create(VM_MAP_ENTRY_RESERVED_ZONE_NAME,
1088 sizeof(struct vm_map_entry), VM_MAP_ENTRY_RESERVED_ZFLAGS);
1089 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
1090
1091 vm_map_holes_zone = zone_create_ext(VM_MAP_HOLES_ZONE_NAME,
1092 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1093 ZONE_ID_VM_MAP_HOLES, ^(zone_t z) {
1094 z->z_elems_rsv = (uint16_t)(16 * 1024 / zone_elem_size(z));
1095 });
1096
1097 vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1098 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1099
1100 /*
1101 * Add the stolen memory to zones, adjust zone size and stolen counts.
1102 */
1103 zone_cram_foreign(vm_map_zone, map_data, map_data_size);
1104 zone_cram_foreign(vm_map_entry_zone, kentry_data, kentry_data_size);
1105 zone_cram_foreign(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1106 printf("VM boostrap: %d maps, %d entries and %d holes available\n",
1107 vm_map_zone->z_elems_free,
1108 vm_map_entry_zone->z_elems_free,
1109 vm_map_holes_zone->z_elems_free);
1110
1111 /*
1112 * Since these are covered by zones, remove them from stolen page accounting.
1113 */
1114 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1115
1116 #if VM_MAP_DEBUG_APPLE_PROTECT
1117 PE_parse_boot_argn("vm_map_debug_apple_protect",
1118 &vm_map_debug_apple_protect,
1119 sizeof(vm_map_debug_apple_protect));
1120 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1121 #if VM_MAP_DEBUG_APPLE_FOURK
1122 PE_parse_boot_argn("vm_map_debug_fourk",
1123 &vm_map_debug_fourk,
1124 sizeof(vm_map_debug_fourk));
1125 #endif /* VM_MAP_DEBUG_FOURK */
1126 PE_parse_boot_argn("vm_map_executable_immutable",
1127 &vm_map_executable_immutable,
1128 sizeof(vm_map_executable_immutable));
1129 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1130 &vm_map_executable_immutable_verbose,
1131 sizeof(vm_map_executable_immutable_verbose));
1132
1133 PE_parse_boot_argn("malloc_no_cow",
1134 &malloc_no_cow,
1135 sizeof(malloc_no_cow));
1136 if (malloc_no_cow) {
1137 vm_memory_malloc_no_cow_mask = 0ULL;
1138 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1139 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1140 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1141 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1142 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1143 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1144 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1145 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1146 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1147 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1148 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1149 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1150 &vm_memory_malloc_no_cow_mask,
1151 sizeof(vm_memory_malloc_no_cow_mask));
1152 }
1153
1154 #if DEBUG
1155 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1156 if (vm_check_map_sanity) {
1157 kprintf("VM sanity checking enabled\n");
1158 } else {
1159 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1160 }
1161 #endif /* DEBUG */
1162
1163 #if DEVELOPMENT || DEBUG
1164 PE_parse_boot_argn("panic_on_unsigned_execute",
1165 &panic_on_unsigned_execute,
1166 sizeof(panic_on_unsigned_execute));
1167 PE_parse_boot_argn("panic_on_mlock_failure",
1168 &panic_on_mlock_failure,
1169 sizeof(panic_on_mlock_failure));
1170 #endif /* DEVELOPMENT || DEBUG */
1171 }
1172
/*
 * vm_map_steal_memory:
 *
 * Steals (before the VM is up) a single contiguous chunk of boot memory
 * to seed the vm_map, vm_map_entry, and vm_map_holes zones, so that maps
 * and entries can be allocated before zone_init() has run.  Sizes are
 * recorded in map_data_size / kentry_data_size / map_holes_data_size and
 * crammed into the zones later by vm_map_init().
 */
__startup_func
static void
vm_map_steal_memory(void)
{
	uint16_t kentry_initial_pages;
	uint16_t zone_foreign_pages;
	bool overloaded = false;

	/*
	 * 1 page of maps and holes is enough for early boot
	 *
	 * Those early crams are only needed to bootstrap zones
	 * until zone_init() has run (STARTUP_RANK_FIRST of ZALLOC).
	 * After that point, zones know how to allocate vm map entries,
	 * holes, and maps.
	 */
	map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
	    sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);

	map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
	    sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS, 1);

	/*
	 * kentry_initial_pages corresponds to the number of kernel map entries
	 * required during bootstrap until the zone subsystem is fully functional.
	 */
#if defined(__LP64__)
	kentry_initial_pages = (uint16_t)atop(10 * 4096);
#else
	kentry_initial_pages = 6;
#endif

#if CONFIG_GZALLOC
	/*
	 * If using the guard allocator, reserve more memory for the kernel
	 * reserved map entry pool.
	 */
	if (gzalloc_enabled()) {
		kentry_initial_pages *= 100;
		overloaded = true;
	}
#endif
	/* boot-arg override of the initial entry page count */
	if (PE_parse_boot_argn("zone_foreign_pages", &zone_foreign_pages,
	    sizeof(zone_foreign_pages))) {
		kentry_initial_pages = zone_foreign_pages;
		overloaded = true;
	}

	kentry_data_size = zone_get_foreign_alloc_size(VM_MAP_ENTRY_ZONE_NAME,
	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
	    kentry_initial_pages);

	/*
	 * Steal a contiguous range of memory so that a simple range check
	 * can validate foreign addresses being freed/crammed to these
	 * zones
	 */
	vm_size_t total_size;
	if (os_add3_overflow(map_data_size, kentry_data_size,
	    map_holes_data_size, &total_size)) {
		panic("vm_map_steal_memory: overflow in amount of memory requested");
	}
	/* single allocation; the three regions are carved out back-to-back */
	map_data = zone_foreign_mem_init(total_size, overloaded);
	kentry_data = map_data + map_data_size;
	map_holes_data = kentry_data + kentry_data_size;
}
STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1240
1241 __startup_func
1242 static void
vm_kernel_boostraped(void)1243 vm_kernel_boostraped(void)
1244 {
1245 printf("VM bootstrap done: %d maps, %d entries and %d holes left\n",
1246 vm_map_zone->z_elems_free,
1247 vm_map_entry_zone->z_elems_free,
1248 vm_map_holes_zone->z_elems_free);
1249 }
1250 STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_kernel_boostraped);
1251
1252 void
vm_map_disable_hole_optimization(vm_map_t map)1253 vm_map_disable_hole_optimization(vm_map_t map)
1254 {
1255 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1256
1257 if (map->holelistenabled) {
1258 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1259
1260 while (hole_entry != NULL) {
1261 next_hole_entry = hole_entry->vme_next;
1262
1263 hole_entry->vme_next = NULL;
1264 hole_entry->vme_prev = NULL;
1265 zfree(vm_map_holes_zone, hole_entry);
1266
1267 if (next_hole_entry == head_entry) {
1268 hole_entry = NULL;
1269 } else {
1270 hole_entry = next_hole_entry;
1271 }
1272 }
1273
1274 map->holes_list = NULL;
1275 map->holelistenabled = FALSE;
1276
1277 map->first_free = vm_map_first_entry(map);
1278 SAVE_HINT_HOLE_WRITE(map, NULL);
1279 }
1280 }
1281
1282 boolean_t
vm_kernel_map_is_kernel(vm_map_t map)1283 vm_kernel_map_is_kernel(vm_map_t map)
1284 {
1285 return map->pmap == kernel_pmap;
1286 }
1287
1288 /*
1289 * vm_map_create:
1290 *
1291 * Creates and returns a new empty VM map with
1292 * the given physical map structure, and having
1293 * the given lower and upper address bounds.
1294 */
1295
1296 vm_map_t
vm_map_create(pmap_t pmap,vm_map_offset_t min,vm_map_offset_t max,boolean_t pageable)1297 vm_map_create(
1298 pmap_t pmap,
1299 vm_map_offset_t min,
1300 vm_map_offset_t max,
1301 boolean_t pageable)
1302 {
1303 int options;
1304
1305 options = 0;
1306 if (pageable) {
1307 options |= VM_MAP_CREATE_PAGEABLE;
1308 }
1309 return vm_map_create_options(pmap, min, max, options);
1310 }
1311
/*
 *	vm_map_create_options:
 *
 *	Worker for vm_map_create(): allocates and fully initializes a new
 *	vm_map covering [min, max) on the given pmap.  Returns VM_MAP_NULL
 *	if "options" contains bits outside VM_MAP_CREATE_ALL_OPTIONS.
 *	The caller receives the initial (and only) reference.
 */
vm_map_t
vm_map_create_options(
	pmap_t          pmap,
	vm_map_offset_t min,
	vm_map_offset_t max,
	int             options)
{
	vm_map_t        result;
	struct vm_map_links *hole_entry = NULL;

	if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
		/* unknown option */
		return VM_MAP_NULL;
	}

	result = zalloc_flags(vm_map_zone, Z_WAITOK | Z_NOFAIL);

	/* start with an empty circular entry list */
	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result) = vm_map_to_entry(result);
	result->hdr.nentries = 0;
	if (options & VM_MAP_CREATE_PAGEABLE) {
		result->hdr.entries_pageable = TRUE;
	} else {
		result->hdr.entries_pageable = FALSE;
	}

	vm_map_store_init( &(result->hdr));

	result->hdr.page_shift = PAGE_SHIFT;

	result->size = 0;
	result->size_limit = RLIM_INFINITY;             /* default unlimited */
	result->data_limit = RLIM_INFINITY;             /* default unlimited */
	result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
	result->user_wire_size = 0;
#if XNU_TARGET_OS_OSX
	result->vmmap_high_start = 0;
#endif
	/* initial reference goes to the caller */
	os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
	result->pmap = pmap;
	result->min_offset = min;
	result->max_offset = max;
	result->wiring_required = FALSE;
	result->no_zero_fill = FALSE;
	result->mapped_in_other_pmaps = FALSE;
	result->wait_for_space = FALSE;
	result->switch_protect = FALSE;
	result->disable_vmentry_reuse = FALSE;
	result->map_disallow_data_exec = FALSE;
	result->is_nested_map = FALSE;
	result->map_disallow_new_exec = FALSE;
	result->terminated = FALSE;
	result->cs_enforcement = FALSE;
	result->cs_debugged = FALSE;
	result->highest_entry_end = 0;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);
	result->jit_entry_exists = FALSE;
	result->is_alien = FALSE;
	result->reserved_regions = FALSE;
	result->single_jit = FALSE;

	/* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
	if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
		result->has_corpse_footprint = TRUE;
		result->holelistenabled = FALSE;
		result->vmmap_corpse_footprint = NULL;
	} else if (startup_phase >= STARTUP_SUB_ZALLOC) {
		/* seed the hole list with one hole spanning the usable range */
		hole_entry = zalloc(vm_map_holes_zone);

		hole_entry->start = min;
#if defined(__arm__) || defined(__arm64__)
		hole_entry->end = result->max_offset;
#else
		hole_entry->end = MAX(max, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
#endif
		result->holes_list = result->hole_hint = hole_entry;
		/* circular list of one element */
		hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
		result->holelistenabled = TRUE;
		result->has_corpse_footprint = FALSE;
	} else {
		/* too early in boot for the holes zone; no hole list */
		result->holelistenabled = FALSE;
		result->has_corpse_footprint = FALSE;
	}

	vm_map_lock_init(result);
	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);

	return result;
}
1402
1403 vm_map_size_t
vm_map_adjusted_size(vm_map_t map)1404 vm_map_adjusted_size(vm_map_t map)
1405 {
1406 struct vm_reserved_region *regions = NULL;
1407 size_t num_regions = 0;
1408 mach_vm_size_t reserved_size = 0, map_size = 0;
1409
1410 if (map == NULL || (map->size == 0)) {
1411 return 0;
1412 }
1413
1414 map_size = map->size;
1415
1416 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1417 /*
1418 * No special reserved regions or not an exotic map or the task
1419 * is terminating and these special regions might have already
1420 * been deallocated.
1421 */
1422 return map_size;
1423 }
1424
1425 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), ®ions);
1426 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1427
1428 while (num_regions) {
1429 reserved_size += regions[--num_regions].vmrr_size;
1430 }
1431
1432 /*
1433 * There are a few places where the map is being switched out due to
1434 * 'termination' without that bit being set (e.g. exec and corpse purging).
1435 * In those cases, we could have the map's regions being deallocated on
1436 * a core while some accounting process is trying to get the map's size.
1437 * So this assert can't be enabled till all those places are uniform in
1438 * their use of the 'map->terminated' bit.
1439 *
1440 * assert(map_size >= reserved_size);
1441 */
1442
1443 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1444 }
1445
/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion in the
 *	given map (or map copy).  No fields are filled.
 *	Both forms funnel into _vm_map_entry_create() with the
 *	appropriate header (map hdr vs. copy cpy_hdr).
 */
#define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked) \
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1456
/*
 * _vm_map_entry_create:
 *
 * Worker for vm_map_entry_create()/vm_map_copy_entry_create().
 * Allocates a zeroed map entry (behavior and inheritance set to
 * defaults), falling back to the reserved entry zone when allocating
 * for the map that backs the entry zone itself (to avoid recursive
 * map locking).  Never fails: the final allocation is Z_WAITOK.
 */
static vm_map_entry_t
_vm_map_entry_create(
	struct vm_map_header *map_header __unused,
	boolean_t map_locked __unused)
{
	vm_map_entry_t entry = NULL;
	zone_t zone = vm_map_entry_zone;

	/* pageable headers must not be populated while holding the map lock */
	assert(map_header->entries_pageable ? !map_locked : TRUE);

#if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
	zone_security_flags_t zsflags = zone_security_array[ZONE_ID_VM_MAP_ENTRY];
	if (map_header == &zone_submap(zsflags)->hdr) {
		/*
		 * If we are trying to allocate an entry for the submap
		 * of the vm_map_entry_zone, then this can cause recursive
		 * locking of this map.
		 *
		 * Try to allocate _without blocking_ from this zone,
		 * but if it is depleted, we need to go to the
		 * vm_map_entry_reserved_zone which is in the zalloc
		 * "VM" submap, which can grow without taking any map lock.
		 *
		 * Note: the vm_map_entry_zone has a rather high "reserve"
		 * setup in order to minimize usage of the reserved one.
		 */
		entry = zalloc_flags(vm_map_entry_zone, Z_NOWAIT | Z_ZERO);
		zone = vm_map_entry_reserved_zone;
	}
#endif
	if (entry == NULL) {
		/* normal (or fallback) path: may block until memory is available */
		entry = zalloc_flags(zone, Z_WAITOK | Z_ZERO);
	}

	entry->behavior = VM_BEHAVIOR_DEFAULT;
	entry->inheritance = VM_INHERIT_DEFAULT;

	vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
#if MAP_ENTRY_CREATION_DEBUG
	/* record who created this entry, for leak/corruption triage */
	entry->vme_creation_maphdr = map_header;
	backtrace(&entry->vme_creation_bt[0],
	    (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL, NULL);
#endif
	return entry;
}
1502
/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *	write map lock held so no need to
 *	do anything special to insure correctness
 *	of the stores
 */
#define vm_map_entry_dispose(map, entry) \
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define vm_map_copy_entry_dispose(copy, entry) \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))

/*
 * _vm_map_entry_dispose:
 *
 * Frees a map entry back to whichever zone it came from:
 * regular entries (and foreign/crammed ones) go to vm_map_entry_zone,
 * everything else came from the reserved entry zone.
 */
static void
_vm_map_entry_dispose(
	struct vm_map_header *map_header __unused,
	vm_map_entry_t entry)
{
#if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
	/* route the free to the zone that actually owns this element */
	switch (zone_id_for_native_element(entry, sizeof(*entry))) {
	case ZONE_ID_VM_MAP_ENTRY:
	case ZONE_ID_INVALID: /* foreign elements are regular entries always */
		break;
	default:
		zfree(vm_map_entry_reserved_zone, entry);
		return;
	}
#endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
	zfree(vm_map_entry_zone, entry);
}
1535
#if MACH_ASSERT
static boolean_t first_free_check = FALSE;
/*
 * first_free_is_valid:
 *
 * Debug helper: validate the map's "first_free" hint against the
 * store.  Checking is opt-in via first_free_check; when disabled,
 * the hint is reported as trivially valid.
 */
boolean_t
first_free_is_valid(
	vm_map_t map)
{
	return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif /* MACH_ASSERT */
1549
1550
/* Link/unlink entries on a vm_map_copy's private entry list. */
#define vm_map_copy_entry_link(copy, after_where, entry) \
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry) \
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1556
/*
 *	vm_map_destroy:
 *
 *	Actually destroy a map: delete all entries (forcing through
 *	immutable/nested mappings), tear down the hole list and corpse
 *	footprint, destroy the pmap, and free the map structure.
 *	Callers must hold the final reference.
 */
void
vm_map_destroy(
	vm_map_t        map,
	int             flags)
{
	vm_map_lock(map);

	/* final cleanup: no need to unnest shared region */
	flags |= VM_MAP_REMOVE_NO_UNNESTING;
	/* final cleanup: ok to remove immutable mappings */
	flags |= VM_MAP_REMOVE_IMMUTABLE;
	/* final cleanup: allow gaps in range */
	flags |= VM_MAP_REMOVE_GAPS_OK;

	/* clean up regular map entries */
	(void) vm_map_delete(map, map->min_offset, map->max_offset,
	    flags, VM_MAP_NULL);
	/* clean up leftover special mappings (commpage, GPU carveout, etc...) */
#if !defined(__arm__)
	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
	    flags, VM_MAP_NULL);
#endif /* !__arm__ */

	vm_map_disable_hole_optimization(map);
	vm_map_corpse_footprint_destroy(map);

	vm_map_unlock(map);

	/* the two delete passes above must have emptied the entry list */
	assert(map->hdr.nentries == 0);

	if (map->pmap) {
		pmap_destroy(map->pmap);
	}

#if LOCKS_INDIRECT_ALLOW
	if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
		/*
		 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
		 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
		 * structure or kalloc'ed via lck_mtx_init.
		 * An example is s_lock_ext within struct _vm_map.
		 *
		 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
		 * can add another tag to detect embedded vs alloc'ed indirect external
		 * mutexes but that'll be additional checks in the lock path and require
		 * updating dependencies for the old vs new tag.
		 *
		 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
		 * just when lock debugging is ON, we choose to forego explicitly destroying
		 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
		 * count on vm_map_lck_grp, which has no serious side-effect.
		 */
	} else
#endif /* LOCKS_INDIRECT_ALLOW */
	{
		lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
		lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
	}

	zfree(vm_map_zone, map);
}
1623
/*
 * Returns pid of the task with the largest number of VM map entries.
 * Used in the zone-map-exhaustion jetsam path.
 * Returns -1 if no eligible task is found (kernel_task and inactive
 * tasks are skipped).
 */
pid_t
find_largest_process_vm_map_entries(void)
{
	pid_t victim_pid = -1;
	int max_vm_map_entries = 0;
	task_t task = TASK_NULL;
	queue_head_t *task_list = &tasks;

	/* walk the global task list under the tasks/threads lock */
	lck_mtx_lock(&tasks_threads_lock);
	queue_iterate(task_list, task, task_t, tasks) {
		if (task == kernel_task || !task->active) {
			continue;
		}

		vm_map_t task_map = task->map;
		if (task_map != VM_MAP_NULL) {
			/* nentries read without the map lock; a point-in-time snapshot */
			int task_vm_map_entries = task_map->hdr.nentries;
			if (task_vm_map_entries > max_vm_map_entries) {
				max_vm_map_entries = task_vm_map_entries;
				victim_pid = pid_from_task(task);
			}
		}
	}
	lck_mtx_unlock(&tasks_threads_lock);

	printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
	return victim_pid;
}
1656
1657
1658 /*
1659 * vm_map_lookup_entry: [ internal use only ]
1660 *
1661 * Calls into the vm map store layer to find the map
1662 * entry containing (or immediately preceding) the
1663 * specified address in the given map; the entry is returned
1664 * in the "entry" parameter. The boolean
1665 * result indicates whether the address is
1666 * actually contained in the map.
1667 */
1668 boolean_t
vm_map_lookup_entry(vm_map_t map,vm_map_offset_t address,vm_map_entry_t * entry)1669 vm_map_lookup_entry(
1670 vm_map_t map,
1671 vm_map_offset_t address,
1672 vm_map_entry_t *entry) /* OUT */
1673 {
1674 #if CONFIG_KERNEL_TBI
1675 if (VM_KERNEL_ADDRESS(address)) {
1676 address = VM_KERNEL_STRIP_UPTR(address);
1677 }
1678 #endif /* CONFIG_KERNEL_TBI */
1679 return vm_map_store_lookup_entry( map, address, entry );
1680 }
1681
/*
 *	Routine:	vm_map_find_space
 *	Purpose:
 *		Allocate a range in the specified virtual address map,
 *		returning the entry allocated for that range.
 *		Used by kmem_alloc, etc.
 *
 *		The map must NOT be locked.  It will be returned locked
 *		on KERN_SUCCESS, unlocked on failure.
 *
 *		If an entry is allocated, the object/offset fields
 *		are initialized to zero.
 *
 *		If VM_MAP_FIND_LAST_FREE flag is set, allocate from end of map. This
 *		is currently only used for allocating memory for zones backing
 *		one of the kalloc heaps.(rdar://65832263)
 */
kern_return_t
vm_map_find_space(
	vm_map_t        map,
	vm_map_offset_t *address,       /* OUT */
	vm_map_size_t   size,
	vm_map_offset_t mask,
	int             flags,
	vm_map_kernel_flags_t vmk_flags,
	vm_tag_t        tag,
	vm_map_entry_t  *o_entry)       /* OUT */
{
	vm_map_entry_t entry, new_entry, hole_entry;
	vm_map_offset_t start;
	vm_map_offset_t end;

	if (size == 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/* allocate the entry before taking the map lock (may block) */
	new_entry = vm_map_entry_create(map, FALSE);
	vm_map_lock(map);

	if (flags & VM_MAP_FIND_LAST_FREE) {
		assert(!map->disable_vmentry_reuse);
		/* TODO: Make backward lookup generic and support guard pages */
		assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
		assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));

		/* Allocate space from end of map */
		vm_map_store_find_last_free(map, &entry);

		if (!entry) {
			goto noSpace;
		}

		if (entry == vm_map_to_entry(map)) {
			end = map->max_offset;
		} else {
			end = entry->vme_start;
		}

		/*
		 * Walk backwards from the end of the map until a gap
		 * of at least "size" bytes is found.
		 */
		while (TRUE) {
			vm_map_entry_t prev;

			start = end - size;

			/* ran off the bottom of the map, or underflowed */
			if ((start < map->min_offset) || end < start) {
				goto noSpace;
			}

			prev = entry->vme_prev;
			entry = prev;

			if (prev == vm_map_to_entry(map)) {
				break;
			}

			if (prev->vme_end <= start) {
				break;
			}

			/*
			 * Didn't fit -- move to the next entry.
			 */

			end = entry->vme_start;
		}
	} else {
		if (vmk_flags.vmkf_guard_after) {
			/* account for the back guard page in the size */
			size += VM_MAP_PAGE_SIZE(map);
		}

		/*
		 *	Look for the first possible address; if there's already
		 *	something at this address, we have to start after it.
		 */

		if (map->disable_vmentry_reuse == TRUE) {
			VM_MAP_HIGHEST_ENTRY(map, entry, start);
		} else {
			if (map->holelistenabled) {
				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

				if (hole_entry == NULL) {
					/*
					 *	No more space in the map?
					 */
					goto noSpace;
				}

				entry = hole_entry;
				start = entry->vme_start;
			} else {
				/* classic path: start after the first_free hint */
				assert(first_free_is_valid(map));
				if ((entry = map->first_free) == vm_map_to_entry(map)) {
					start = map->min_offset;
				} else {
					start = entry->vme_end;
				}
			}
		}

		/*
		 *	In any case, the "entry" always precedes
		 *	the proposed new region throughout the loop:
		 */

		while (TRUE) {
			vm_map_entry_t next;

			/*
			 *	Find the end of the proposed new region.
			 *	Be sure we didn't go beyond the end, or
			 *	wrap around the address.
			 */

			if (vmk_flags.vmkf_guard_before) {
				/* reserve space for the front guard page */
				start += VM_MAP_PAGE_SIZE(map);
			}
			/* round the candidate start up to the requested alignment */
			end = ((start + mask) & ~mask);

			if (end < start) {
				goto noSpace;
			}
			start = end;
			assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
			end += size;
			assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

			if ((end > map->max_offset) || (end < start)) {
				goto noSpace;
			}

			next = entry->vme_next;

			if (map->holelistenabled) {
				/* the current hole is big enough? */
				if (entry->vme_end >= end) {
					break;
				}
			} else {
				/*
				 *	If there are no more entries, we must win.
				 *
				 *	OR
				 *
				 *	If there is another entry, it must be
				 *	after the end of the potential new region.
				 */

				if (next == vm_map_to_entry(map)) {
					break;
				}

				if (next->vme_start >= end) {
					break;
				}
			}

			/*
			 *	Didn't fit -- move to the next entry.
			 */

			entry = next;

			if (map->holelistenabled) {
				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
					/*
					 *	Wrapped around
					 */
					goto noSpace;
				}
				start = entry->vme_start;
			} else {
				start = entry->vme_end;
			}
		}

		if (vmk_flags.vmkf_guard_before) {
			/* go back for the front guard page */
			start -= VM_MAP_PAGE_SIZE(map);
		}
	}

	if (map->holelistenabled) {
		/* sanity: the chosen start must lie in a hole, not an entry */
		if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
		}
	}

	/*
	 *	At this point,
	 *		"start" and "end" should define the endpoints of the
	 *			available new range, and
	 *		"entry" should refer to the region before the new
	 *			range, and
	 *
	 *		the map should be locked.
	 */

	*address = start;

	assert(start < end);
	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
	    VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
	    VM_MAP_PAGE_MASK(map)));

	/* initialize the entry: anonymous, default protections, unwired */
	new_entry->is_shared = FALSE;
	new_entry->is_sub_map = FALSE;
	new_entry->use_pmap = TRUE;
	VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
	VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);

	new_entry->needs_copy = FALSE;

	new_entry->inheritance = VM_INHERIT_DEFAULT;
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_ALL;
	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
	new_entry->wired_count = 0;
	new_entry->user_wired_count = 0;

	new_entry->in_transition = FALSE;
	new_entry->needs_wakeup = FALSE;
	new_entry->no_cache = FALSE;
	new_entry->permanent = FALSE;
	new_entry->superpage_size = FALSE;
	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}

	new_entry->used_for_jit = FALSE;
	new_entry->pmap_cs_associated = FALSE;
	new_entry->zero_wired_pages = FALSE;
	new_entry->iokit_acct = FALSE;
	new_entry->vme_resilient_codesign = FALSE;
	new_entry->vme_resilient_media = FALSE;
	if (vmk_flags.vmkf_atomic_entry) {
		new_entry->vme_atomic = TRUE;
	} else {
		new_entry->vme_atomic = FALSE;
	}

	VME_ALIAS_SET(new_entry, tag);

	/*
	 *	Insert the new entry into the list
	 */

	vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);

	map->size += size;

	/*
	 *	Update the lookup hint
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);

	/* NOTE: map is returned LOCKED on success */
	*o_entry = new_entry;
	return KERN_SUCCESS;

noSpace:

	/* failure: free the unused entry and return the map unlocked */
	vm_map_entry_dispose(map, new_entry);
	vm_map_unlock(map);
	return KERN_NO_SPACE;
}
1975
/* debug knobs: log each pmap_enter / enable the pre-enter path */
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;

/*
 *	Routine:	vm_map_pmap_enter [internal only]
 *
 *	Description:
 *		Force pages from the specified object to be entered into
 *		the pmap at the specified address if they are present.
 *		As soon as a page not found in the object the scan ends.
 *
 *	Returns:
 *		Nothing.
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */
__unused static void
vm_map_pmap_enter(
	vm_map_t                map,
	vm_map_offset_t         addr,
	vm_map_offset_t         end_addr,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_prot_t               protection)
{
	int                     type_of_fault;
	kern_return_t           kr;
	struct vm_object_fault_info fault_info = {};

	if (map->pmap == 0) {
		return;
	}

	/* only supported for maps at the native page size */
	assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);

	/* walk [addr, end_addr) one page at a time */
	while (addr < end_addr) {
		vm_page_t       m;


		/*
		 * TODO:
		 * From vm_map_enter(), we come into this function without the map
		 * lock held or the object lock held.
		 * We haven't taken a reference on the object either.
		 * We should do a proper lookup on the map to make sure
		 * that things are sane before we go locking objects that
		 * could have been deallocated from under us.
		 */

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);

		/* stop at the first page that is absent or not safely mappable */
		if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
			vm_object_unlock(object);
			return;
		}

		if (vm_map_pmap_enter_print) {
			printf("vm_map_pmap_enter:");
			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			    map, (unsigned long long)addr, object, (unsigned long long)offset);
		}
		type_of_fault = DBG_CACHE_HIT_FAULT;
		kr = vm_fault_enter(m, map->pmap,
		    addr,
		    PAGE_SIZE, 0,
		    protection, protection,
		    VM_PAGE_WIRED(m),
		    FALSE,                     /* change_wiring */
		    VM_KERN_MEMORY_NONE,       /* tag - not wiring */
		    &fault_info,
		    NULL,                      /* need_retry */
		    &type_of_fault);
		/* NOTE(review): kr is not checked; entry failures are silently ignored */
		(void)kr;

		vm_object_unlock(object);

		offset += PAGE_SIZE_64;
		addr += PAGE_SIZE;
	}
}
2059
#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
/*
 * Routine:	vm_map_random_address_for_size
 *
 * Description:
 *	Pick a page-aligned random start address inside "map" for an
 *	allocation of "size" bytes.  Up to
 *	MAX_TRIES_TO_GET_RANDOM_ADDRESS candidates are drawn; a
 *	candidate is accepted only if it lands in an unmapped gap of
 *	the map that is at least "size" bytes long.
 *
 *	NOTE(review): this walks map entries via vm_map_lookup_entry(),
 *	so it appears to assume the caller holds the map lock -- confirm
 *	at the call sites (e.g. vm_map_enter()).
 *
 * Returns:
 *	KERN_SUCCESS with *address set on success;
 *	KERN_NO_SPACE if no suitable gap was found after
 *	MAX_TRIES_TO_GET_RANDOM_ADDRESS attempts.
 */
kern_return_t
vm_map_random_address_for_size(
	vm_map_t map,
	vm_map_offset_t *address,       /* OUT: chosen start address */
	vm_map_size_t size)
{
	kern_return_t kr = KERN_SUCCESS;
	int tries = 0;
	vm_map_offset_t random_addr = 0;
	vm_map_offset_t hole_end;

	vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
	vm_map_size_t vm_hole_size = 0;
	vm_map_size_t addr_space_size;

	addr_space_size = vm_map_max(map) - vm_map_min(map);

	assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));

	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		/*
		 * early_random() is the only generator available before
		 * the zalloc-backed random subsystem is up.
		 */
		if (startup_phase < STARTUP_SUB_ZALLOC) {
			random_addr = (vm_map_offset_t)early_random();
		} else {
			random_addr = (vm_map_offset_t)random();
		}
		/*
		 * Scale the raw random value by the map's page size,
		 * then fold it into [vm_map_min(map), vm_map_max(map))
		 * and truncate to a map-page boundary.
		 */
		random_addr <<= VM_MAP_PAGE_SHIFT(map);
		random_addr = vm_map_trunc_page(
			vm_map_min(map) + (random_addr % addr_space_size),
			VM_MAP_PAGE_MASK(map));

		/* only candidates that fall in an unmapped gap qualify */
		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
			if (prev_entry == vm_map_to_entry(map)) {
				/* gap precedes the first entry of the map */
				next_entry = vm_map_first_entry(map);
			} else {
				next_entry = prev_entry->vme_next;
			}
			/* the gap ends at the next entry, or at the map's end */
			if (next_entry == vm_map_to_entry(map)) {
				hole_end = vm_map_max(map);
			} else {
				hole_end = next_entry->vme_start;
			}
			vm_hole_size = hole_end - random_addr;
			if (vm_hole_size >= size) {
				/* the gap is big enough: accept this address */
				*address = random_addr;
				break;
			}
		}
		tries++;
	}

	/* "tries" only reaches the limit when every candidate was rejected */
	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		kr = KERN_NO_SPACE;
	}
	return kr;
}
2117
2118 static boolean_t
vm_memory_malloc_no_cow(int alias)2119 vm_memory_malloc_no_cow(
2120 int alias)
2121 {
2122 uint64_t alias_mask;
2123
2124 if (alias > 63) {
2125 return FALSE;
2126 }
2127
2128 alias_mask = 1ULL << alias;
2129 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2130 return TRUE;
2131 }
2132 return FALSE;
2133 }
2134
/*
 * NOTE(review): these counters appear to track how often vm_map_enter()
 * ran into the RLIMIT_AS / RLIMIT_DATA resource limits; the code that
 * increments them is not in this section -- confirm.
 */
uint64_t vm_map_enter_RLIMIT_AS_count = 0;
uint64_t vm_map_enter_RLIMIT_DATA_count = 0;
/*
 * Routine:	vm_map_enter
 *
 * Description:
 *	Allocate a range in the specified virtual address map.
 *	The resulting range will refer to memory defined by
 *	the given memory object and offset into that object.
 *
 *	Arguments are as defined in the vm_map call.
 */
/*
 * NOTE(review): presumably counting successes/failures of restoring
 * the zapped old mappings when a VM_FLAGS_OVERWRITE mapping attempt
 * fails (see the "zap_old_map" logic in vm_map_enter()) -- confirm.
 */
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
2149 kern_return_t
vm_map_enter(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t alias,vm_object_t object,vm_object_offset_t offset,boolean_t needs_copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)2150 vm_map_enter(
2151 vm_map_t map,
2152 vm_map_offset_t *address, /* IN/OUT */
2153 vm_map_size_t size,
2154 vm_map_offset_t mask,
2155 int flags,
2156 vm_map_kernel_flags_t vmk_flags,
2157 vm_tag_t alias,
2158 vm_object_t object,
2159 vm_object_offset_t offset,
2160 boolean_t needs_copy,
2161 vm_prot_t cur_protection,
2162 vm_prot_t max_protection,
2163 vm_inherit_t inheritance)
2164 {
2165 vm_map_entry_t entry, new_entry;
2166 vm_map_offset_t start, tmp_start, tmp_offset;
2167 vm_map_offset_t end, tmp_end;
2168 vm_map_offset_t tmp2_start, tmp2_end;
2169 vm_map_offset_t desired_empty_end;
2170 vm_map_offset_t step;
2171 kern_return_t result = KERN_SUCCESS;
2172 vm_map_t zap_old_map = VM_MAP_NULL;
2173 vm_map_t zap_new_map = VM_MAP_NULL;
2174 boolean_t map_locked = FALSE;
2175 boolean_t pmap_empty = TRUE;
2176 boolean_t new_mapping_established = FALSE;
2177 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2178 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2179 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2180 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2181 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2182 boolean_t is_submap = vmk_flags.vmkf_submap;
2183 boolean_t permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
2184 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2185 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2186 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2187 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
2188 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2189 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2190 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2191 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2192 vm_tag_t user_alias;
2193 vm_map_offset_t effective_min_offset, effective_max_offset;
2194 kern_return_t kr;
2195 boolean_t clear_map_aligned = FALSE;
2196 vm_map_entry_t hole_entry;
2197 vm_map_size_t chunk_size = 0;
2198 vm_object_t caller_object;
2199
2200 caller_object = object;
2201
2202 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2203
2204 if (flags & VM_FLAGS_4GB_CHUNK) {
2205 #if defined(__LP64__)
2206 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2207 #else /* __LP64__ */
2208 chunk_size = ANON_CHUNK_SIZE;
2209 #endif /* __LP64__ */
2210 } else {
2211 chunk_size = ANON_CHUNK_SIZE;
2212 }
2213
2214 if (superpage_size) {
2215 switch (superpage_size) {
2216 /*
2217 * Note that the current implementation only supports
2218 * a single size for superpages, SUPERPAGE_SIZE, per
2219 * architecture. As soon as more sizes are supposed
2220 * to be supported, SUPERPAGE_SIZE has to be replaced
2221 * with a lookup of the size depending on superpage_size.
2222 */
2223 #ifdef __x86_64__
2224 case SUPERPAGE_SIZE_ANY:
2225 /* handle it like 2 MB and round up to page size */
2226 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2227 OS_FALLTHROUGH;
2228 case SUPERPAGE_SIZE_2MB:
2229 break;
2230 #endif
2231 default:
2232 return KERN_INVALID_ARGUMENT;
2233 }
2234 mask = SUPERPAGE_SIZE - 1;
2235 if (size & (SUPERPAGE_SIZE - 1)) {
2236 return KERN_INVALID_ARGUMENT;
2237 }
2238 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2239 }
2240
2241
2242 if ((cur_protection & VM_PROT_WRITE) &&
2243 (cur_protection & VM_PROT_EXECUTE) &&
2244 #if XNU_TARGET_OS_OSX
2245 map->pmap != kernel_pmap &&
2246 (cs_process_global_enforcement() ||
2247 (vmk_flags.vmkf_cs_enforcement_override
2248 ? vmk_flags.vmkf_cs_enforcement
2249 : (vm_map_cs_enforcement(map)
2250 #if __arm64__
2251 || !VM_MAP_IS_EXOTIC(map)
2252 #endif /* __arm64__ */
2253 ))) &&
2254 #endif /* XNU_TARGET_OS_OSX */
2255 (VM_MAP_POLICY_WX_FAIL(map) ||
2256 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2257 !entry_for_jit) {
2258 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2259
2260 DTRACE_VM3(cs_wx,
2261 uint64_t, 0,
2262 uint64_t, 0,
2263 vm_prot_t, cur_protection);
2264 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2265 proc_selfpid(),
2266 (current_task()->bsd_info
2267 ? proc_name_address(current_task()->bsd_info)
2268 : "?"),
2269 __FUNCTION__,
2270 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2271 cur_protection &= ~VM_PROT_EXECUTE;
2272 if (vm_protect_wx_fail) {
2273 return KERN_PROTECTION_FAILURE;
2274 }
2275 }
2276
2277 /*
2278 * If the task has requested executable lockdown,
2279 * deny any new executable mapping.
2280 */
2281 if (map->map_disallow_new_exec == TRUE) {
2282 if (cur_protection & VM_PROT_EXECUTE) {
2283 return KERN_PROTECTION_FAILURE;
2284 }
2285 }
2286
2287 if (resilient_codesign) {
2288 assert(!is_submap);
2289 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
2290 if ((cur_protection | max_protection) & reject_prot) {
2291 return KERN_PROTECTION_FAILURE;
2292 }
2293 }
2294
2295 if (resilient_media) {
2296 assert(!is_submap);
2297 // assert(!needs_copy);
2298 if (object != VM_OBJECT_NULL &&
2299 !object->internal) {
2300 /*
2301 * This mapping is directly backed by an external
2302 * memory manager (e.g. a vnode pager for a file):
2303 * we would not have any safe place to inject
2304 * a zero-filled page if an actual page is not
2305 * available, without possibly impacting the actual
2306 * contents of the mapped object (e.g. the file),
2307 * so we can't provide any media resiliency here.
2308 */
2309 return KERN_INVALID_ARGUMENT;
2310 }
2311 }
2312
2313 if (is_submap) {
2314 if (purgable) {
2315 /* submaps can not be purgeable */
2316 return KERN_INVALID_ARGUMENT;
2317 }
2318 if (object == VM_OBJECT_NULL) {
2319 /* submaps can not be created lazily */
2320 return KERN_INVALID_ARGUMENT;
2321 }
2322 }
2323 if (vmk_flags.vmkf_already) {
2324 /*
2325 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2326 * is already present. For it to be meaningul, the requested
2327 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2328 * we shouldn't try and remove what was mapped there first
2329 * (!VM_FLAGS_OVERWRITE).
2330 */
2331 if ((flags & VM_FLAGS_ANYWHERE) ||
2332 (flags & VM_FLAGS_OVERWRITE)) {
2333 return KERN_INVALID_ARGUMENT;
2334 }
2335 }
2336
2337 effective_min_offset = map->min_offset;
2338
2339 if (vmk_flags.vmkf_beyond_max) {
2340 /*
2341 * Allow an insertion beyond the map's max offset.
2342 */
2343 #if !defined(__arm__)
2344 if (vm_map_is_64bit(map)) {
2345 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2346 } else
2347 #endif /* __arm__ */
2348 effective_max_offset = 0x00000000FFFFF000ULL;
2349 } else {
2350 #if XNU_TARGET_OS_OSX
2351 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2352 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2353 } else {
2354 effective_max_offset = map->max_offset;
2355 }
2356 #else /* XNU_TARGET_OS_OSX */
2357 effective_max_offset = map->max_offset;
2358 #endif /* XNU_TARGET_OS_OSX */
2359 }
2360
2361 if (size == 0 ||
2362 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2363 *address = 0;
2364 return KERN_INVALID_ARGUMENT;
2365 }
2366
2367 if (map->pmap == kernel_pmap) {
2368 user_alias = VM_KERN_MEMORY_NONE;
2369 } else {
2370 user_alias = alias;
2371 }
2372
2373 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2374 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2375 }
2376
2377 #define RETURN(value) { result = value; goto BailOut; }
2378
2379 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2380 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2381 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2382 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2383 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2384 }
2385
2386 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2387 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2388 /*
2389 * In most cases, the caller rounds the size up to the
2390 * map's page size.
2391 * If we get a size that is explicitly not map-aligned here,
2392 * we'll have to respect the caller's wish and mark the
2393 * mapping as "not map-aligned" to avoid tripping the
2394 * map alignment checks later.
2395 */
2396 clear_map_aligned = TRUE;
2397 }
2398 if (!anywhere &&
2399 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2400 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2401 /*
2402 * We've been asked to map at a fixed address and that
2403 * address is not aligned to the map's specific alignment.
2404 * The caller should know what it's doing (i.e. most likely
2405 * mapping some fragmented copy map, transferring memory from
2406 * a VM map with a different alignment), so clear map_aligned
2407 * for this new VM map entry and proceed.
2408 */
2409 clear_map_aligned = TRUE;
2410 }
2411
2412 /*
2413 * Only zero-fill objects are allowed to be purgable.
2414 * LP64todo - limit purgable objects to 32-bits for now
2415 */
2416 if (purgable &&
2417 (offset != 0 ||
2418 (object != VM_OBJECT_NULL &&
2419 (object->vo_size != size ||
2420 object->purgable == VM_PURGABLE_DENY))
2421 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2422 return KERN_INVALID_ARGUMENT;
2423 }
2424
2425 if (!anywhere && overwrite) {
2426 /*
2427 * Create a temporary VM map to hold the old mappings in the
2428 * affected area while we create the new one.
2429 * This avoids releasing the VM map lock in
2430 * vm_map_entry_delete() and allows atomicity
2431 * when we want to replace some mappings with a new one.
2432 * It also allows us to restore the old VM mappings if the
2433 * new mapping fails.
2434 */
2435 zap_old_map = vm_map_create(PMAP_NULL,
2436 *address,
2437 *address + size,
2438 map->hdr.entries_pageable);
2439 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2440 vm_map_disable_hole_optimization(zap_old_map);
2441 }
2442
2443 StartAgain:;
2444
2445 start = *address;
2446
2447 if (anywhere) {
2448 vm_map_lock(map);
2449 map_locked = TRUE;
2450
2451 if (entry_for_jit) {
2452 if (map->jit_entry_exists &&
2453 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2454 result = KERN_INVALID_ARGUMENT;
2455 goto BailOut;
2456 }
2457 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2458 random_address = TRUE;
2459 }
2460 }
2461
2462 if (random_address) {
2463 /*
2464 * Get a random start address.
2465 */
2466 result = vm_map_random_address_for_size(map, address, size);
2467 if (result != KERN_SUCCESS) {
2468 goto BailOut;
2469 }
2470 start = *address;
2471 }
2472 #if XNU_TARGET_OS_OSX
2473 else if ((start == 0 || start == vm_map_min(map)) &&
2474 !map->disable_vmentry_reuse &&
2475 map->vmmap_high_start != 0) {
2476 start = map->vmmap_high_start;
2477 }
2478 #endif /* XNU_TARGET_OS_OSX */
2479
2480
2481 /*
2482 * Calculate the first possible address.
2483 */
2484
2485 if (start < effective_min_offset) {
2486 start = effective_min_offset;
2487 }
2488 if (start > effective_max_offset) {
2489 RETURN(KERN_NO_SPACE);
2490 }
2491
2492 /*
2493 * Look for the first possible address;
2494 * if there's already something at this
2495 * address, we have to start after it.
2496 */
2497
2498 if (map->disable_vmentry_reuse == TRUE) {
2499 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2500 } else {
2501 if (map->holelistenabled) {
2502 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2503
2504 if (hole_entry == NULL) {
2505 /*
2506 * No more space in the map?
2507 */
2508 result = KERN_NO_SPACE;
2509 goto BailOut;
2510 } else {
2511 boolean_t found_hole = FALSE;
2512
2513 do {
2514 if (hole_entry->vme_start >= start) {
2515 start = hole_entry->vme_start;
2516 found_hole = TRUE;
2517 break;
2518 }
2519
2520 if (hole_entry->vme_end > start) {
2521 found_hole = TRUE;
2522 break;
2523 }
2524 hole_entry = hole_entry->vme_next;
2525 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2526
2527 if (found_hole == FALSE) {
2528 result = KERN_NO_SPACE;
2529 goto BailOut;
2530 }
2531
2532 entry = hole_entry;
2533
2534 if (start == 0) {
2535 start += PAGE_SIZE_64;
2536 }
2537 }
2538 } else {
2539 assert(first_free_is_valid(map));
2540
2541 entry = map->first_free;
2542
2543 if (entry == vm_map_to_entry(map)) {
2544 entry = NULL;
2545 } else {
2546 if (entry->vme_next == vm_map_to_entry(map)) {
2547 /*
2548 * Hole at the end of the map.
2549 */
2550 entry = NULL;
2551 } else {
2552 if (start < (entry->vme_next)->vme_start) {
2553 start = entry->vme_end;
2554 start = vm_map_round_page(start,
2555 VM_MAP_PAGE_MASK(map));
2556 } else {
2557 /*
2558 * Need to do a lookup.
2559 */
2560 entry = NULL;
2561 }
2562 }
2563 }
2564
2565 if (entry == NULL) {
2566 vm_map_entry_t tmp_entry;
2567 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2568 assert(!entry_for_jit);
2569 start = tmp_entry->vme_end;
2570 start = vm_map_round_page(start,
2571 VM_MAP_PAGE_MASK(map));
2572 }
2573 entry = tmp_entry;
2574 }
2575 }
2576 }
2577
2578 /*
2579 * In any case, the "entry" always precedes
2580 * the proposed new region throughout the
2581 * loop:
2582 */
2583
2584 while (TRUE) {
2585 vm_map_entry_t next;
2586
2587 /*
2588 * Find the end of the proposed new region.
2589 * Be sure we didn't go beyond the end, or
2590 * wrap around the address.
2591 */
2592
2593 end = ((start + mask) & ~mask);
2594 end = vm_map_round_page(end,
2595 VM_MAP_PAGE_MASK(map));
2596 if (end < start) {
2597 RETURN(KERN_NO_SPACE);
2598 }
2599 start = end;
2600 assert(VM_MAP_PAGE_ALIGNED(start,
2601 VM_MAP_PAGE_MASK(map)));
2602 end += size;
2603
2604 /* We want an entire page of empty space, but don't increase the allocation size. */
2605 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2606
2607 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2608 if (map->wait_for_space) {
2609 assert(!keep_map_locked);
2610 if (size <= (effective_max_offset -
2611 effective_min_offset)) {
2612 assert_wait((event_t)map,
2613 THREAD_ABORTSAFE);
2614 vm_map_unlock(map);
2615 map_locked = FALSE;
2616 thread_block(THREAD_CONTINUE_NULL);
2617 goto StartAgain;
2618 }
2619 }
2620 RETURN(KERN_NO_SPACE);
2621 }
2622
2623 next = entry->vme_next;
2624
2625 if (map->holelistenabled) {
2626 if (entry->vme_end >= desired_empty_end) {
2627 break;
2628 }
2629 } else {
2630 /*
2631 * If there are no more entries, we must win.
2632 *
2633 * OR
2634 *
2635 * If there is another entry, it must be
2636 * after the end of the potential new region.
2637 */
2638
2639 if (next == vm_map_to_entry(map)) {
2640 break;
2641 }
2642
2643 if (next->vme_start >= desired_empty_end) {
2644 break;
2645 }
2646 }
2647
2648 /*
2649 * Didn't fit -- move to the next entry.
2650 */
2651
2652 entry = next;
2653
2654 if (map->holelistenabled) {
2655 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2656 /*
2657 * Wrapped around
2658 */
2659 result = KERN_NO_SPACE;
2660 goto BailOut;
2661 }
2662 start = entry->vme_start;
2663 } else {
2664 start = entry->vme_end;
2665 }
2666
2667 start = vm_map_round_page(start,
2668 VM_MAP_PAGE_MASK(map));
2669 }
2670
2671 if (map->holelistenabled) {
2672 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2673 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
2674 }
2675 }
2676
2677 *address = start;
2678 assert(VM_MAP_PAGE_ALIGNED(*address,
2679 VM_MAP_PAGE_MASK(map)));
2680 } else {
2681 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2682 !overwrite &&
2683 user_alias == VM_MEMORY_REALLOC) {
2684 /*
2685 * Force realloc() to switch to a new allocation,
2686 * to prevent 4k-fragmented virtual ranges.
2687 */
2688 // DEBUG4K_ERROR("no realloc in place");
2689 return KERN_NO_SPACE;
2690 }
2691
2692 /*
2693 * Verify that:
2694 * the address doesn't itself violate
2695 * the mask requirement.
2696 */
2697
2698 vm_map_lock(map);
2699 map_locked = TRUE;
2700 if ((start & mask) != 0) {
2701 RETURN(KERN_NO_SPACE);
2702 }
2703
2704 /*
2705 * ... the address is within bounds
2706 */
2707
2708 end = start + size;
2709
2710 if ((start < effective_min_offset) ||
2711 (end > effective_max_offset) ||
2712 (start >= end)) {
2713 RETURN(KERN_INVALID_ADDRESS);
2714 }
2715
2716 if (overwrite && zap_old_map != VM_MAP_NULL) {
2717 int remove_flags;
2718 /*
2719 * Fixed mapping and "overwrite" flag: attempt to
2720 * remove all existing mappings in the specified
2721 * address range, saving them in our "zap_old_map".
2722 */
2723 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2724 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2725 if (vmk_flags.vmkf_overwrite_immutable) {
2726 /* we can overwrite immutable mappings */
2727 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2728 }
2729 (void) vm_map_delete(map, start, end,
2730 remove_flags,
2731 zap_old_map);
2732 }
2733
2734 /*
2735 * ... the starting address isn't allocated
2736 */
2737
2738 if (vm_map_lookup_entry(map, start, &entry)) {
2739 if (!(vmk_flags.vmkf_already)) {
2740 RETURN(KERN_NO_SPACE);
2741 }
2742 /*
2743 * Check if what's already there is what we want.
2744 */
2745 tmp_start = start;
2746 tmp_offset = offset;
2747 if (entry->vme_start < start) {
2748 tmp_start -= start - entry->vme_start;
2749 tmp_offset -= start - entry->vme_start;
2750 }
2751 for (; entry->vme_start < end;
2752 entry = entry->vme_next) {
2753 /*
2754 * Check if the mapping's attributes
2755 * match the existing map entry.
2756 */
2757 if (entry == vm_map_to_entry(map) ||
2758 entry->vme_start != tmp_start ||
2759 entry->is_sub_map != is_submap ||
2760 VME_OFFSET(entry) != tmp_offset ||
2761 entry->needs_copy != needs_copy ||
2762 entry->protection != cur_protection ||
2763 entry->max_protection != max_protection ||
2764 entry->inheritance != inheritance ||
2765 entry->iokit_acct != iokit_acct ||
2766 VME_ALIAS(entry) != alias) {
2767 /* not the same mapping ! */
2768 RETURN(KERN_NO_SPACE);
2769 }
2770 /*
2771 * Check if the same object is being mapped.
2772 */
2773 if (is_submap) {
2774 if (VME_SUBMAP(entry) !=
2775 (vm_map_t) object) {
2776 /* not the same submap */
2777 RETURN(KERN_NO_SPACE);
2778 }
2779 } else {
2780 if (VME_OBJECT(entry) != object) {
2781 /* not the same VM object... */
2782 vm_object_t obj2;
2783
2784 obj2 = VME_OBJECT(entry);
2785 if ((obj2 == VM_OBJECT_NULL ||
2786 obj2->internal) &&
2787 (object == VM_OBJECT_NULL ||
2788 object->internal)) {
2789 /*
2790 * ... but both are
2791 * anonymous memory,
2792 * so equivalent.
2793 */
2794 } else {
2795 RETURN(KERN_NO_SPACE);
2796 }
2797 }
2798 }
2799
2800 tmp_offset += entry->vme_end - entry->vme_start;
2801 tmp_start += entry->vme_end - entry->vme_start;
2802 if (entry->vme_end >= end) {
2803 /* reached the end of our mapping */
2804 break;
2805 }
2806 }
2807 /* it all matches: let's use what's already there ! */
2808 RETURN(KERN_MEMORY_PRESENT);
2809 }
2810
2811 /*
2812 * ... the next region doesn't overlap the
2813 * end point.
2814 */
2815
2816 if ((entry->vme_next != vm_map_to_entry(map)) &&
2817 (entry->vme_next->vme_start < end)) {
2818 RETURN(KERN_NO_SPACE);
2819 }
2820 }
2821
2822 /*
2823 * At this point,
2824 * "start" and "end" should define the endpoints of the
2825 * available new range, and
2826 * "entry" should refer to the region before the new
2827 * range, and
2828 *
2829 * the map should be locked.
2830 */
2831
2832 /*
2833 * See whether we can avoid creating a new entry (and object) by
2834 * extending one of our neighbors. [So far, we only attempt to
2835 * extend from below.] Note that we can never extend/join
2836 * purgable objects because they need to remain distinct
2837 * entities in order to implement their "volatile object"
2838 * semantics.
2839 */
2840
2841 if (purgable ||
2842 entry_for_jit ||
2843 vm_memory_malloc_no_cow(user_alias)) {
2844 if (object == VM_OBJECT_NULL) {
2845 object = vm_object_allocate(size);
2846 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2847 object->true_share = FALSE;
2848 if (purgable) {
2849 task_t owner;
2850 object->purgable = VM_PURGABLE_NONVOLATILE;
2851 if (map->pmap == kernel_pmap) {
2852 /*
2853 * Purgeable mappings made in a kernel
2854 * map are "owned" by the kernel itself
2855 * rather than the current user task
2856 * because they're likely to be used by
2857 * more than this user task (see
2858 * execargs_purgeable_allocate(), for
2859 * example).
2860 */
2861 owner = kernel_task;
2862 } else {
2863 owner = current_task();
2864 }
2865 assert(object->vo_owner == NULL);
2866 assert(object->resident_page_count == 0);
2867 assert(object->wired_page_count == 0);
2868 vm_object_lock(object);
2869 vm_purgeable_nonvolatile_enqueue(object, owner);
2870 vm_object_unlock(object);
2871 }
2872 offset = (vm_object_offset_t)0;
2873 }
2874 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
2875 /* no coalescing if address space uses sub-pages */
2876 } else if ((is_submap == FALSE) &&
2877 (object == VM_OBJECT_NULL) &&
2878 (entry != vm_map_to_entry(map)) &&
2879 (entry->vme_end == start) &&
2880 (!entry->is_shared) &&
2881 (!entry->is_sub_map) &&
2882 (!entry->in_transition) &&
2883 (!entry->needs_wakeup) &&
2884 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2885 (entry->protection == cur_protection) &&
2886 (entry->max_protection == max_protection) &&
2887 (entry->inheritance == inheritance) &&
2888 ((user_alias == VM_MEMORY_REALLOC) ||
2889 (VME_ALIAS(entry) == alias)) &&
2890 (entry->no_cache == no_cache) &&
2891 (entry->permanent == permanent) &&
2892 /* no coalescing for immutable executable mappings */
2893 !((entry->protection & VM_PROT_EXECUTE) &&
2894 entry->permanent) &&
2895 (!entry->superpage_size && !superpage_size) &&
2896 /*
2897 * No coalescing if not map-aligned, to avoid propagating
2898 * that condition any further than needed:
2899 */
2900 (!entry->map_aligned || !clear_map_aligned) &&
2901 (!entry->zero_wired_pages) &&
2902 (!entry->used_for_jit && !entry_for_jit) &&
2903 (!entry->pmap_cs_associated) &&
2904 (entry->iokit_acct == iokit_acct) &&
2905 (!entry->vme_resilient_codesign) &&
2906 (!entry->vme_resilient_media) &&
2907 (!entry->vme_atomic) &&
2908 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2909
2910 ((entry->vme_end - entry->vme_start) + size <=
2911 (user_alias == VM_MEMORY_REALLOC ?
2912 ANON_CHUNK_SIZE :
2913 NO_COALESCE_LIMIT)) &&
2914
2915 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2916 if (vm_object_coalesce(VME_OBJECT(entry),
2917 VM_OBJECT_NULL,
2918 VME_OFFSET(entry),
2919 (vm_object_offset_t) 0,
2920 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2921 (vm_map_size_t)(end - entry->vme_end))) {
2922 /*
2923 * Coalesced the two objects - can extend
2924 * the previous map entry to include the
2925 * new range.
2926 */
2927 map->size += (end - entry->vme_end);
2928 assert(entry->vme_start < end);
2929 assert(VM_MAP_PAGE_ALIGNED(end,
2930 VM_MAP_PAGE_MASK(map)));
2931 if (__improbable(vm_debug_events)) {
2932 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2933 }
2934 entry->vme_end = end;
2935 if (map->holelistenabled) {
2936 vm_map_store_update_first_free(map, entry, TRUE);
2937 } else {
2938 vm_map_store_update_first_free(map, map->first_free, TRUE);
2939 }
2940 new_mapping_established = TRUE;
2941 RETURN(KERN_SUCCESS);
2942 }
2943 }
2944
2945 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2946 new_entry = NULL;
2947
2948 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2949 tmp2_end = tmp2_start + step;
2950 /*
2951 * Create a new entry
2952 *
2953 * XXX FBDP
2954 * The reserved "page zero" in each process's address space can
2955 * be arbitrarily large. Splitting it into separate objects and
2956 * therefore different VM map entries serves no purpose and just
2957 * slows down operations on the VM map, so let's not split the
2958 * allocation into chunks if the max protection is NONE. That
2959 * memory should never be accessible, so it will never get to the
2960 * default pager.
2961 */
2962 tmp_start = tmp2_start;
2963 if (object == VM_OBJECT_NULL &&
2964 size > chunk_size &&
2965 max_protection != VM_PROT_NONE &&
2966 superpage_size == 0) {
2967 tmp_end = tmp_start + chunk_size;
2968 } else {
2969 tmp_end = tmp2_end;
2970 }
2971 do {
2972 if (!is_submap &&
2973 object != VM_OBJECT_NULL &&
2974 object->internal &&
2975 offset + (tmp_end - tmp_start) > object->vo_size) {
2976 // printf("FBDP object %p size 0x%llx overmapping offset 0x%llx size 0x%llx\n", object, object->vo_size, offset, (uint64_t)(tmp_end - tmp_start));
2977 DTRACE_VM5(vm_map_enter_overmap,
2978 vm_map_t, map,
2979 vm_map_address_t, tmp_start,
2980 vm_map_address_t, tmp_end,
2981 vm_object_offset_t, offset,
2982 vm_object_size_t, object->vo_size);
2983 }
2984 new_entry = vm_map_entry_insert(map,
2985 entry, tmp_start, tmp_end,
2986 object, offset, vmk_flags,
2987 needs_copy, FALSE, FALSE,
2988 cur_protection, max_protection,
2989 VM_BEHAVIOR_DEFAULT,
2990 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
2991 VM_INHERIT_NONE : inheritance),
2992 0,
2993 no_cache,
2994 permanent,
2995 no_copy_on_read,
2996 superpage_size,
2997 clear_map_aligned,
2998 is_submap,
2999 entry_for_jit,
3000 alias,
3001 translated_allow_execute);
3002
3003 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3004
3005 if (resilient_codesign) {
3006 int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
3007 if (!((cur_protection | max_protection) & reject_prot)) {
3008 new_entry->vme_resilient_codesign = TRUE;
3009 }
3010 }
3011
3012 if (resilient_media &&
3013 (object == VM_OBJECT_NULL ||
3014 object->internal)) {
3015 new_entry->vme_resilient_media = TRUE;
3016 }
3017
3018 assert(!new_entry->iokit_acct);
3019 if (!is_submap &&
3020 object != VM_OBJECT_NULL &&
3021 (object->purgable != VM_PURGABLE_DENY ||
3022 object->vo_ledger_tag)) {
3023 assert(new_entry->use_pmap);
3024 assert(!new_entry->iokit_acct);
3025 /*
3026 * Turn off pmap accounting since
3027 * purgeable (or tagged) objects have their
3028 * own ledgers.
3029 */
3030 new_entry->use_pmap = FALSE;
3031 } else if (!is_submap &&
3032 iokit_acct &&
3033 object != VM_OBJECT_NULL &&
3034 object->internal) {
3035 /* alternate accounting */
3036 assert(!new_entry->iokit_acct);
3037 assert(new_entry->use_pmap);
3038 new_entry->iokit_acct = TRUE;
3039 new_entry->use_pmap = FALSE;
3040 DTRACE_VM4(
3041 vm_map_iokit_mapped_region,
3042 vm_map_t, map,
3043 vm_map_offset_t, new_entry->vme_start,
3044 vm_map_offset_t, new_entry->vme_end,
3045 int, VME_ALIAS(new_entry));
3046 vm_map_iokit_mapped_region(
3047 map,
3048 (new_entry->vme_end -
3049 new_entry->vme_start));
3050 } else if (!is_submap) {
3051 assert(!new_entry->iokit_acct);
3052 assert(new_entry->use_pmap);
3053 }
3054
3055 if (is_submap) {
3056 vm_map_t submap;
3057 boolean_t submap_is_64bit;
3058 boolean_t use_pmap;
3059
3060 assert(new_entry->is_sub_map);
3061 assert(!new_entry->use_pmap);
3062 assert(!new_entry->iokit_acct);
3063 submap = (vm_map_t) object;
3064 submap_is_64bit = vm_map_is_64bit(submap);
3065 use_pmap = vmk_flags.vmkf_nested_pmap;
3066 #ifndef NO_NESTED_PMAP
3067 if (use_pmap && submap->pmap == NULL) {
3068 ledger_t ledger = map->pmap->ledger;
3069 /* we need a sub pmap to nest... */
3070 submap->pmap = pmap_create_options(ledger, 0,
3071 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3072 if (submap->pmap == NULL) {
3073 /* let's proceed without nesting... */
3074 }
3075 #if defined(__arm__) || defined(__arm64__)
3076 else {
3077 pmap_set_nested(submap->pmap);
3078 }
3079 #endif
3080 }
3081 if (use_pmap && submap->pmap != NULL) {
3082 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3083 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3084 kr = KERN_FAILURE;
3085 } else {
3086 kr = pmap_nest(map->pmap,
3087 submap->pmap,
3088 tmp_start,
3089 tmp_end - tmp_start);
3090 }
3091 if (kr != KERN_SUCCESS) {
3092 printf("vm_map_enter: "
3093 "pmap_nest(0x%llx,0x%llx) "
3094 "error 0x%x\n",
3095 (long long)tmp_start,
3096 (long long)tmp_end,
3097 kr);
3098 } else {
3099 /* we're now nested ! */
3100 new_entry->use_pmap = TRUE;
3101 pmap_empty = FALSE;
3102 }
3103 }
3104 #endif /* NO_NESTED_PMAP */
3105 }
3106 entry = new_entry;
3107
3108 if (superpage_size) {
3109 vm_page_t pages, m;
3110 vm_object_t sp_object;
3111 vm_object_offset_t sp_offset;
3112
3113 VME_OFFSET_SET(entry, 0);
3114
3115 /* allocate one superpage */
3116 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3117 if (kr != KERN_SUCCESS) {
3118 /* deallocate whole range... */
3119 new_mapping_established = TRUE;
3120 /* ... but only up to "tmp_end" */
3121 size -= end - tmp_end;
3122 RETURN(kr);
3123 }
3124
3125 /* create one vm_object per superpage */
3126 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3127 sp_object->phys_contiguous = TRUE;
3128 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3129 VME_OBJECT_SET(entry, sp_object);
3130 assert(entry->use_pmap);
3131
3132 /* enter the base pages into the object */
3133 vm_object_lock(sp_object);
3134 for (sp_offset = 0;
3135 sp_offset < SUPERPAGE_SIZE;
3136 sp_offset += PAGE_SIZE) {
3137 m = pages;
3138 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3139 pages = NEXT_PAGE(m);
3140 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3141 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3142 }
3143 vm_object_unlock(sp_object);
3144 }
3145 } while (tmp_end != tmp2_end &&
3146 (tmp_start = tmp_end) &&
3147 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3148 tmp_end + chunk_size : tmp2_end));
3149 }
3150
3151 new_mapping_established = TRUE;
3152
3153 BailOut:
3154 assert(map_locked == TRUE);
3155
3156 /*
3157 * Address space limit enforcement (RLIMIT_AS and RLIMIT_DATA):
3158 * If we have identified and possibly established the new mapping(s),
3159 * make sure we did not go beyond the address space limit.
3160 */
3161 if (result == KERN_SUCCESS) {
3162 if (map->size_limit != RLIM_INFINITY &&
3163 map->size > map->size_limit) {
3164 /*
3165 * Establishing the requested mappings would exceed
3166 * the process's RLIMIT_AS limit: fail with
3167 * KERN_NO_SPACE.
3168 */
3169 result = KERN_NO_SPACE;
3170 printf("%d[%s] %s: map size 0x%llx over RLIMIT_AS 0x%llx\n",
3171 proc_selfpid(),
3172 (current_task()->bsd_info
3173 ? proc_name_address(current_task()->bsd_info)
3174 : "?"),
3175 __FUNCTION__,
3176 (uint64_t) map->size,
3177 (uint64_t) map->size_limit);
3178 DTRACE_VM2(vm_map_enter_RLIMIT_AS,
3179 vm_map_size_t, map->size,
3180 uint64_t, map->size_limit);
3181 vm_map_enter_RLIMIT_AS_count++;
3182 } else if (map->data_limit != RLIM_INFINITY &&
3183 map->size > map->data_limit) {
3184 /*
3185 * Establishing the requested mappings would exceed
3186 * the process's RLIMIT_DATA limit: fail with
3187 * KERN_NO_SPACE.
3188 */
3189 result = KERN_NO_SPACE;
3190 printf("%d[%s] %s: map size 0x%llx over RLIMIT_DATA 0x%llx\n",
3191 proc_selfpid(),
3192 (current_task()->bsd_info
3193 ? proc_name_address(current_task()->bsd_info)
3194 : "?"),
3195 __FUNCTION__,
3196 (uint64_t) map->size,
3197 (uint64_t) map->data_limit);
3198 DTRACE_VM2(vm_map_enter_RLIMIT_DATA,
3199 vm_map_size_t, map->size,
3200 uint64_t, map->data_limit);
3201 vm_map_enter_RLIMIT_DATA_count++;
3202 }
3203 }
3204
3205 if (result == KERN_SUCCESS) {
3206 vm_prot_t pager_prot;
3207 memory_object_t pager;
3208
3209 #if DEBUG
3210 if (pmap_empty &&
3211 !(vmk_flags.vmkf_no_pmap_check)) {
3212 assert(pmap_is_empty(map->pmap,
3213 *address,
3214 *address + size));
3215 }
3216 #endif /* DEBUG */
3217
3218 /*
3219 * For "named" VM objects, let the pager know that the
3220 * memory object is being mapped. Some pagers need to keep
3221 * track of this, to know when they can reclaim the memory
3222 * object, for example.
3223 * VM calls memory_object_map() for each mapping (specifying
3224 * the protection of each mapping) and calls
3225 * memory_object_last_unmap() when all the mappings are gone.
3226 */
3227 pager_prot = max_protection;
3228 if (needs_copy) {
3229 /*
3230 * Copy-On-Write mapping: won't modify
3231 * the memory object.
3232 */
3233 pager_prot &= ~VM_PROT_WRITE;
3234 }
3235 if (!is_submap &&
3236 object != VM_OBJECT_NULL &&
3237 object->named &&
3238 object->pager != MEMORY_OBJECT_NULL) {
3239 vm_object_lock(object);
3240 pager = object->pager;
3241 if (object->named &&
3242 pager != MEMORY_OBJECT_NULL) {
3243 assert(object->pager_ready);
3244 vm_object_mapping_wait(object, THREAD_UNINT);
3245 vm_object_mapping_begin(object);
3246 vm_object_unlock(object);
3247
3248 kr = memory_object_map(pager, pager_prot);
3249 assert(kr == KERN_SUCCESS);
3250
3251 vm_object_lock(object);
3252 vm_object_mapping_end(object);
3253 }
3254 vm_object_unlock(object);
3255 }
3256 }
3257
3258 assert(map_locked == TRUE);
3259
3260 if (!keep_map_locked) {
3261 vm_map_unlock(map);
3262 map_locked = FALSE;
3263 }
3264
3265 /*
3266 * We can't hold the map lock if we enter this block.
3267 */
3268
3269 if (result == KERN_SUCCESS) {
3270 /* Wire down the new entry if the user
3271 * requested all new map entries be wired.
3272 */
3273 if ((map->wiring_required) || (superpage_size)) {
3274 assert(!keep_map_locked);
3275 pmap_empty = FALSE; /* pmap won't be empty */
3276 kr = vm_map_wire_kernel(map, start, end,
3277 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3278 TRUE);
3279 result = kr;
3280 }
3281
3282 }
3283
3284 if (result != KERN_SUCCESS) {
3285 if (new_mapping_established) {
3286 /*
3287 * The caller had an extra reference on the VM object
3288 * it gave us.
3289 * We've transferred that reference to the mapping we
3290 * just established but we're about to undo that mapping
3291 * and release that reference.
3292 * The caller expects its reference to be consumed on
3293 * success only, so we have to get the extra reference
3294 * back for the caller.
3295 */
3296 vm_object_reference(caller_object);
3297
3298 /*
3299 * We have to get rid of the new mappings since we
3300 * won't make them available to the user.
3301 * Try and do that atomically, to minimize the risk
3302 * that someone else create new mappings that range.
3303 */
3304 zap_new_map = vm_map_create(PMAP_NULL,
3305 *address,
3306 *address + size,
3307 map->hdr.entries_pageable);
3308 vm_map_set_page_shift(zap_new_map,
3309 VM_MAP_PAGE_SHIFT(map));
3310 vm_map_disable_hole_optimization(zap_new_map);
3311
3312 if (!map_locked) {
3313 vm_map_lock(map);
3314 map_locked = TRUE;
3315 }
3316 (void) vm_map_delete(map, *address, *address + size,
3317 (VM_MAP_REMOVE_SAVE_ENTRIES |
3318 VM_MAP_REMOVE_NO_MAP_ALIGN),
3319 zap_new_map);
3320 }
3321 if (zap_old_map != VM_MAP_NULL &&
3322 zap_old_map->hdr.nentries != 0) {
3323 vm_map_entry_t entry1, entry2;
3324
3325 /*
3326 * The new mapping failed. Attempt to restore
3327 * the old mappings, saved in the "zap_old_map".
3328 */
3329 if (!map_locked) {
3330 vm_map_lock(map);
3331 map_locked = TRUE;
3332 }
3333
3334 /* first check if the coast is still clear */
3335 start = vm_map_first_entry(zap_old_map)->vme_start;
3336 end = vm_map_last_entry(zap_old_map)->vme_end;
3337 if (vm_map_lookup_entry(map, start, &entry1) ||
3338 vm_map_lookup_entry(map, end, &entry2) ||
3339 entry1 != entry2) {
3340 /*
3341 * Part of that range has already been
3342 * re-mapped: we can't restore the old
3343 * mappings...
3344 */
3345 vm_map_enter_restore_failures++;
3346 } else {
3347 /*
3348 * Transfer the saved map entries from
3349 * "zap_old_map" to the original "map",
3350 * inserting them all after "entry1".
3351 */
3352 for (entry2 = vm_map_first_entry(zap_old_map);
3353 entry2 != vm_map_to_entry(zap_old_map);
3354 entry2 = vm_map_first_entry(zap_old_map)) {
3355 vm_map_size_t entry_size;
3356
3357 entry_size = (entry2->vme_end -
3358 entry2->vme_start);
3359 vm_map_store_entry_unlink(zap_old_map,
3360 entry2);
3361 zap_old_map->size -= entry_size;
3362 vm_map_store_entry_link(map, entry1, entry2,
3363 VM_MAP_KERNEL_FLAGS_NONE);
3364 map->size += entry_size;
3365 entry1 = entry2;
3366 }
3367 if (map->wiring_required) {
3368 /*
3369 * XXX TODO: we should rewire the
3370 * old pages here...
3371 */
3372 }
3373 vm_map_enter_restore_successes++;
3374 }
3375 }
3376 }
3377
3378 /*
3379 * The caller is responsible for releasing the lock if it requested to
3380 * keep the map locked.
3381 */
3382 if (map_locked && !keep_map_locked) {
3383 vm_map_unlock(map);
3384 }
3385
3386 /*
3387 * Get rid of the "zap_maps" and all the map entries that
3388 * they may still contain.
3389 */
3390 if (zap_old_map != VM_MAP_NULL) {
3391 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3392 zap_old_map = VM_MAP_NULL;
3393 }
3394 if (zap_new_map != VM_MAP_NULL) {
3395 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3396 zap_new_map = VM_MAP_NULL;
3397 }
3398
3399 return result;
3400
3401 #undef RETURN
3402 }
3403
3404 #if __arm64__
3405 extern const struct memory_object_pager_ops fourk_pager_ops;
3406 kern_return_t
vm_map_enter_fourk(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t alias,vm_object_t object,vm_object_offset_t offset,boolean_t needs_copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)3407 vm_map_enter_fourk(
3408 vm_map_t map,
3409 vm_map_offset_t *address, /* IN/OUT */
3410 vm_map_size_t size,
3411 vm_map_offset_t mask,
3412 int flags,
3413 vm_map_kernel_flags_t vmk_flags,
3414 vm_tag_t alias,
3415 vm_object_t object,
3416 vm_object_offset_t offset,
3417 boolean_t needs_copy,
3418 vm_prot_t cur_protection,
3419 vm_prot_t max_protection,
3420 vm_inherit_t inheritance)
3421 {
3422 vm_map_entry_t entry, new_entry;
3423 vm_map_offset_t start, fourk_start;
3424 vm_map_offset_t end, fourk_end;
3425 vm_map_size_t fourk_size;
3426 kern_return_t result = KERN_SUCCESS;
3427 vm_map_t zap_old_map = VM_MAP_NULL;
3428 vm_map_t zap_new_map = VM_MAP_NULL;
3429 boolean_t map_locked = FALSE;
3430 boolean_t pmap_empty = TRUE;
3431 boolean_t new_mapping_established = FALSE;
3432 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3433 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3434 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3435 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3436 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3437 boolean_t is_submap = vmk_flags.vmkf_submap;
3438 boolean_t permanent = vmk_flags.vmkf_permanent;
3439 boolean_t no_copy_on_read = vmk_flags.vmkf_permanent;
3440 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3441 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3442 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
3443 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3444 vm_map_offset_t effective_min_offset, effective_max_offset;
3445 kern_return_t kr;
3446 boolean_t clear_map_aligned = FALSE;
3447 memory_object_t fourk_mem_obj;
3448 vm_object_t fourk_object;
3449 vm_map_offset_t fourk_pager_offset;
3450 int fourk_pager_index_start, fourk_pager_index_num;
3451 int cur_idx;
3452 boolean_t fourk_copy;
3453 vm_object_t copy_object;
3454 vm_object_offset_t copy_offset;
3455
3456 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3457 panic("%s:%d", __FUNCTION__, __LINE__);
3458 }
3459 fourk_mem_obj = MEMORY_OBJECT_NULL;
3460 fourk_object = VM_OBJECT_NULL;
3461
3462 if (superpage_size) {
3463 return KERN_NOT_SUPPORTED;
3464 }
3465
3466 if ((cur_protection & VM_PROT_WRITE) &&
3467 (cur_protection & VM_PROT_EXECUTE) &&
3468 #if XNU_TARGET_OS_OSX
3469 map->pmap != kernel_pmap &&
3470 (vm_map_cs_enforcement(map)
3471 #if __arm64__
3472 || !VM_MAP_IS_EXOTIC(map)
3473 #endif /* __arm64__ */
3474 ) &&
3475 #endif /* XNU_TARGET_OS_OSX */
3476 !entry_for_jit) {
3477 DTRACE_VM3(cs_wx,
3478 uint64_t, 0,
3479 uint64_t, 0,
3480 vm_prot_t, cur_protection);
3481 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3482 "turning off execute\n",
3483 proc_selfpid(),
3484 (current_task()->bsd_info
3485 ? proc_name_address(current_task()->bsd_info)
3486 : "?"),
3487 __FUNCTION__);
3488 cur_protection &= ~VM_PROT_EXECUTE;
3489 }
3490
3491 /*
3492 * If the task has requested executable lockdown,
3493 * deny any new executable mapping.
3494 */
3495 if (map->map_disallow_new_exec == TRUE) {
3496 if (cur_protection & VM_PROT_EXECUTE) {
3497 return KERN_PROTECTION_FAILURE;
3498 }
3499 }
3500
3501 if (is_submap) {
3502 return KERN_NOT_SUPPORTED;
3503 }
3504 if (vmk_flags.vmkf_already) {
3505 return KERN_NOT_SUPPORTED;
3506 }
3507 if (purgable || entry_for_jit) {
3508 return KERN_NOT_SUPPORTED;
3509 }
3510
3511 effective_min_offset = map->min_offset;
3512
3513 if (vmk_flags.vmkf_beyond_max) {
3514 return KERN_NOT_SUPPORTED;
3515 } else {
3516 effective_max_offset = map->max_offset;
3517 }
3518
3519 if (size == 0 ||
3520 (offset & FOURK_PAGE_MASK) != 0) {
3521 *address = 0;
3522 return KERN_INVALID_ARGUMENT;
3523 }
3524
3525 #define RETURN(value) { result = value; goto BailOut; }
3526
3527 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3528 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3529
3530 if (!anywhere && overwrite) {
3531 return KERN_NOT_SUPPORTED;
3532 }
3533 if (!anywhere && overwrite) {
3534 /*
3535 * Create a temporary VM map to hold the old mappings in the
3536 * affected area while we create the new one.
3537 * This avoids releasing the VM map lock in
3538 * vm_map_entry_delete() and allows atomicity
3539 * when we want to replace some mappings with a new one.
3540 * It also allows us to restore the old VM mappings if the
3541 * new mapping fails.
3542 */
3543 zap_old_map = vm_map_create(PMAP_NULL,
3544 *address,
3545 *address + size,
3546 map->hdr.entries_pageable);
3547 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3548 vm_map_disable_hole_optimization(zap_old_map);
3549 }
3550
3551 fourk_start = *address;
3552 fourk_size = size;
3553 fourk_end = fourk_start + fourk_size;
3554
3555 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3556 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3557 size = end - start;
3558
3559 if (anywhere) {
3560 return KERN_NOT_SUPPORTED;
3561 } else {
3562 /*
3563 * Verify that:
3564 * the address doesn't itself violate
3565 * the mask requirement.
3566 */
3567
3568 vm_map_lock(map);
3569 map_locked = TRUE;
3570 if ((start & mask) != 0) {
3571 RETURN(KERN_NO_SPACE);
3572 }
3573
3574 /*
3575 * ... the address is within bounds
3576 */
3577
3578 end = start + size;
3579
3580 if ((start < effective_min_offset) ||
3581 (end > effective_max_offset) ||
3582 (start >= end)) {
3583 RETURN(KERN_INVALID_ADDRESS);
3584 }
3585
3586 if (overwrite && zap_old_map != VM_MAP_NULL) {
3587 /*
3588 * Fixed mapping and "overwrite" flag: attempt to
3589 * remove all existing mappings in the specified
3590 * address range, saving them in our "zap_old_map".
3591 */
3592 (void) vm_map_delete(map, start, end,
3593 (VM_MAP_REMOVE_SAVE_ENTRIES |
3594 VM_MAP_REMOVE_NO_MAP_ALIGN),
3595 zap_old_map);
3596 }
3597
3598 /*
3599 * ... the starting address isn't allocated
3600 */
3601 if (vm_map_lookup_entry(map, start, &entry)) {
3602 vm_object_t cur_object, shadow_object;
3603
3604 /*
3605 * We might already some 4K mappings
3606 * in a 16K page here.
3607 */
3608
3609 if (entry->vme_end - entry->vme_start
3610 != SIXTEENK_PAGE_SIZE) {
3611 RETURN(KERN_NO_SPACE);
3612 }
3613 if (entry->is_sub_map) {
3614 RETURN(KERN_NO_SPACE);
3615 }
3616 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3617 RETURN(KERN_NO_SPACE);
3618 }
3619
3620 /* go all the way down the shadow chain */
3621 cur_object = VME_OBJECT(entry);
3622 vm_object_lock(cur_object);
3623 while (cur_object->shadow != VM_OBJECT_NULL) {
3624 shadow_object = cur_object->shadow;
3625 vm_object_lock(shadow_object);
3626 vm_object_unlock(cur_object);
3627 cur_object = shadow_object;
3628 shadow_object = VM_OBJECT_NULL;
3629 }
3630 if (cur_object->internal ||
3631 cur_object->pager == NULL) {
3632 vm_object_unlock(cur_object);
3633 RETURN(KERN_NO_SPACE);
3634 }
3635 if (cur_object->pager->mo_pager_ops
3636 != &fourk_pager_ops) {
3637 vm_object_unlock(cur_object);
3638 RETURN(KERN_NO_SPACE);
3639 }
3640 fourk_object = cur_object;
3641 fourk_mem_obj = fourk_object->pager;
3642
3643 /* keep the "4K" object alive */
3644 vm_object_reference_locked(fourk_object);
3645 memory_object_reference(fourk_mem_obj);
3646 vm_object_unlock(fourk_object);
3647
3648 /* merge permissions */
3649 entry->protection |= cur_protection;
3650 entry->max_protection |= max_protection;
3651
3652 if ((entry->protection & VM_PROT_WRITE) &&
3653 (entry->protection & VM_PROT_ALLEXEC) &&
3654 fourk_binary_compatibility_unsafe &&
3655 fourk_binary_compatibility_allow_wx) {
3656 /* write+execute: need to be "jit" */
3657 entry->used_for_jit = TRUE;
3658 }
3659 goto map_in_fourk_pager;
3660 }
3661
3662 /*
3663 * ... the next region doesn't overlap the
3664 * end point.
3665 */
3666
3667 if ((entry->vme_next != vm_map_to_entry(map)) &&
3668 (entry->vme_next->vme_start < end)) {
3669 RETURN(KERN_NO_SPACE);
3670 }
3671 }
3672
3673 /*
3674 * At this point,
3675 * "start" and "end" should define the endpoints of the
3676 * available new range, and
3677 * "entry" should refer to the region before the new
3678 * range, and
3679 *
3680 * the map should be locked.
3681 */
3682
3683 /* create a new "4K" pager */
3684 fourk_mem_obj = fourk_pager_create();
3685 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3686 assert(fourk_object);
3687
3688 /* keep the "4" object alive */
3689 vm_object_reference(fourk_object);
3690
3691 /* create a "copy" object, to map the "4K" object copy-on-write */
3692 fourk_copy = TRUE;
3693 result = vm_object_copy_strategically(fourk_object,
3694 0,
3695 end - start,
3696 ©_object,
3697 ©_offset,
3698 &fourk_copy);
3699 assert(result == KERN_SUCCESS);
3700 assert(copy_object != VM_OBJECT_NULL);
3701 assert(copy_offset == 0);
3702
3703 /* map the "4K" pager's copy object */
3704 new_entry =
3705 vm_map_entry_insert(map, entry,
3706 vm_map_trunc_page(start,
3707 VM_MAP_PAGE_MASK(map)),
3708 vm_map_round_page(end,
3709 VM_MAP_PAGE_MASK(map)),
3710 copy_object,
3711 0, /* offset */
3712 vmk_flags,
3713 FALSE, /* needs_copy */
3714 FALSE,
3715 FALSE,
3716 cur_protection, max_protection,
3717 VM_BEHAVIOR_DEFAULT,
3718 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3719 VM_INHERIT_NONE : inheritance),
3720 0,
3721 no_cache,
3722 permanent,
3723 no_copy_on_read,
3724 superpage_size,
3725 clear_map_aligned,
3726 is_submap,
3727 FALSE, /* jit */
3728 alias,
3729 translated_allow_execute);
3730 entry = new_entry;
3731
3732 #if VM_MAP_DEBUG_FOURK
3733 if (vm_map_debug_fourk) {
3734 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3735 map,
3736 (uint64_t) entry->vme_start,
3737 (uint64_t) entry->vme_end,
3738 fourk_mem_obj);
3739 }
3740 #endif /* VM_MAP_DEBUG_FOURK */
3741
3742 new_mapping_established = TRUE;
3743
3744 map_in_fourk_pager:
3745 /* "map" the original "object" where it belongs in the "4K" pager */
3746 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3747 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3748 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3749 fourk_pager_index_num = 4;
3750 } else {
3751 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3752 }
3753 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3754 fourk_pager_index_num = 4 - fourk_pager_index_start;
3755 }
3756 for (cur_idx = 0;
3757 cur_idx < fourk_pager_index_num;
3758 cur_idx++) {
3759 vm_object_t old_object;
3760 vm_object_offset_t old_offset;
3761
3762 kr = fourk_pager_populate(fourk_mem_obj,
3763 TRUE, /* overwrite */
3764 fourk_pager_index_start + cur_idx,
3765 object,
3766 (object
3767 ? (offset +
3768 (cur_idx * FOURK_PAGE_SIZE))
3769 : 0),
3770 &old_object,
3771 &old_offset);
3772 #if VM_MAP_DEBUG_FOURK
3773 if (vm_map_debug_fourk) {
3774 if (old_object == (vm_object_t) -1 &&
3775 old_offset == (vm_object_offset_t) -1) {
3776 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3777 "pager [%p:0x%llx] "
3778 "populate[%d] "
3779 "[object:%p,offset:0x%llx]\n",
3780 map,
3781 (uint64_t) entry->vme_start,
3782 (uint64_t) entry->vme_end,
3783 fourk_mem_obj,
3784 VME_OFFSET(entry),
3785 fourk_pager_index_start + cur_idx,
3786 object,
3787 (object
3788 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3789 : 0));
3790 } else {
3791 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3792 "pager [%p:0x%llx] "
3793 "populate[%d] [object:%p,offset:0x%llx] "
3794 "old [%p:0x%llx]\n",
3795 map,
3796 (uint64_t) entry->vme_start,
3797 (uint64_t) entry->vme_end,
3798 fourk_mem_obj,
3799 VME_OFFSET(entry),
3800 fourk_pager_index_start + cur_idx,
3801 object,
3802 (object
3803 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3804 : 0),
3805 old_object,
3806 old_offset);
3807 }
3808 }
3809 #endif /* VM_MAP_DEBUG_FOURK */
3810
3811 assert(kr == KERN_SUCCESS);
3812 if (object != old_object &&
3813 object != VM_OBJECT_NULL &&
3814 object != (vm_object_t) -1) {
3815 vm_object_reference(object);
3816 }
3817 if (object != old_object &&
3818 old_object != VM_OBJECT_NULL &&
3819 old_object != (vm_object_t) -1) {
3820 vm_object_deallocate(old_object);
3821 }
3822 }
3823
3824 BailOut:
3825 assert(map_locked == TRUE);
3826
3827 if (result == KERN_SUCCESS) {
3828 vm_prot_t pager_prot;
3829 memory_object_t pager;
3830
3831 #if DEBUG
3832 if (pmap_empty &&
3833 !(vmk_flags.vmkf_no_pmap_check)) {
3834 assert(pmap_is_empty(map->pmap,
3835 *address,
3836 *address + size));
3837 }
3838 #endif /* DEBUG */
3839
3840 /*
3841 * For "named" VM objects, let the pager know that the
3842 * memory object is being mapped. Some pagers need to keep
3843 * track of this, to know when they can reclaim the memory
3844 * object, for example.
3845 * VM calls memory_object_map() for each mapping (specifying
3846 * the protection of each mapping) and calls
3847 * memory_object_last_unmap() when all the mappings are gone.
3848 */
3849 pager_prot = max_protection;
3850 if (needs_copy) {
3851 /*
3852 * Copy-On-Write mapping: won't modify
3853 * the memory object.
3854 */
3855 pager_prot &= ~VM_PROT_WRITE;
3856 }
3857 if (!is_submap &&
3858 object != VM_OBJECT_NULL &&
3859 object->named &&
3860 object->pager != MEMORY_OBJECT_NULL) {
3861 vm_object_lock(object);
3862 pager = object->pager;
3863 if (object->named &&
3864 pager != MEMORY_OBJECT_NULL) {
3865 assert(object->pager_ready);
3866 vm_object_mapping_wait(object, THREAD_UNINT);
3867 vm_object_mapping_begin(object);
3868 vm_object_unlock(object);
3869
3870 kr = memory_object_map(pager, pager_prot);
3871 assert(kr == KERN_SUCCESS);
3872
3873 vm_object_lock(object);
3874 vm_object_mapping_end(object);
3875 }
3876 vm_object_unlock(object);
3877 }
3878 if (!is_submap &&
3879 fourk_object != VM_OBJECT_NULL &&
3880 fourk_object->named &&
3881 fourk_object->pager != MEMORY_OBJECT_NULL) {
3882 vm_object_lock(fourk_object);
3883 pager = fourk_object->pager;
3884 if (fourk_object->named &&
3885 pager != MEMORY_OBJECT_NULL) {
3886 assert(fourk_object->pager_ready);
3887 vm_object_mapping_wait(fourk_object,
3888 THREAD_UNINT);
3889 vm_object_mapping_begin(fourk_object);
3890 vm_object_unlock(fourk_object);
3891
3892 kr = memory_object_map(pager, VM_PROT_READ);
3893 assert(kr == KERN_SUCCESS);
3894
3895 vm_object_lock(fourk_object);
3896 vm_object_mapping_end(fourk_object);
3897 }
3898 vm_object_unlock(fourk_object);
3899 }
3900 }
3901
3902 if (fourk_object != VM_OBJECT_NULL) {
3903 vm_object_deallocate(fourk_object);
3904 fourk_object = VM_OBJECT_NULL;
3905 memory_object_deallocate(fourk_mem_obj);
3906 fourk_mem_obj = MEMORY_OBJECT_NULL;
3907 }
3908
3909 assert(map_locked == TRUE);
3910
3911 if (!keep_map_locked) {
3912 vm_map_unlock(map);
3913 map_locked = FALSE;
3914 }
3915
3916 /*
3917 * We can't hold the map lock if we enter this block.
3918 */
3919
3920 if (result == KERN_SUCCESS) {
3921 /* Wire down the new entry if the user
3922 * requested all new map entries be wired.
3923 */
3924 if ((map->wiring_required) || (superpage_size)) {
3925 assert(!keep_map_locked);
3926 pmap_empty = FALSE; /* pmap won't be empty */
3927 kr = vm_map_wire_kernel(map, start, end,
3928 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3929 TRUE);
3930 result = kr;
3931 }
3932
3933 }
3934
3935 if (result != KERN_SUCCESS) {
3936 if (new_mapping_established) {
3937 /*
3938 * We have to get rid of the new mappings since we
3939 * won't make them available to the user.
3940 * Try and do that atomically, to minimize the risk
3941 * that someone else create new mappings that range.
3942 */
3943 zap_new_map = vm_map_create(PMAP_NULL,
3944 *address,
3945 *address + size,
3946 map->hdr.entries_pageable);
3947 vm_map_set_page_shift(zap_new_map,
3948 VM_MAP_PAGE_SHIFT(map));
3949 vm_map_disable_hole_optimization(zap_new_map);
3950
3951 if (!map_locked) {
3952 vm_map_lock(map);
3953 map_locked = TRUE;
3954 }
3955 (void) vm_map_delete(map, *address, *address + size,
3956 (VM_MAP_REMOVE_SAVE_ENTRIES |
3957 VM_MAP_REMOVE_NO_MAP_ALIGN),
3958 zap_new_map);
3959 }
3960 if (zap_old_map != VM_MAP_NULL &&
3961 zap_old_map->hdr.nentries != 0) {
3962 vm_map_entry_t entry1, entry2;
3963
3964 /*
3965 * The new mapping failed. Attempt to restore
3966 * the old mappings, saved in the "zap_old_map".
3967 */
3968 if (!map_locked) {
3969 vm_map_lock(map);
3970 map_locked = TRUE;
3971 }
3972
3973 /* first check if the coast is still clear */
3974 start = vm_map_first_entry(zap_old_map)->vme_start;
3975 end = vm_map_last_entry(zap_old_map)->vme_end;
3976 if (vm_map_lookup_entry(map, start, &entry1) ||
3977 vm_map_lookup_entry(map, end, &entry2) ||
3978 entry1 != entry2) {
3979 /*
3980 * Part of that range has already been
3981 * re-mapped: we can't restore the old
3982 * mappings...
3983 */
3984 vm_map_enter_restore_failures++;
3985 } else {
3986 /*
3987 * Transfer the saved map entries from
3988 * "zap_old_map" to the original "map",
3989 * inserting them all after "entry1".
3990 */
3991 for (entry2 = vm_map_first_entry(zap_old_map);
3992 entry2 != vm_map_to_entry(zap_old_map);
3993 entry2 = vm_map_first_entry(zap_old_map)) {
3994 vm_map_size_t entry_size;
3995
3996 entry_size = (entry2->vme_end -
3997 entry2->vme_start);
3998 vm_map_store_entry_unlink(zap_old_map,
3999 entry2);
4000 zap_old_map->size -= entry_size;
4001 vm_map_store_entry_link(map, entry1, entry2,
4002 VM_MAP_KERNEL_FLAGS_NONE);
4003 map->size += entry_size;
4004 entry1 = entry2;
4005 }
4006 if (map->wiring_required) {
4007 /*
4008 * XXX TODO: we should rewire the
4009 * old pages here...
4010 */
4011 }
4012 vm_map_enter_restore_successes++;
4013 }
4014 }
4015 }
4016
4017 /*
4018 * The caller is responsible for releasing the lock if it requested to
4019 * keep the map locked.
4020 */
4021 if (map_locked && !keep_map_locked) {
4022 vm_map_unlock(map);
4023 }
4024
4025 /*
4026 * Get rid of the "zap_maps" and all the map entries that
4027 * they may still contain.
4028 */
4029 if (zap_old_map != VM_MAP_NULL) {
4030 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4031 zap_old_map = VM_MAP_NULL;
4032 }
4033 if (zap_new_map != VM_MAP_NULL) {
4034 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4035 zap_new_map = VM_MAP_NULL;
4036 }
4037
4038 return result;
4039
4040 #undef RETURN
4041 }
4042 #endif /* __arm64__ */
4043
/*
 * Counters for the prefault optimization.
 * NOTE(review): maintained by the prefault path in
 * vm_map_enter_mem_object_helper() below (not fully visible here) --
 * presumably pages successfully prefaulted vs. attempts abandoned; confirm
 * against the usage sites before relying on the exact semantics.
 */
int64_t vm_prefault_nb_pages = 0;
int64_t vm_prefault_nb_bailout = 0;
4049
4050 static kern_return_t
vm_map_enter_mem_object_helper(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance,upl_page_list_ptr_t page_list,unsigned int page_list_count)4051 vm_map_enter_mem_object_helper(
4052 vm_map_t target_map,
4053 vm_map_offset_t *address,
4054 vm_map_size_t initial_size,
4055 vm_map_offset_t mask,
4056 int flags,
4057 vm_map_kernel_flags_t vmk_flags,
4058 vm_tag_t tag,
4059 ipc_port_t port,
4060 vm_object_offset_t offset,
4061 boolean_t copy,
4062 vm_prot_t cur_protection,
4063 vm_prot_t max_protection,
4064 vm_inherit_t inheritance,
4065 upl_page_list_ptr_t page_list,
4066 unsigned int page_list_count)
4067 {
4068 vm_map_address_t map_addr;
4069 vm_map_size_t map_size;
4070 vm_object_t object;
4071 vm_object_size_t size;
4072 kern_return_t result;
4073 boolean_t mask_cur_protection, mask_max_protection;
4074 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4075 vm_map_offset_t offset_in_mapping = 0;
4076 #if __arm64__
4077 boolean_t fourk = vmk_flags.vmkf_fourk;
4078 #endif /* __arm64__ */
4079
4080 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4081 /* XXX TODO4K prefaulting depends on page size... */
4082 try_prefault = FALSE;
4083 }
4084
4085 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4086
4087 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4088 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4089 cur_protection &= ~VM_PROT_IS_MASK;
4090 max_protection &= ~VM_PROT_IS_MASK;
4091
4092 /*
4093 * Check arguments for validity
4094 */
4095 if ((target_map == VM_MAP_NULL) ||
4096 (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4097 (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4098 (inheritance > VM_INHERIT_LAST_VALID) ||
4099 (try_prefault && (copy || !page_list)) ||
4100 initial_size == 0) {
4101 return KERN_INVALID_ARGUMENT;
4102 }
4103
4104 #if __arm64__
4105 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4106 /* no "fourk" if map is using a sub-page page size */
4107 fourk = FALSE;
4108 }
4109 if (fourk) {
4110 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4111 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4112 } else
4113 #endif /* __arm64__ */
4114 {
4115 map_addr = vm_map_trunc_page(*address,
4116 VM_MAP_PAGE_MASK(target_map));
4117 map_size = vm_map_round_page(initial_size,
4118 VM_MAP_PAGE_MASK(target_map));
4119 }
4120 size = vm_object_round_page(initial_size);
4121
4122 /*
4123 * Find the vm object (if any) corresponding to this port.
4124 */
4125 if (!IP_VALID(port)) {
4126 object = VM_OBJECT_NULL;
4127 offset = 0;
4128 copy = FALSE;
4129 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4130 vm_named_entry_t named_entry;
4131 vm_object_offset_t data_offset;
4132
4133 named_entry = mach_memory_entry_from_port(port);
4134
4135 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4136 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4137 data_offset = named_entry->data_offset;
4138 offset += named_entry->data_offset;
4139 } else {
4140 data_offset = 0;
4141 }
4142
4143 /* a few checks to make sure user is obeying rules */
4144 if (size == 0) {
4145 if (offset >= named_entry->size) {
4146 return KERN_INVALID_RIGHT;
4147 }
4148 size = named_entry->size - offset;
4149 }
4150 if (mask_max_protection) {
4151 max_protection &= named_entry->protection;
4152 }
4153 if (mask_cur_protection) {
4154 cur_protection &= named_entry->protection;
4155 }
4156 if ((named_entry->protection & max_protection) !=
4157 max_protection) {
4158 return KERN_INVALID_RIGHT;
4159 }
4160 if ((named_entry->protection & cur_protection) !=
4161 cur_protection) {
4162 return KERN_INVALID_RIGHT;
4163 }
4164 if (offset + size < offset) {
4165 /* overflow */
4166 return KERN_INVALID_ARGUMENT;
4167 }
4168 if (named_entry->size < (offset + initial_size)) {
4169 return KERN_INVALID_ARGUMENT;
4170 }
4171
4172 if (named_entry->is_copy) {
4173 /* for a vm_map_copy, we can only map it whole */
4174 if ((size != named_entry->size) &&
4175 (vm_map_round_page(size,
4176 VM_MAP_PAGE_MASK(target_map)) ==
4177 named_entry->size)) {
4178 /* XXX FBDP use the rounded size... */
4179 size = vm_map_round_page(
4180 size,
4181 VM_MAP_PAGE_MASK(target_map));
4182 }
4183 }
4184
4185 /* the callers parameter offset is defined to be the */
4186 /* offset from beginning of named entry offset in object */
4187 offset = offset + named_entry->offset;
4188
4189 if (!VM_MAP_PAGE_ALIGNED(size,
4190 VM_MAP_PAGE_MASK(target_map))) {
4191 /*
4192 * Let's not map more than requested;
4193 * vm_map_enter() will handle this "not map-aligned"
4194 * case.
4195 */
4196 map_size = size;
4197 }
4198
4199 named_entry_lock(named_entry);
4200 if (named_entry->is_sub_map) {
4201 vm_map_t submap;
4202
4203 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4204 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4205 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4206 }
4207
4208 submap = named_entry->backing.map;
4209 vm_map_reference(submap);
4210 named_entry_unlock(named_entry);
4211
4212 vmk_flags.vmkf_submap = TRUE;
4213
4214 result = vm_map_enter(target_map,
4215 &map_addr,
4216 map_size,
4217 mask,
4218 flags,
4219 vmk_flags,
4220 tag,
4221 (vm_object_t)(uintptr_t) submap,
4222 offset,
4223 copy,
4224 cur_protection,
4225 max_protection,
4226 inheritance);
4227 if (result != KERN_SUCCESS) {
4228 vm_map_deallocate(submap);
4229 } else {
4230 /*
4231 * No need to lock "submap" just to check its
4232 * "mapped" flag: that flag is never reset
4233 * once it's been set and if we race, we'll
4234 * just end up setting it twice, which is OK.
4235 */
4236 if (submap->mapped_in_other_pmaps == FALSE &&
4237 vm_map_pmap(submap) != PMAP_NULL &&
4238 vm_map_pmap(submap) !=
4239 vm_map_pmap(target_map)) {
4240 /*
4241 * This submap is being mapped in a map
4242 * that uses a different pmap.
4243 * Set its "mapped_in_other_pmaps" flag
4244 * to indicate that we now need to
4245 * remove mappings from all pmaps rather
4246 * than just the submap's pmap.
4247 */
4248 vm_map_lock(submap);
4249 submap->mapped_in_other_pmaps = TRUE;
4250 vm_map_unlock(submap);
4251 }
4252 *address = map_addr;
4253 }
4254 return result;
4255 } else if (named_entry->is_copy) {
4256 kern_return_t kr;
4257 vm_map_copy_t copy_map;
4258 vm_map_entry_t copy_entry;
4259 vm_map_offset_t copy_addr;
4260 vm_map_copy_t target_copy_map;
4261 vm_map_offset_t overmap_start, overmap_end;
4262 vm_map_offset_t trimmed_start;
4263 vm_map_size_t target_size;
4264
4265 if (flags & ~(VM_FLAGS_FIXED |
4266 VM_FLAGS_ANYWHERE |
4267 VM_FLAGS_OVERWRITE |
4268 VM_FLAGS_RETURN_4K_DATA_ADDR |
4269 VM_FLAGS_RETURN_DATA_ADDR |
4270 VM_FLAGS_ALIAS_MASK)) {
4271 named_entry_unlock(named_entry);
4272 return KERN_INVALID_ARGUMENT;
4273 }
4274
4275 copy_map = named_entry->backing.copy;
4276 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4277 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4278 /* unsupported type; should not happen */
4279 printf("vm_map_enter_mem_object: "
4280 "memory_entry->backing.copy "
4281 "unsupported type 0x%x\n",
4282 copy_map->type);
4283 named_entry_unlock(named_entry);
4284 return KERN_INVALID_ARGUMENT;
4285 }
4286
4287 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4288 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4289 }
4290
4291 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4292 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4293 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4294 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4295 offset_in_mapping &= ~((signed)(0xFFF));
4296 }
4297 }
4298
4299 target_copy_map = VM_MAP_COPY_NULL;
4300 target_size = copy_map->size;
4301 overmap_start = 0;
4302 overmap_end = 0;
4303 trimmed_start = 0;
4304 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4305 DEBUG4K_ADJUST("adjusting...\n");
4306 kr = vm_map_copy_adjust_to_target(
4307 copy_map,
4308 offset /* includes data_offset */,
4309 initial_size,
4310 target_map,
4311 copy,
4312 &target_copy_map,
4313 &overmap_start,
4314 &overmap_end,
4315 &trimmed_start);
4316 if (kr != KERN_SUCCESS) {
4317 named_entry_unlock(named_entry);
4318 return kr;
4319 }
4320 target_size = target_copy_map->size;
4321 if (trimmed_start >= data_offset) {
4322 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4323 } else {
4324 data_offset -= trimmed_start;
4325 }
4326 } else {
4327 target_copy_map = copy_map;
4328 }
4329
4330 /* reserve a contiguous range */
4331 kr = vm_map_enter(target_map,
4332 &map_addr,
4333 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4334 mask,
4335 flags & (VM_FLAGS_ANYWHERE |
4336 VM_FLAGS_OVERWRITE |
4337 VM_FLAGS_RETURN_4K_DATA_ADDR |
4338 VM_FLAGS_RETURN_DATA_ADDR),
4339 vmk_flags,
4340 tag,
4341 VM_OBJECT_NULL,
4342 0,
4343 FALSE, /* copy */
4344 cur_protection,
4345 max_protection,
4346 inheritance);
4347 if (kr != KERN_SUCCESS) {
4348 DEBUG4K_ERROR("kr 0x%x\n", kr);
4349 if (target_copy_map != copy_map) {
4350 vm_map_copy_discard(target_copy_map);
4351 target_copy_map = VM_MAP_COPY_NULL;
4352 }
4353 named_entry_unlock(named_entry);
4354 return kr;
4355 }
4356
4357 copy_addr = map_addr;
4358
4359 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4360 copy_entry != vm_map_copy_to_entry(target_copy_map);
4361 copy_entry = copy_entry->vme_next) {
4362 int remap_flags;
4363 vm_map_kernel_flags_t vmk_remap_flags;
4364 vm_map_t copy_submap;
4365 vm_object_t copy_object;
4366 vm_map_size_t copy_size;
4367 vm_object_offset_t copy_offset;
4368 int copy_vm_alias;
4369
4370 remap_flags = 0;
4371 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4372
4373 copy_object = VME_OBJECT(copy_entry);
4374 copy_offset = VME_OFFSET(copy_entry);
4375 copy_size = (copy_entry->vme_end -
4376 copy_entry->vme_start);
4377 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4378 if (copy_vm_alias == 0) {
4379 /*
4380 * Caller does not want a specific
4381 * alias for this new mapping: use
4382 * the alias of the original mapping.
4383 */
4384 copy_vm_alias = VME_ALIAS(copy_entry);
4385 }
4386
4387 /* sanity check */
4388 if ((copy_addr + copy_size) >
4389 (map_addr +
4390 overmap_start + overmap_end +
4391 named_entry->size /* XXX full size */)) {
4392 /* over-mapping too much !? */
4393 kr = KERN_INVALID_ARGUMENT;
4394 DEBUG4K_ERROR("kr 0x%x\n", kr);
4395 /* abort */
4396 break;
4397 }
4398
4399 /* take a reference on the object */
4400 if (copy_entry->is_sub_map) {
4401 vmk_remap_flags.vmkf_submap = TRUE;
4402 copy_submap = VME_SUBMAP(copy_entry);
4403 vm_map_lock(copy_submap);
4404 vm_map_reference(copy_submap);
4405 vm_map_unlock(copy_submap);
4406 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4407 } else if (!copy &&
4408 copy_object != VM_OBJECT_NULL &&
4409 (copy_entry->needs_copy ||
4410 copy_object->shadowed ||
4411 (!copy_object->true_share &&
4412 !copy_entry->is_shared &&
4413 copy_object->vo_size > copy_size))) {
4414 /*
4415 * We need to resolve our side of this
4416 * "symmetric" copy-on-write now; we
4417 * need a new object to map and share,
4418 * instead of the current one which
4419 * might still be shared with the
4420 * original mapping.
4421 *
4422 * Note: A "vm_map_copy_t" does not
4423 * have a lock but we're protected by
4424 * the named entry's lock here.
4425 */
4426 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4427 VME_OBJECT_SHADOW(copy_entry, copy_size);
4428 if (!copy_entry->needs_copy &&
4429 copy_entry->protection & VM_PROT_WRITE) {
4430 vm_prot_t prot;
4431
4432 prot = copy_entry->protection & ~VM_PROT_WRITE;
4433 vm_object_pmap_protect(copy_object,
4434 copy_offset,
4435 copy_size,
4436 PMAP_NULL,
4437 PAGE_SIZE,
4438 0,
4439 prot);
4440 }
4441
4442 copy_entry->needs_copy = FALSE;
4443 copy_entry->is_shared = TRUE;
4444 copy_object = VME_OBJECT(copy_entry);
4445 copy_offset = VME_OFFSET(copy_entry);
4446 vm_object_lock(copy_object);
4447 vm_object_reference_locked(copy_object);
4448 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4449 /* we're about to make a shared mapping of this object */
4450 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4451 copy_object->true_share = TRUE;
4452 }
4453 vm_object_unlock(copy_object);
4454 } else {
4455 /*
4456 * We already have the right object
4457 * to map.
4458 */
4459 copy_object = VME_OBJECT(copy_entry);
4460 vm_object_reference(copy_object);
4461 }
4462
4463 /* over-map the object into destination */
4464 remap_flags |= flags;
4465 remap_flags |= VM_FLAGS_FIXED;
4466 remap_flags |= VM_FLAGS_OVERWRITE;
4467 remap_flags &= ~VM_FLAGS_ANYWHERE;
4468 if (!copy && !copy_entry->is_sub_map) {
4469 /*
4470 * copy-on-write should have been
4471 * resolved at this point, or we would
4472 * end up sharing instead of copying.
4473 */
4474 assert(!copy_entry->needs_copy);
4475 }
4476 #if XNU_TARGET_OS_OSX
4477 if (copy_entry->used_for_jit) {
4478 vmk_remap_flags.vmkf_map_jit = TRUE;
4479 }
4480 #endif /* XNU_TARGET_OS_OSX */
4481
4482 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4483 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4484 kr = vm_map_enter(target_map,
4485 ©_addr,
4486 copy_size,
4487 (vm_map_offset_t) 0,
4488 remap_flags,
4489 vmk_remap_flags,
4490 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4491 copy_object,
4492 copy_offset,
4493 ((copy_object == NULL)
4494 ? FALSE
4495 : (copy || copy_entry->needs_copy)),
4496 cur_protection,
4497 max_protection,
4498 inheritance);
4499 if (kr != KERN_SUCCESS) {
4500 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4501 if (copy_entry->is_sub_map) {
4502 vm_map_deallocate(copy_submap);
4503 } else {
4504 vm_object_deallocate(copy_object);
4505 }
4506 /* abort */
4507 break;
4508 }
4509
4510 /* next mapping */
4511 copy_addr += copy_size;
4512 }
4513
4514 if (kr == KERN_SUCCESS) {
4515 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4516 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4517 *address = map_addr + offset_in_mapping;
4518 } else {
4519 *address = map_addr;
4520 }
4521 if (overmap_start) {
4522 *address += overmap_start;
4523 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4524 }
4525 }
4526 named_entry_unlock(named_entry);
4527 if (target_copy_map != copy_map) {
4528 vm_map_copy_discard(target_copy_map);
4529 target_copy_map = VM_MAP_COPY_NULL;
4530 }
4531
4532 if (kr != KERN_SUCCESS) {
4533 if (!(flags & VM_FLAGS_OVERWRITE)) {
4534 /* deallocate the contiguous range */
4535 (void) vm_deallocate(target_map,
4536 map_addr,
4537 map_size);
4538 }
4539 }
4540
4541 return kr;
4542 }
4543
4544 if (named_entry->is_object) {
4545 unsigned int access;
4546 vm_prot_t protections;
4547 unsigned int wimg_mode;
4548
4549 /* we are mapping a VM object */
4550
4551 protections = named_entry->protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
4552 access = GET_MAP_MEM(named_entry->protection);
4553
4554 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4555 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4556 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4557 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4558 offset_in_mapping &= ~((signed)(0xFFF));
4559 }
4560 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4561 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4562 }
4563
4564 object = vm_named_entry_to_vm_object(named_entry);
4565 assert(object != VM_OBJECT_NULL);
4566 vm_object_lock(object);
4567 named_entry_unlock(named_entry);
4568
4569 vm_object_reference_locked(object);
4570
4571 wimg_mode = object->wimg_bits;
4572 vm_prot_to_wimg(access, &wimg_mode);
4573 if (object->wimg_bits != wimg_mode) {
4574 vm_object_change_wimg_mode(object, wimg_mode);
4575 }
4576
4577 vm_object_unlock(object);
4578 } else {
4579 panic("invalid VM named entry %p", named_entry);
4580 }
4581 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4582 /*
4583 * JMM - This is temporary until we unify named entries
4584 * and raw memory objects.
4585 *
4586 * Detected fake ip_kotype for a memory object. In
4587 * this case, the port isn't really a port at all, but
4588 * instead is just a raw memory object.
4589 */
4590 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4591 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4592 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4593 }
4594
4595 object = memory_object_to_vm_object((memory_object_t)port);
4596 if (object == VM_OBJECT_NULL) {
4597 return KERN_INVALID_OBJECT;
4598 }
4599 vm_object_reference(object);
4600
4601 /* wait for object (if any) to be ready */
4602 if (object != VM_OBJECT_NULL) {
4603 if (object == kernel_object) {
4604 printf("Warning: Attempt to map kernel object"
4605 " by a non-private kernel entity\n");
4606 return KERN_INVALID_OBJECT;
4607 }
4608 if (!object->pager_ready) {
4609 vm_object_lock(object);
4610
4611 while (!object->pager_ready) {
4612 vm_object_wait(object,
4613 VM_OBJECT_EVENT_PAGER_READY,
4614 THREAD_UNINT);
4615 vm_object_lock(object);
4616 }
4617 vm_object_unlock(object);
4618 }
4619 }
4620 } else {
4621 return KERN_INVALID_OBJECT;
4622 }
4623
4624 if (object != VM_OBJECT_NULL &&
4625 object->named &&
4626 object->pager != MEMORY_OBJECT_NULL &&
4627 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4628 memory_object_t pager;
4629 vm_prot_t pager_prot;
4630 kern_return_t kr;
4631
4632 /*
4633 * For "named" VM objects, let the pager know that the
4634 * memory object is being mapped. Some pagers need to keep
4635 * track of this, to know when they can reclaim the memory
4636 * object, for example.
4637 * VM calls memory_object_map() for each mapping (specifying
4638 * the protection of each mapping) and calls
4639 * memory_object_last_unmap() when all the mappings are gone.
4640 */
4641 pager_prot = max_protection;
4642 if (copy) {
4643 /*
4644 * Copy-On-Write mapping: won't modify the
4645 * memory object.
4646 */
4647 pager_prot &= ~VM_PROT_WRITE;
4648 }
4649 vm_object_lock(object);
4650 pager = object->pager;
4651 if (object->named &&
4652 pager != MEMORY_OBJECT_NULL &&
4653 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4654 assert(object->pager_ready);
4655 vm_object_mapping_wait(object, THREAD_UNINT);
4656 vm_object_mapping_begin(object);
4657 vm_object_unlock(object);
4658
4659 kr = memory_object_map(pager, pager_prot);
4660 assert(kr == KERN_SUCCESS);
4661
4662 vm_object_lock(object);
4663 vm_object_mapping_end(object);
4664 }
4665 vm_object_unlock(object);
4666 }
4667
4668 /*
4669 * Perform the copy if requested
4670 */
4671
4672 if (copy) {
4673 vm_object_t new_object;
4674 vm_object_offset_t new_offset;
4675
4676 result = vm_object_copy_strategically(object, offset,
4677 map_size,
4678 &new_object, &new_offset,
4679 ©);
4680
4681
4682 if (result == KERN_MEMORY_RESTART_COPY) {
4683 boolean_t success;
4684 boolean_t src_needs_copy;
4685
4686 /*
4687 * XXX
4688 * We currently ignore src_needs_copy.
4689 * This really is the issue of how to make
4690 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4691 * non-kernel users to use. Solution forthcoming.
4692 * In the meantime, since we don't allow non-kernel
4693 * memory managers to specify symmetric copy,
4694 * we won't run into problems here.
4695 */
4696 new_object = object;
4697 new_offset = offset;
4698 success = vm_object_copy_quickly(&new_object,
4699 new_offset,
4700 map_size,
4701 &src_needs_copy,
4702 ©);
4703 assert(success);
4704 result = KERN_SUCCESS;
4705 }
4706 /*
4707 * Throw away the reference to the
4708 * original object, as it won't be mapped.
4709 */
4710
4711 vm_object_deallocate(object);
4712
4713 if (result != KERN_SUCCESS) {
4714 return result;
4715 }
4716
4717 object = new_object;
4718 offset = new_offset;
4719 }
4720
4721 /*
4722 * If non-kernel users want to try to prefault pages, the mapping and prefault
4723 * needs to be atomic.
4724 */
4725 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4726 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4727
4728 #if __arm64__
4729 if (fourk) {
4730 /* map this object in a "4K" pager */
4731 result = vm_map_enter_fourk(target_map,
4732 &map_addr,
4733 map_size,
4734 (vm_map_offset_t) mask,
4735 flags,
4736 vmk_flags,
4737 tag,
4738 object,
4739 offset,
4740 copy,
4741 cur_protection,
4742 max_protection,
4743 inheritance);
4744 } else
4745 #endif /* __arm64__ */
4746 {
4747 result = vm_map_enter(target_map,
4748 &map_addr, map_size,
4749 (vm_map_offset_t)mask,
4750 flags,
4751 vmk_flags,
4752 tag,
4753 object, offset,
4754 copy,
4755 cur_protection, max_protection,
4756 inheritance);
4757 }
4758 if (result != KERN_SUCCESS) {
4759 vm_object_deallocate(object);
4760 }
4761
4762 /*
4763 * Try to prefault, and do not forget to release the vm map lock.
4764 */
4765 if (result == KERN_SUCCESS && try_prefault) {
4766 mach_vm_address_t va = map_addr;
4767 kern_return_t kr = KERN_SUCCESS;
4768 unsigned int i = 0;
4769 int pmap_options;
4770
4771 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4772 if (object->internal) {
4773 pmap_options |= PMAP_OPTIONS_INTERNAL;
4774 }
4775
4776 for (i = 0; i < page_list_count; ++i) {
4777 if (!UPL_VALID_PAGE(page_list, i)) {
4778 if (kernel_prefault) {
4779 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4780 result = KERN_MEMORY_ERROR;
4781 break;
4782 }
4783 } else {
4784 /*
4785 * If this function call failed, we should stop
4786 * trying to optimize, other calls are likely
4787 * going to fail too.
4788 *
4789 * We are not gonna report an error for such
4790 * failure though. That's an optimization, not
4791 * something critical.
4792 */
4793 kr = pmap_enter_options(target_map->pmap,
4794 va, UPL_PHYS_PAGE(page_list, i),
4795 cur_protection, VM_PROT_NONE,
4796 0, TRUE, pmap_options, NULL);
4797 if (kr != KERN_SUCCESS) {
4798 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4799 if (kernel_prefault) {
4800 result = kr;
4801 }
4802 break;
4803 }
4804 OSIncrementAtomic64(&vm_prefault_nb_pages);
4805 }
4806
4807 /* Next virtual address */
4808 va += PAGE_SIZE;
4809 }
4810 if (vmk_flags.vmkf_keep_map_locked) {
4811 vm_map_unlock(target_map);
4812 }
4813 }
4814
4815 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4816 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4817 *address = map_addr + offset_in_mapping;
4818 } else {
4819 *address = map_addr;
4820 }
4821 return result;
4822 }
4823
4824 kern_return_t
vm_map_enter_mem_object(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)4825 vm_map_enter_mem_object(
4826 vm_map_t target_map,
4827 vm_map_offset_t *address,
4828 vm_map_size_t initial_size,
4829 vm_map_offset_t mask,
4830 int flags,
4831 vm_map_kernel_flags_t vmk_flags,
4832 vm_tag_t tag,
4833 ipc_port_t port,
4834 vm_object_offset_t offset,
4835 boolean_t copy,
4836 vm_prot_t cur_protection,
4837 vm_prot_t max_protection,
4838 vm_inherit_t inheritance)
4839 {
4840 kern_return_t ret;
4841
4842 ret = vm_map_enter_mem_object_helper(target_map,
4843 address,
4844 initial_size,
4845 mask,
4846 flags,
4847 vmk_flags,
4848 tag,
4849 port,
4850 offset,
4851 copy,
4852 cur_protection,
4853 max_protection,
4854 inheritance,
4855 NULL,
4856 0);
4857
4858 #if KASAN
4859 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4860 kasan_notify_address(*address, initial_size);
4861 }
4862 #endif
4863
4864 return ret;
4865 }
4866
4867 kern_return_t
vm_map_enter_mem_object_prefault(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,vm_prot_t cur_protection,vm_prot_t max_protection,upl_page_list_ptr_t page_list,unsigned int page_list_count)4868 vm_map_enter_mem_object_prefault(
4869 vm_map_t target_map,
4870 vm_map_offset_t *address,
4871 vm_map_size_t initial_size,
4872 vm_map_offset_t mask,
4873 int flags,
4874 vm_map_kernel_flags_t vmk_flags,
4875 vm_tag_t tag,
4876 ipc_port_t port,
4877 vm_object_offset_t offset,
4878 vm_prot_t cur_protection,
4879 vm_prot_t max_protection,
4880 upl_page_list_ptr_t page_list,
4881 unsigned int page_list_count)
4882 {
4883 kern_return_t ret;
4884
4885 ret = vm_map_enter_mem_object_helper(target_map,
4886 address,
4887 initial_size,
4888 mask,
4889 flags,
4890 vmk_flags,
4891 tag,
4892 port,
4893 offset,
4894 FALSE,
4895 cur_protection,
4896 max_protection,
4897 VM_INHERIT_DEFAULT,
4898 page_list,
4899 page_list_count);
4900
4901 #if KASAN
4902 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4903 kasan_notify_address(*address, initial_size);
4904 }
4905 #endif
4906
4907 return ret;
4908 }
4909
4910
/*
 * vm_map_enter_mem_object_control:
 *
 * Map the VM object backing the memory object control "control"
 * into "target_map" at *address (or anywhere, per "flags"),
 * optionally as a copy.  On success, *address is updated with the
 * actual mapping address.
 */
kern_return_t
vm_map_enter_mem_object_control(
	vm_map_t                target_map,
	vm_map_offset_t         *address,
	vm_map_size_t           initial_size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	vm_tag_t                tag,
	memory_object_control_t control,
	vm_object_offset_t      offset,
	boolean_t               copy,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_address_t        map_addr;
	vm_map_size_t           map_size;
	vm_object_t             object;
	vm_object_size_t        size;
	kern_return_t           result;
	memory_object_t         pager;
	vm_prot_t               pager_prot;
	kern_return_t           kr;
#if __arm64__
	boolean_t               fourk = vmk_flags.vmkf_fourk;
#endif /* __arm64__ */

	/*
	 * Check arguments for validity
	 */
	if ((target_map == VM_MAP_NULL) ||
	    (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
	    (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
	    (inheritance > VM_INHERIT_LAST_VALID) ||
	    initial_size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

#if __arm64__
	/* no "fourk" mapping if the map uses sub-page page sizes */
	if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
		fourk = FALSE;
	}

	if (fourk) {
		/* align the requested range to 4K boundaries */
		map_addr = vm_map_trunc_page(*address,
		    FOURK_PAGE_MASK);
		map_size = vm_map_round_page(initial_size,
		    FOURK_PAGE_MASK);
	} else
#endif /* __arm64__ */
	{
		/* align the requested range to the map's page size */
		map_addr = vm_map_trunc_page(*address,
		    VM_MAP_PAGE_MASK(target_map));
		map_size = vm_map_round_page(initial_size,
		    VM_MAP_PAGE_MASK(target_map));
	}
	size = vm_object_round_page(initial_size);

	/* resolve the control port to its backing VM object */
	object = memory_object_control_to_vm_object(control);

	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_OBJECT;
	}

	if (object == kernel_object) {
		printf("Warning: Attempt to map kernel object"
		    " by a non-private kernel entity\n");
		return KERN_INVALID_OBJECT;
	}

	vm_object_lock(object);
	/*
	 * Take a reference for the mapping we are about to create.
	 * NOTE(review): other mapping paths in this file use
	 * vm_object_reference_locked() here — confirm the raw
	 * ref_count increment is intentional/equivalent.
	 */
	object->ref_count++;

	/*
	 * For "named" VM objects, let the pager know that the
	 * memory object is being mapped. Some pagers need to keep
	 * track of this, to know when they can reclaim the memory
	 * object, for example.
	 * VM calls memory_object_map() for each mapping (specifying
	 * the protection of each mapping) and calls
	 * memory_object_last_unmap() when all the mappings are gone.
	 */
	pager_prot = max_protection;
	if (copy) {
		/* copy-on-write mapping: won't modify the memory object */
		pager_prot &= ~VM_PROT_WRITE;
	}
	pager = object->pager;
	if (object->named &&
	    pager != MEMORY_OBJECT_NULL &&
	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
		assert(object->pager_ready);
		/* serialize with other mapping/unmapping notifications */
		vm_object_mapping_wait(object, THREAD_UNINT);
		vm_object_mapping_begin(object);
		vm_object_unlock(object);

		/* notify the pager (object unlocked across the upcall) */
		kr = memory_object_map(pager, pager_prot);
		assert(kr == KERN_SUCCESS);

		vm_object_lock(object);
		vm_object_mapping_end(object);
	}
	vm_object_unlock(object);

	/*
	 * Perform the copy if requested
	 */

	if (copy) {
		vm_object_t new_object;
		vm_object_offset_t new_offset;

		result = vm_object_copy_strategically(object, offset, size,
		    &new_object, &new_offset,
		    &copy);


		if (result == KERN_MEMORY_RESTART_COPY) {
			boolean_t success;
			boolean_t src_needs_copy;

			/*
			 * XXX
			 * We currently ignore src_needs_copy.
			 * This really is the issue of how to make
			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
			 * non-kernel users to use. Solution forthcoming.
			 * In the meantime, since we don't allow non-kernel
			 * memory managers to specify symmetric copy,
			 * we won't run into problems here.
			 */
			new_object = object;
			new_offset = offset;
			success = vm_object_copy_quickly(&new_object,
			    new_offset, size,
			    &src_needs_copy,
			    &copy);
			assert(success);
			result = KERN_SUCCESS;
		}
		/*
		 * Throw away the reference to the
		 * original object, as it won't be mapped.
		 */

		vm_object_deallocate(object);

		if (result != KERN_SUCCESS) {
			return result;
		}

		/* from here on, map the copy instead of the original */
		object = new_object;
		offset = new_offset;
	}

#if __arm64__
	if (fourk) {
		/* map this object through the "4K" adapter */
		result = vm_map_enter_fourk(target_map,
		    &map_addr,
		    map_size,
		    (vm_map_offset_t)mask,
		    flags,
		    vmk_flags,
		    tag,
		    object, offset,
		    copy,
		    cur_protection, max_protection,
		    inheritance);
	} else
#endif /* __arm64__ */
	{
		result = vm_map_enter(target_map,
		    &map_addr, map_size,
		    (vm_map_offset_t)mask,
		    flags,
		    vmk_flags,
		    tag,
		    object, offset,
		    copy,
		    cur_protection, max_protection,
		    inheritance);
	}
	if (result != KERN_SUCCESS) {
		/* drop the reference taken above: mapping failed */
		vm_object_deallocate(object);
	}
	*address = map_addr;

	return result;
}
5100
5101
5102 #if VM_CPM
5103
5104 #ifdef MACH_ASSERT
5105 extern pmap_paddr_t avail_start, avail_end;
5106 #endif
5107
5108 /*
5109 * Allocate memory in the specified map, with the caveat that
5110 * the memory is physically contiguous. This call may fail
5111 * if the system can't find sufficient contiguous memory.
5112 * This call may cause or lead to heart-stopping amounts of
5113 * paging activity.
5114 *
5115 * Memory obtained from this call should be freed in the
5116 * normal way, viz., via vm_deallocate.
5117 */
5118 kern_return_t
vm_map_enter_cpm(vm_map_t map,vm_map_offset_t * addr,vm_map_size_t size,int flags)5119 vm_map_enter_cpm(
5120 vm_map_t map,
5121 vm_map_offset_t *addr,
5122 vm_map_size_t size,
5123 int flags)
5124 {
5125 vm_object_t cpm_obj;
5126 pmap_t pmap;
5127 vm_page_t m, pages;
5128 kern_return_t kr;
5129 vm_map_offset_t va, start, end, offset;
5130 #if MACH_ASSERT
5131 vm_map_offset_t prev_addr = 0;
5132 #endif /* MACH_ASSERT */
5133
5134 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5135 vm_tag_t tag;
5136
5137 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5138 /* XXX TODO4K do we need to support this? */
5139 *addr = 0;
5140 return KERN_NOT_SUPPORTED;
5141 }
5142
5143 VM_GET_FLAGS_ALIAS(flags, tag);
5144
5145 if (size == 0) {
5146 *addr = 0;
5147 return KERN_SUCCESS;
5148 }
5149 if (anywhere) {
5150 *addr = vm_map_min(map);
5151 } else {
5152 *addr = vm_map_trunc_page(*addr,
5153 VM_MAP_PAGE_MASK(map));
5154 }
5155 size = vm_map_round_page(size,
5156 VM_MAP_PAGE_MASK(map));
5157
5158 /*
5159 * LP64todo - cpm_allocate should probably allow
5160 * allocations of >4GB, but not with the current
5161 * algorithm, so just cast down the size for now.
5162 */
5163 if (size > VM_MAX_ADDRESS) {
5164 return KERN_RESOURCE_SHORTAGE;
5165 }
5166 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5167 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5168 return kr;
5169 }
5170
5171 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5172 assert(cpm_obj != VM_OBJECT_NULL);
5173 assert(cpm_obj->internal);
5174 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5175 assert(cpm_obj->can_persist == FALSE);
5176 assert(cpm_obj->pager_created == FALSE);
5177 assert(cpm_obj->pageout == FALSE);
5178 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5179
5180 /*
5181 * Insert pages into object.
5182 */
5183
5184 vm_object_lock(cpm_obj);
5185 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5186 m = pages;
5187 pages = NEXT_PAGE(m);
5188 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5189
5190 assert(!m->vmp_gobbled);
5191 assert(!m->vmp_wanted);
5192 assert(!m->vmp_pageout);
5193 assert(!m->vmp_tabled);
5194 assert(VM_PAGE_WIRED(m));
5195 assert(m->vmp_busy);
5196 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5197
5198 m->vmp_busy = FALSE;
5199 vm_page_insert(m, cpm_obj, offset);
5200 }
5201 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5202 vm_object_unlock(cpm_obj);
5203
5204 /*
5205 * Hang onto a reference on the object in case a
5206 * multi-threaded application for some reason decides
5207 * to deallocate the portion of the address space into
5208 * which we will insert this object.
5209 *
5210 * Unfortunately, we must insert the object now before
5211 * we can talk to the pmap module about which addresses
5212 * must be wired down. Hence, the race with a multi-
5213 * threaded app.
5214 */
5215 vm_object_reference(cpm_obj);
5216
5217 /*
5218 * Insert object into map.
5219 */
5220
5221 kr = vm_map_enter(
5222 map,
5223 addr,
5224 size,
5225 (vm_map_offset_t)0,
5226 flags,
5227 VM_MAP_KERNEL_FLAGS_NONE,
5228 cpm_obj,
5229 (vm_object_offset_t)0,
5230 FALSE,
5231 VM_PROT_ALL,
5232 VM_PROT_ALL,
5233 VM_INHERIT_DEFAULT);
5234
5235 if (kr != KERN_SUCCESS) {
5236 /*
5237 * A CPM object doesn't have can_persist set,
5238 * so all we have to do is deallocate it to
5239 * free up these pages.
5240 */
5241 assert(cpm_obj->pager_created == FALSE);
5242 assert(cpm_obj->can_persist == FALSE);
5243 assert(cpm_obj->pageout == FALSE);
5244 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5245 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5246 vm_object_deallocate(cpm_obj); /* kill creation ref */
5247 }
5248
5249 /*
5250 * Inform the physical mapping system that the
5251 * range of addresses may not fault, so that
5252 * page tables and such can be locked down as well.
5253 */
5254 start = *addr;
5255 end = start + size;
5256 pmap = vm_map_pmap(map);
5257 pmap_pageable(pmap, start, end, FALSE);
5258
5259 /*
5260 * Enter each page into the pmap, to avoid faults.
5261 * Note that this loop could be coded more efficiently,
5262 * if the need arose, rather than looking up each page
5263 * again.
5264 */
5265 for (offset = 0, va = start; offset < size;
5266 va += PAGE_SIZE, offset += PAGE_SIZE) {
5267 int type_of_fault;
5268
5269 vm_object_lock(cpm_obj);
5270 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5271 assert(m != VM_PAGE_NULL);
5272
5273 vm_page_zero_fill(m);
5274
5275 type_of_fault = DBG_ZERO_FILL_FAULT;
5276
5277 vm_fault_enter(m, pmap, va,
5278 PAGE_SIZE, 0,
5279 VM_PROT_ALL, VM_PROT_WRITE,
5280 VM_PAGE_WIRED(m),
5281 FALSE, /* change_wiring */
5282 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5283 FALSE, /* no_cache */
5284 FALSE, /* cs_bypass */
5285 0, /* user_tag */
5286 0, /* pmap_options */
5287 NULL, /* need_retry */
5288 &type_of_fault);
5289
5290 vm_object_unlock(cpm_obj);
5291 }
5292
5293 #if MACH_ASSERT
5294 /*
5295 * Verify ordering in address space.
5296 */
5297 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5298 vm_object_lock(cpm_obj);
5299 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5300 vm_object_unlock(cpm_obj);
5301 if (m == VM_PAGE_NULL) {
5302 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5303 cpm_obj, (uint64_t)offset);
5304 }
5305 assert(m->vmp_tabled);
5306 assert(!m->vmp_busy);
5307 assert(!m->vmp_wanted);
5308 assert(!m->vmp_fictitious);
5309 assert(!m->vmp_private);
5310 assert(!m->vmp_absent);
5311 assert(!m->vmp_error);
5312 assert(!m->vmp_cleaning);
5313 assert(!m->vmp_laundry);
5314 assert(!m->vmp_precious);
5315 assert(!m->vmp_clustered);
5316 if (offset != 0) {
5317 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5318 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5319 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5320 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5321 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5322 panic("vm_allocate_cpm: pages not contig!");
5323 }
5324 }
5325 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5326 }
5327 #endif /* MACH_ASSERT */
5328
5329 vm_object_deallocate(cpm_obj); /* kill extra ref */
5330
5331 return kr;
5332 }
5333
5334
5335 #else /* VM_CPM */
5336
5337 /*
5338 * Interface is defined in all cases, but unless the kernel
5339 * is built explicitly for this option, the interface does
5340 * nothing.
5341 */
5342
/*
 * vm_map_enter_cpm:
 *
 * Stub for kernels built without VM_CPM: the interface must exist for
 * all configurations, but contiguous physical memory allocation is not
 * supported here, so the call always fails.
 */
kern_return_t
vm_map_enter_cpm(
	__unused vm_map_t       map,
	__unused vm_map_offset_t *addr,
	__unused vm_map_size_t  size,
	__unused int            flags)
{
	/* CPM not compiled in: nothing to do. */
	return KERN_FAILURE;
}
5352 #endif /* VM_CPM */
5353
5354 /* Not used without nested pmaps */
5355 #ifndef NO_NESTED_PMAP
5356 /*
5357 * Clip and unnest a portion of a nested submap mapping.
5358 */
5359
5360
/*
 * vm_map_clip_unnest:	[ internal use only ]
 *
 * Clip "entry" (a nested submap mapping) so that the range
 * [start_unnest, end_unnest) stands alone, then undo the pmap nesting
 * for that range so the parent map gets private page tables there.
 *
 * The caller holds the map locked; "entry" must be a submap entry that
 * is currently nested (use_pmap == TRUE).  On return the clipped entry
 * is no longer nested (use_pmap == FALSE).
 */
static void
vm_map_clip_unnest(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t start_unnest,
	vm_map_offset_t end_unnest)
{
	/* Remember the caller's requested range for diagnostics below. */
	vm_map_offset_t old_start_unnest = start_unnest;
	vm_map_offset_t old_end_unnest = end_unnest;

	assert(entry->is_sub_map);
	assert(VME_SUBMAP(entry) != NULL);
	assert(entry->use_pmap);

	/*
	 * Query the platform for the optimal unnest range.
	 * DRK: There's some duplication of effort here, since
	 * callers may have adjusted the range to some extent.  This
	 * routine was introduced to support 1GiB subtree nesting
	 * for x86 platforms, which can also nest on 2MiB boundaries
	 * depending on size/alignment.
	 */
	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
		/* The platform adjusted the range: log it for telemetry. */
		assert(VME_SUBMAP(entry)->is_nested_map);
		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
		log_unnest_badness(map,
		    old_start_unnest,
		    old_end_unnest,
		    VME_SUBMAP(entry)->is_nested_map,
		    (entry->vme_start +
		    VME_SUBMAP(entry)->lowest_unnestable_start -
		    VME_OFFSET(entry)));
	}

	/* The (possibly adjusted) range must lie entirely within the entry. */
	if (entry->vme_start > start_unnest ||
	    entry->vme_end < end_unnest) {
		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
		    "bad nested entry: start=0x%llx end=0x%llx\n",
		    (long long)start_unnest, (long long)end_unnest,
		    (long long)entry->vme_start, (long long)entry->vme_end);
	}

	/* Split off any portion of the entry in front of the unnest range. */
	if (start_unnest > entry->vme_start) {
		_vm_map_clip_start(&map->hdr,
		    entry,
		    start_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
	/* Split off any portion of the entry past the unnest range. */
	if (entry->vme_end > end_unnest) {
		_vm_map_clip_end(&map->hdr,
		    entry,
		    end_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}

	/* Tear down the shared page-table mappings for the clipped entry. */
	pmap_unnest(map->pmap,
	    entry->vme_start,
	    entry->vme_end - entry->vme_start);
	if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
		/* clean up parent map/maps */
		vm_map_submap_pmap_clean(
			map, entry->vme_start,
			entry->vme_end,
			VME_SUBMAP(entry),
			VME_OFFSET(entry));
	}
	entry->use_pmap = FALSE;
	if ((map->pmap != kernel_pmap) &&
	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
		/* This region is no longer sharing page tables: retag it. */
		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
	}
}
5441 #endif /* NO_NESTED_PMAP */
5442
5443 /*
5444 * vm_map_clip_start: [ internal use only ]
5445 *
5446 * Asserts that the given entry begins at or after
5447 * the specified address; if necessary,
5448 * it splits the entry into two.
5449 */
/*
 * Clip "entry" so that it begins at "startaddr", splitting it in two
 * if needed.  If the entry is a nested submap mapping, the minimum
 * platform-supported range around "startaddr" is unnested first so
 * the clip never cuts through a nested region.
 * The caller holds the map locked.
 */
void
vm_map_clip_start(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t startaddr)
{
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map &&
	    entry->use_pmap &&
	    startaddr >= entry->vme_start) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure "startaddr" is no longer in a nested range
		 * before we clip.  Unnest only the minimum range the platform
		 * can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
		end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (startaddr > entry->vme_start) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			/*
			 * Physically-contiguous objects can't be split, so
			 * drop all pmap mappings for the whole entry; they
			 * will be re-established on fault.
			 */
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)", map, entry);
		}

		DTRACE_VM5(
			vm_map_clip_start,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, startaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_start(&map->hdr, entry, startaddr);
		/* The clip may have changed the map's free-space hints. */
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
5502
5503
/*
 * Clip an entry of a vm_map_copy's entry list at "startaddr".
 * Unlike vm_map_clip_start(), no unnesting or free-space-hint
 * maintenance is needed for a copy list.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END
5509
5510 /*
5511 * This routine is called only when it is known that
5512 * the entry must be split.
5513 */
static void
_vm_map_clip_start(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         start)
{
	vm_map_entry_t  new_entry;

	/*
	 * Split off the front portion --
	 * note that we must insert the new
	 * entry BEFORE this one, so that
	 * this entry has the specified starting
	 * address.
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	/* The new entry is a full copy of the original... */
	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	/* ...covering [vme_start, start); the original keeps [start, vme_end). */
	new_entry->vme_end = start;
	assert(new_entry->vme_start < new_entry->vme_end);
	/* Advance the original entry's object offset to match its new start. */
	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
	assert(start < entry->vme_end);
	entry->vme_start = start;

	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

	/* Both entries now reference the same submap/object: take a new ref. */
	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
5552
5553
5554 /*
5555 * vm_map_clip_end: [ internal use only ]
5556 *
5557 * Asserts that the given entry ends at or before
5558 * the specified address; if necessary,
5559 * it splits the entry into two.
5560 */
/*
 * Clip "entry" so that it ends at "endaddr", splitting it in two
 * if needed.  If the entry is a nested submap mapping, the affected
 * range is unnested first (rounded up to the platform's minimum
 * nesting granularity).  The caller holds the map locked.
 */
void
vm_map_clip_end(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t endaddr)
{
	if (endaddr > entry->vme_end) {
		/*
		 * Within the scope of this clipping, limit "endaddr" to
		 * the end of this map entry...
		 */
		endaddr = entry->vme_end;
	}
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map && entry->use_pmap) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure the range between the start of this entry and
		 * the new "endaddr" is no longer nested before we clip.
		 * Unnest only the minimum range the platform can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = entry->vme_start;
		end_unnest =
		    (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
		    ~(pmap_shared_region_size_min(map->pmap) - 1);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (endaddr < entry->vme_end) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			/*
			 * Physically-contiguous objects can't be split, so
			 * drop all pmap mappings for the whole entry; they
			 * will be re-established on fault.
			 */
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)", map, entry);
		}
		DTRACE_VM5(
			vm_map_clip_end,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, endaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_end(&map->hdr, entry, endaddr);
		/* The clip may have changed the map's free-space hints. */
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
5619
5620
/*
 * Clip an entry of a vm_map_copy's entry list at "endaddr".
 * Unlike vm_map_clip_end(), no unnesting or free-space-hint
 * maintenance is needed for a copy list.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END
5626
5627 /*
5628 * This routine is called only when it is known that
5629 * the entry must be split.
5630 */
static void
_vm_map_clip_end(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         end)
{
	vm_map_entry_t  new_entry;

	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(end,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	/* The new entry is a full copy of the original... */
	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	/* ...covering [end, vme_end); the original keeps [vme_start, end). */
	assert(entry->vme_start < end);
	new_entry->vme_start = entry->vme_end = end;
	/* Advance the new entry's object offset to match its new start. */
	VME_OFFSET_SET(new_entry,
	    VME_OFFSET(new_entry) + (end - entry->vme_start));
	assert(new_entry->vme_start < new_entry->vme_end);

	_vm_map_store_entry_link(map_header, entry, new_entry);

	/* Both entries now reference the same submap/object: take a new ref. */
	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
5666
5667
5668 /*
5669 * VM_MAP_RANGE_CHECK: [ internal use only ]
5670 *
5671 * Asserts that the starting and ending region
5672 * addresses fall within the valid range of the map.
5673 */
/*
 * Note: this macro MODIFIES its "start" and "end" arguments in place,
 * clamping them to the map's valid address range (and collapsing the
 * range to empty if start would exceed end).
 */
#define VM_MAP_RANGE_CHECK(map, start, end) \
	MACRO_BEGIN \
	if (start < vm_map_min(map)) \
	        start = vm_map_min(map); \
	if (end > vm_map_max(map)) \
	        end = vm_map_max(map); \
	if (start > end) \
	        start = end; \
	MACRO_END
5683
5684 /*
5685 * vm_map_range_check: [ internal use only ]
5686 *
5687 * Check that the region defined by the specified start and
5688 * end addresses are wholly contained within a single map
 * entry or set of adjacent map entries of the specified map,
5690 * i.e. the specified region contains no unmapped space.
5691 * If any or all of the region is unmapped, FALSE is returned.
5692 * Otherwise, TRUE is returned and if the output argument 'entry'
5693 * is not NULL it points to the map entry containing the start
5694 * of the region.
5695 *
5696 * The map is locked for reading on entry and is left locked.
5697 */
5698 static boolean_t
vm_map_range_check(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_map_entry_t * entry)5699 vm_map_range_check(
5700 vm_map_t map,
5701 vm_map_offset_t start,
5702 vm_map_offset_t end,
5703 vm_map_entry_t *entry)
5704 {
5705 vm_map_entry_t cur;
5706 vm_map_offset_t prev;
5707
5708 /*
5709 * Basic sanity checks first
5710 */
5711 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5712 return FALSE;
5713 }
5714
5715 /*
5716 * Check first if the region starts within a valid
5717 * mapping for the map.
5718 */
5719 if (!vm_map_lookup_entry(map, start, &cur)) {
5720 return FALSE;
5721 }
5722
5723 /*
5724 * Optimize for the case that the region is contained
5725 * in a single map entry.
5726 */
5727 if (entry != (vm_map_entry_t *) NULL) {
5728 *entry = cur;
5729 }
5730 if (end <= cur->vme_end) {
5731 return TRUE;
5732 }
5733
5734 /*
5735 * If the region is not wholly contained within a
5736 * single entry, walk the entries looking for holes.
5737 */
5738 prev = cur->vme_end;
5739 cur = cur->vme_next;
5740 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5741 if (end <= cur->vme_end) {
5742 return TRUE;
5743 }
5744 prev = cur->vme_end;
5745 cur = cur->vme_next;
5746 }
5747 return FALSE;
5748 }
5749
5750 /*
5751 * vm_map_submap: [ kernel use only ]
5752 *
5753 * Mark the given range as handled by a subordinate map.
5754 *
5755 * This range must have been created with vm_map_find using
5756 * the vm_submap_object, and no other operations may have been
5757 * performed on this range prior to calling vm_map_submap.
5758 *
5759 * Only a limited number of operations can be performed
 * within this range after calling vm_map_submap:
5761 * vm_fault
5762 * [Don't try vm_map_copyin!]
5763 *
5764 * To remove a submapping, one must first remove the
5765 * range from the superior map, and then destroy the
5766 * submap (if desired). [Better yet, don't try it.]
5767 */
kern_return_t
vm_map_submap(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t        submap,
	vm_map_offset_t offset,
#ifdef NO_NESTED_PMAP
	__unused
#endif  /* NO_NESTED_PMAP */
	boolean_t       use_pmap)
{
	vm_map_entry_t  entry;
	kern_return_t   result = KERN_INVALID_ARGUMENT;
	vm_object_t     object;

	vm_map_lock(map);

	if (!vm_map_lookup_entry(map, start, &entry)) {
		/* lookup left "entry" at the preceding slot: step forward */
		entry = entry->vme_next;
	}

	if (entry == vm_map_to_entry(map) ||
	    entry->is_sub_map) {
		vm_map_unlock(map);
		return KERN_INVALID_ARGUMENT;
	}

	/* Carve the entry down to exactly [start, end). */
	vm_map_clip_start(map, entry, start);
	vm_map_clip_end(map, entry, end);

	/*
	 * The range must map exactly one entry backed by a pristine
	 * vm_submap_object (no resident pages, no copy/shadow chain,
	 * no pager) -- i.e. set up by vm_map_find and untouched since.
	 */
	if ((entry->vme_start == start) && (entry->vme_end == end) &&
	    (!entry->is_sub_map) &&
	    ((object = VME_OBJECT(entry)) == vm_submap_object) &&
	    (object->resident_page_count == 0) &&
	    (object->copy == VM_OBJECT_NULL) &&
	    (object->shadow == VM_OBJECT_NULL) &&
	    (!object->pager_created)) {
		/* Swap the placeholder object for the submap reference. */
		VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
		VME_OBJECT_SET(entry, VM_OBJECT_NULL);
		vm_object_deallocate(object);
		entry->is_sub_map = TRUE;
		entry->use_pmap = FALSE;
		VME_SUBMAP_SET(entry, submap);
		vm_map_reference(submap);
		if (submap->mapped_in_other_pmaps == FALSE &&
		    vm_map_pmap(submap) != PMAP_NULL &&
		    vm_map_pmap(submap) != vm_map_pmap(map)) {
			/*
			 * This submap is being mapped in a map
			 * that uses a different pmap.
			 * Set its "mapped_in_other_pmaps" flag
			 * to indicate that we now need to
			 * remove mappings from all pmaps rather
			 * than just the submap's pmap.
			 */
			submap->mapped_in_other_pmaps = TRUE;
		}

#ifndef NO_NESTED_PMAP
		if (use_pmap) {
			/* nest if platform code will allow */
			if (submap->pmap == NULL) {
				/* Lazily create the submap's pmap, billed to our ledger. */
				ledger_t ledger = map->pmap->ledger;
				submap->pmap = pmap_create_options(ledger,
				    (vm_map_size_t) 0, 0);
				if (submap->pmap == PMAP_NULL) {
					vm_map_unlock(map);
					return KERN_NO_SPACE;
				}
#if defined(__arm__) || defined(__arm64__)
				pmap_set_nested(submap->pmap);
#endif
			}
			result = pmap_nest(map->pmap,
			    (VME_SUBMAP(entry))->pmap,
			    (addr64_t)start,
			    (uint64_t)(end - start));
			if (result) {
				panic("vm_map_submap: pmap_nest failed, rc = %08X", result);
			}
			entry->use_pmap = TRUE;
		}
#else   /* NO_NESTED_PMAP */
		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
#endif  /* NO_NESTED_PMAP */
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	return result;
}
5860
5861 /*
5862 * vm_map_protect:
5863 *
5864 * Sets the protection of the specified address
5865 * region in the target map. If "set_max" is
5866 * specified, the maximum protection is to be set;
5867 * otherwise, only the current protection is affected.
5868 */
kern_return_t
vm_map_protect(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       new_prot,
	boolean_t       set_max)
{
	vm_map_entry_t  current;
	vm_map_offset_t prev;
	vm_map_entry_t  entry;
	vm_prot_t       new_max;
	int             pmap_options = 0;
	kern_return_t   kr;

	if (new_prot & VM_PROT_COPY) {
		/*
		 * VM_PROT_COPY: replace the range with a copy-on-write
		 * remapping of itself before applying the remaining
		 * protection bits.
		 */
		vm_map_offset_t         new_start;
		vm_prot_t               cur_prot, max_prot;
		vm_map_kernel_flags_t   kflags;

		/* LP64todo - see below */
		if (start >= map->max_offset) {
			return KERN_INVALID_ADDRESS;
		}

		/* Enforce W^X policy before doing the remap. */
		if ((new_prot & VM_PROT_ALLEXEC) &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if XNU_TARGET_OS_OSX && __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* XNU_TARGET_OS_OSX && __arm64__ */
		    ) &&
		    VM_MAP_POLICY_WX_FAIL(map)) {
			DTRACE_VM3(cs_wx,
			    uint64_t, (uint64_t) start,
			    uint64_t, (uint64_t) end,
			    vm_prot_t, new_prot);
			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
			    proc_selfpid(),
			    (current_task()->bsd_info
			    ? proc_name_address(current_task()->bsd_info)
			    : "?"),
			    __FUNCTION__);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * Let vm_map_remap_extract() know that it will need to:
		 * + make a copy of the mapping
		 * + add VM_PROT_WRITE to the max protections
		 * + remove any protections that are no longer allowed from the
		 *   max protections (to avoid any WRITE/EXECUTE conflict, for
		 *   example).
		 * Note that "max_prot" is an IN/OUT parameter only for this
		 * specific (VM_PROT_COPY) case.  It's usually an OUT parameter
		 * only.
		 */
		max_prot = new_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC);
		cur_prot = VM_PROT_NONE;
		kflags = VM_MAP_KERNEL_FLAGS_NONE;
		kflags.vmkf_remap_prot_copy = TRUE;
		kflags.vmkf_overwrite_immutable = TRUE;
		new_start = start;
		kr = vm_map_remap(map,
		    &new_start,
		    end - start,
		    0, /* mask */
		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
		    kflags,
		    0,
		    map,
		    start,
		    TRUE, /* copy-on-write remapping! */
		    &cur_prot, /* IN/OUT */
		    &max_prot, /* IN/OUT */
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		/* COPY handled: fall through to set the remaining bits. */
		new_prot &= ~VM_PROT_COPY;
	}

	vm_map_lock(map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (start >= map->max_offset) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	while (1) {
		/*
		 * Lookup the entry.  If it doesn't start in a valid
		 * entry, return an error.
		 */
		if (!vm_map_lookup_entry(map, start, &entry)) {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
			start = SUPERPAGE_ROUND_DOWN(start);
			continue;
		}
		break;
	}
	if (entry->superpage_size) {
		/* superpage entries can only change protection as a whole */
		end = SUPERPAGE_ROUND_UP(end);
	}

	/*
	 * Make a first pass to check for protection and address
	 * violations.
	 */

	current = entry;
	prev = current->vme_start;
	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start < end)) {
		/*
		 * If there is a hole, return an error.
		 */
		if (current->vme_start != prev) {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		new_max = current->max_protection;

#if defined(__x86_64__)
		/* Allow max mask to include execute prot bits if this map doesn't enforce CS */
		if (set_max && (new_prot & VM_PROT_ALLEXEC) && !vm_map_cs_enforcement(map)) {
			new_max = (new_max & ~VM_PROT_ALLEXEC) | (new_prot & VM_PROT_ALLEXEC);
		}
#endif
		/* The requested protection must stay within max_protection. */
		if ((new_prot & new_max) != new_prot) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * JIT regions whose pmap enforces a protection policy
		 * cannot have their protection changed here.
		 */
		if (current->used_for_jit &&
		    pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}

		/* W^X: disallow write+execute except for JIT regions. */
		if ((new_prot & VM_PROT_WRITE) &&
		    (new_prot & VM_PROT_ALLEXEC) &&
#if XNU_TARGET_OS_OSX
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    ) &&
#endif /* XNU_TARGET_OS_OSX */
		    !(current->used_for_jit)) {
			DTRACE_VM3(cs_wx,
			    uint64_t, (uint64_t) current->vme_start,
			    uint64_t, (uint64_t) current->vme_end,
			    vm_prot_t, new_prot);
			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
			    proc_selfpid(),
			    (current_task()->bsd_info
			    ? proc_name_address(current_task()->bsd_info)
			    : "?"),
			    __FUNCTION__);
			/* Either silently drop the exec bits or fail, per policy. */
			new_prot &= ~VM_PROT_ALLEXEC;
			if (VM_MAP_POLICY_WX_FAIL(map)) {
				vm_map_unlock(map);
				return KERN_PROTECTION_FAILURE;
			}
		}

		/*
		 * If the task has requested executable lockdown,
		 * deny both:
		 * - adding executable protections OR
		 * - adding write protections to an existing executable mapping.
		 */
		if (map->map_disallow_new_exec == TRUE) {
			if ((new_prot & VM_PROT_ALLEXEC) ||
			    ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
				vm_map_unlock(map);
				return KERN_PROTECTION_FAILURE;
			}
		}

		prev = current->vme_end;
		current = current->vme_next;
	}

#if __arm64__
	if (end > prev &&
	    end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
		vm_map_entry_t prev_entry;

		prev_entry = current->vme_prev;
		if (prev_entry != vm_map_to_entry(map) &&
		    !prev_entry->map_aligned &&
		    (vm_map_round_page(prev_entry->vme_end,
		    VM_MAP_PAGE_MASK(map))
		    == end)) {
			/*
			 * The last entry in our range is not "map-aligned"
			 * but it would have reached all the way to "end"
			 * if it had been map-aligned, so this is not really
			 * a hole in the range and we can proceed.
			 */
			prev = end;
		}
	}
#endif /* __arm64__ */

	/* A hole at the tail of the range is still an error. */
	if (end > prev) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Go back and fix up protections.
	 * Clip to start here if the range starts within
	 * the entry.
	 */

	current = entry;
	if (current != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, current, start);
	}

	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start < end)) {
		vm_prot_t       old_prot;

		vm_map_clip_end(map, current, end);

		if (current->is_sub_map) {
			/* clipping did unnest if needed */
			assert(!current->use_pmap);
		}

		old_prot = current->protection;

		if (set_max) {
			current->max_protection = new_prot;
			/* Consider either EXECUTE or UEXEC as EXECUTE for this masking */
			current->protection = (new_prot & old_prot);
		} else {
			current->protection = new_prot;
		}

		/*
		 * Update physical map if necessary.
		 * If the request is to turn off write protection,
		 * we won't do it for real (in pmap).  This is because
		 * it would cause copy-on-write to fail.  We've already
		 * set the new protection in the map, so if a
		 * write-protect fault occurred, it will be fixed up
		 * properly, COW or not.
		 */
		if (current->protection != old_prot) {
			/* Look one level in we support nested pmaps */
			/* from mapped submaps which are direct entries */
			/* in our map */

			vm_prot_t prot;

			prot = current->protection;
			if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
				/* Keep the pmap write-protected: see comment above. */
				prot &= ~VM_PROT_WRITE;
			} else {
				assert(!VME_OBJECT(current)->code_signed);
				assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
			}

			if (override_nx(map, VME_ALIAS(current)) && prot) {
				prot |= VM_PROT_EXECUTE;
			}

#if DEVELOPMENT || DEBUG
			if (!(old_prot & VM_PROT_EXECUTE) &&
			    (prot & VM_PROT_EXECUTE) &&
			    panic_on_unsigned_execute &&
			    (proc_selfcsflags() & CS_KILL)) {
				panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
			}
#endif /* DEVELOPMENT || DEBUG */

			if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
				if (current->wired_count) {
					panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x",
					    map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
				}

				/* If the pmap layer cares about this
				 * protection type, force a fault for
				 * each page so that vm_fault will
				 * repopulate the page with the full
				 * set of protections.
				 */
				/*
				 * TODO: We don't seem to need this,
				 * but this is due to an internal
				 * implementation detail of
				 * pmap_protect.  Do we want to rely
				 * on this?
				 */
				prot = VM_PROT_NONE;
			}

			if (current->is_sub_map && current->use_pmap) {
				/* Nested submap: update the submap's own pmap. */
				pmap_protect(VME_SUBMAP(current)->pmap,
				    current->vme_start,
				    current->vme_end,
				    prot);
			} else {
				if (prot & VM_PROT_WRITE) {
					if (VME_OBJECT(current) == compressor_object) {
						/*
						 * For write requests on the
						 * compressor, we will ask the
						 * pmap layer to prevent us from
						 * taking a write fault when we
						 * attempt to access the mapping
						 * next.
						 */
						pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
					}
				}

				pmap_protect_options(map->pmap,
				    current->vme_start,
				    current->vme_end,
				    prot,
				    pmap_options,
				    NULL);
			}
		}
		current = current->vme_next;
	}

	/* Coalesce any entries made mergeable by the protection change. */
	current = entry;
	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start <= end)) {
		vm_map_simplify_entry(map, current);
		current = current->vme_next;
	}

	vm_map_unlock(map);
	return KERN_SUCCESS;
}
6224
6225 /*
6226 * vm_map_inherit:
6227 *
6228 * Sets the inheritance of the specified address
6229 * range in the target map. Inheritance
6230 * affects how the map will be shared with
6231 * child maps at the time of vm_map_fork.
6232 */
6233 kern_return_t
vm_map_inherit(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_inherit_t new_inheritance)6234 vm_map_inherit(
6235 vm_map_t map,
6236 vm_map_offset_t start,
6237 vm_map_offset_t end,
6238 vm_inherit_t new_inheritance)
6239 {
6240 vm_map_entry_t entry;
6241 vm_map_entry_t temp_entry;
6242
6243 vm_map_lock(map);
6244
6245 VM_MAP_RANGE_CHECK(map, start, end);
6246
6247 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6248 entry = temp_entry;
6249 } else {
6250 temp_entry = temp_entry->vme_next;
6251 entry = temp_entry;
6252 }
6253
6254 /* first check entire range for submaps which can't support the */
6255 /* given inheritance. */
6256 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6257 if (entry->is_sub_map) {
6258 if (new_inheritance == VM_INHERIT_COPY) {
6259 vm_map_unlock(map);
6260 return KERN_INVALID_ARGUMENT;
6261 }
6262 }
6263
6264 entry = entry->vme_next;
6265 }
6266
6267 entry = temp_entry;
6268 if (entry != vm_map_to_entry(map)) {
6269 /* clip and unnest if necessary */
6270 vm_map_clip_start(map, entry, start);
6271 }
6272
6273 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6274 vm_map_clip_end(map, entry, end);
6275 if (entry->is_sub_map) {
6276 /* clip did unnest if needed */
6277 assert(!entry->use_pmap);
6278 }
6279
6280 entry->inheritance = new_inheritance;
6281
6282 entry = entry->vme_next;
6283 }
6284
6285 vm_map_unlock(map);
6286 return KERN_SUCCESS;
6287 }
6288
6289 /*
6290 * Update the accounting for the amount of wired memory in this map. If the user has
6291 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6292 */
6293
static kern_return_t
add_wire_counts(
	vm_map_t        map,
	vm_map_entry_t  entry,
	boolean_t       user_wire)
{
	vm_map_size_t   size;

	if (user_wire) {
		unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

		/*
		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
		 * this map entry.
		 */

		if (entry->user_wired_count == 0) {
			size = entry->vme_end - entry->vme_start;

			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per map limit which is the smaller of either
			 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value.  There is also
			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
			 * limit, then we fail.
			 */

			if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
			    size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
				/* Record which limit was hit for telemetry. */
				if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
#if DEVELOPMENT || DEBUG
					if (panic_on_mlock_failure) {
						panic("mlock: Over global wire limit. %llu bytes wired and requested to wire %llu bytes more", ptoa_64(total_wire_count), (uint64_t) size);
					}
#endif /* DEVELOPMENT || DEBUG */
					os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
				} else {
					os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
#if DEVELOPMENT || DEBUG
					if (panic_on_mlock_failure) {
						panic("mlock: Over process wire limit. %llu bytes wired and requested to wire %llu bytes more", (uint64_t) map->user_wire_size, (uint64_t) size);
					}
#endif /* DEVELOPMENT || DEBUG */
				}
				return KERN_RESOURCE_SHORTAGE;
			}

			/*
			 * The first time the user wires an entry, we also increment the wired_count and add this to
			 * the total that has been wired in the map.
			 */

			if (entry->wired_count >= MAX_WIRE_COUNT) {
				return KERN_FAILURE;
			}

			entry->wired_count++;
			map->user_wire_size += size;
		}

		if (entry->user_wired_count >= MAX_WIRE_COUNT) {
			return KERN_FAILURE;
		}

		entry->user_wired_count++;
	} else {
		/*
		 * The kernel's wiring the memory.  Just bump the count and continue.  Kernel
		 * wiring is never subject to the user limits and never fails softly.
		 */

		if (entry->wired_count >= MAX_WIRE_COUNT) {
			panic("vm_map_wire: too many wirings");
		}

		entry->wired_count++;
	}

	return KERN_SUCCESS;
}
6373
6374 /*
6375 * Update the memory wiring accounting now that the given map entry is being unwired.
6376 */
6377
6378 static void
subtract_wire_counts(vm_map_t map,vm_map_entry_t entry,boolean_t user_wire)6379 subtract_wire_counts(
6380 vm_map_t map,
6381 vm_map_entry_t entry,
6382 boolean_t user_wire)
6383 {
6384 if (user_wire) {
6385 /*
6386 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6387 */
6388
6389 if (entry->user_wired_count == 1) {
6390 /*
6391 * We're removing the last user wire reference. Decrement the wired_count and the total
6392 * user wired memory for this map.
6393 */
6394
6395 assert(entry->wired_count >= 1);
6396 entry->wired_count--;
6397 map->user_wire_size -= entry->vme_end - entry->vme_start;
6398 }
6399
6400 assert(entry->user_wired_count >= 1);
6401 entry->user_wired_count--;
6402 } else {
6403 /*
6404 * The kernel is unwiring the memory. Just update the count.
6405 */
6406
6407 assert(entry->wired_count >= 1);
6408 entry->wired_count--;
6409 }
6410 }
6411
int cs_executable_wire = 0; /* count of wire requests rejected because the range was executable (see vm_map_wire_nested) */
6413
6414 /*
6415 * vm_map_wire:
6416 *
6417 * Sets the pageability of the specified address range in the
6418 * target map as wired. Regions specified as not pageable require
6419 * locked-down physical memory and physical page maps. The
6420 * access_type variable indicates types of accesses that must not
6421 * generate page faults. This is checked against protection of
6422 * memory being locked-down.
6423 *
6424 * The map must not be locked, but a reference must remain to the
6425 * map throughout the call.
6426 */
static kern_return_t
vm_map_wire_nested(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t caller_prot,
	vm_tag_t tag,
	boolean_t user_wire,
	pmap_t map_pmap,
	vm_map_offset_t pmap_addr,
	ppnum_t *physpage_p)
{
	vm_map_entry_t entry;
	vm_prot_t access_type;
	struct vm_map_entry *first_entry, tmp_entry;
	vm_map_t real_map;
	vm_map_offset_t s, e;
	kern_return_t rc;
	boolean_t need_wakeup;
	boolean_t main_map = FALSE;
	wait_interrupt_t interruptible_state;
	thread_t cur_thread;
	unsigned int last_timestamp;
	vm_map_size_t size;
	boolean_t wire_and_extract;
	vm_prot_t extra_prots;

	/*
	 * Extra protection bits passed to vm_map_lookup_locked(): always
	 * request a copy; refuse to copy executable pages unless CS
	 * enforcement is off (or this is the kernel map) on macOS.
	 */
	extra_prots = VM_PROT_COPY;
	extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
#if XNU_TARGET_OS_OSX
	if (map->pmap == kernel_pmap ||
	    !vm_map_cs_enforcement(map)) {
		extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
	}
#endif /* XNU_TARGET_OS_OSX */

	access_type = (caller_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC));

	wire_and_extract = FALSE;
	if (physpage_p != NULL) {
		/*
		 * The caller wants the physical page number of the
		 * wired page.  We return only one physical page number
		 * so this works for only one page at a time.
		 */
		if ((end - start) != PAGE_SIZE) {
			return KERN_INVALID_ARGUMENT;
		}
		wire_and_extract = TRUE;
		*physpage_p = 0;
	}

	vm_map_lock(map);
	if (map_pmap == NULL) {
		main_map = TRUE;
	}
	/* Used to detect whether the map changed while we had it unlocked. */
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We wired what the caller asked for, zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	need_wakeup = FALSE;
	cur_thread = current_thread();

	s = start;
	rc = KERN_SUCCESS;

	if (vm_map_lookup_entry(map, s, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested submaps here !
		 */
	} else {
		/* Start address is not in map */
		rc = KERN_INVALID_ADDRESS;
		goto done;
	}

	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have wired from "start" to "s".
		 * We still need to wire from "s" to "end".
		 *
		 * "entry" hasn't been clipped, so it could start before "s"
		 * and/or end after "end".
		 */

		/* "e" is how far we want to wire in this entry */
		e = entry->vme_end;
		if (e > end) {
			e = end;
		}

		/*
		 * If another thread is wiring/unwiring this entry then
		 * block after informing other thread to wake us up.
		 */
		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * We have not clipped the entry.  Make sure that
			 * the start address is in range so that the lookup
			 * below will succeed.
			 * "s" is the current starting point: we've already
			 * wired from "start" to "s" and we still have
			 * to wire from "s" to "end".
			 */

			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already wired.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}
			/*
			 * User wiring is interruptible
			 */
			wait_result = vm_map_entry_wait(map,
			    (user_wire) ? THREAD_ABORTSAFE :
			    THREAD_UNINT);
			if (user_wire && wait_result == THREAD_INTERRUPTED) {
				/*
				 * undo the wirings we have done so far
				 * We do not clear the needs_wakeup flag,
				 * because we cannot tell if we were the
				 * only one waiting.
				 */
				rc = KERN_FAILURE;
				goto done;
			}

			/*
			 * Cannot avoid a lookup here. reset timestamp.
			 */
			last_timestamp = map->timestamp;

			/*
			 * The entry could have been clipped, look it up again.
			 * Worst that can happen is, it may not exist anymore.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: undo everything up to the previous
				 * entry.  let vm_map_unwire worry about
				 * checking the validity of the range.
				 */
				rc = KERN_FAILURE;
				goto done;
			}
			entry = first_entry;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_start;
			vm_map_offset_t local_end;
			pmap_t pmap;

			if (wire_and_extract) {
				/*
				 * Wiring would result in copy-on-write
				 * which would not be compatible with
				 * the sharing we have with the original
				 * provider of this memory.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			/* Translate [s, e) into the submap's address space. */
			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end;
			sub_end += VME_OFFSET(entry) - entry->vme_start;

			local_end = entry->vme_end;
			if (map_pmap == NULL) {
				vm_object_t object;
				vm_object_offset_t offset;
				vm_prot_t prot;
				boolean_t wired;
				vm_map_entry_t local_entry;
				vm_map_version_t version;
				vm_map_t lookup_map;

				if (entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					/* ppc implementation requires that */
					/* submaps pmap address ranges line */
					/* up with parent map */
#ifdef notdef
					pmap_addr = sub_start;
#endif
					pmap_addr = s;
				} else {
					pmap = map->pmap;
					pmap_addr = s;
				}

				if (entry->wired_count) {
					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
						goto done;
					}

					/*
					 * The map was not unlocked:
					 * no need to goto re-lookup.
					 * Just go directly to next entry.
					 */
					entry = entry->vme_next;
					s = entry->vme_start;
					continue;
				}

				/* call vm_map_lookup_locked to */
				/* cause any needs copy to be */
				/* evaluated */
				local_start = entry->vme_start;
				lookup_map = map;
				vm_map_lock_write_to_read(map);
				rc = vm_map_lookup_locked(
					&lookup_map, local_start,
					(access_type | extra_prots),
					OBJECT_LOCK_EXCLUSIVE,
					&version, &object,
					&offset, &prot, &wired,
					NULL,
					&real_map, NULL);
				if (rc != KERN_SUCCESS) {
					vm_map_unlock_read(lookup_map);
					assert(map_pmap == NULL);
					vm_map_unwire(map, start,
					    s, user_wire);
					return rc;
				}
				vm_object_unlock(object);
				if (real_map != lookup_map) {
					vm_map_unlock(real_map);
				}
				vm_map_unlock_read(lookup_map);
				vm_map_lock(map);

				/* we unlocked, so must re-lookup */
				if (!vm_map_lookup_entry(map,
				    local_start,
				    &local_entry)) {
					rc = KERN_FAILURE;
					goto done;
				}

				/*
				 * entry could have been "simplified",
				 * so re-clip
				 */
				entry = local_entry;
				assert(s == local_start);
				vm_map_clip_start(map, entry, s);
				vm_map_clip_end(map, entry, end);
				/* re-compute "e" */
				e = entry->vme_end;
				if (e > end) {
					e = end;
				}

				/* did we have a change of type? */
				if (!entry->is_sub_map) {
					last_timestamp = map->timestamp;
					continue;
				}
			} else {
				local_start = entry->vme_start;
				pmap = map_pmap;
			}

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
				goto done;
			}

			/* Guard the entry across the unlocked recursive call. */
			entry->in_transition = TRUE;

			vm_map_unlock(map);
			rc = vm_map_wire_nested(VME_SUBMAP(entry),
			    sub_start, sub_end,
			    caller_prot, tag,
			    user_wire, pmap, pmap_addr,
			    NULL);
			vm_map_lock(map);

			/*
			 * Find the entry again.  It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, local_start,
			    &first_entry)) {
				panic("vm_map_wire: re-lookup failed");
			}
			entry = first_entry;

			assert(local_start == s);
			/* re-compute "e" */
			e = entry->vme_end;
			if (e > end) {
				e = end;
			}

			last_timestamp = map->timestamp;
			while ((entry != vm_map_to_entry(map)) &&
			    (entry->vme_start < e)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
					subtract_wire_counts(map, entry, user_wire);
				}
				entry = entry->vme_next;
			}
			if (rc != KERN_SUCCESS) {        /* from vm_*_wire */
				goto done;
			}

			/* no need to relookup again */
			s = entry->vme_start;
			continue;
		}

		/*
		 * If this entry is already wired then increment
		 * the appropriate wire reference count.
		 */
		if (entry->wired_count) {
			if ((entry->protection & access_type) != access_type) {
				/* found a protection problem */

				/*
				 * XXX FBDP
				 * We should always return an error
				 * in this case but since we didn't
				 * enforce it before, let's do
				 * it only for the new "wire_and_extract"
				 * code path for now...
				 */
				if (wire_and_extract) {
					rc = KERN_PROTECTION_FAILURE;
					goto done;
				}
			}

			/*
			 * entry is already wired down, get our reference
			 * after clipping to our range.
			 */
			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
				goto done;
			}

			if (wire_and_extract) {
				vm_object_t object;
				vm_object_offset_t offset;
				vm_page_t m;

				/*
				 * We don't have to "wire" the page again
				 * but we still have to "extract" its
				 * physical page number, after some sanity
				 * checks.
				 */
				assert((entry->vme_end - entry->vme_start)
				    == PAGE_SIZE);
				assert(!entry->needs_copy);
				assert(!entry->is_sub_map);
				assert(VME_OBJECT(entry));
				if (((entry->vme_end - entry->vme_start)
				    != PAGE_SIZE) ||
				    entry->needs_copy ||
				    entry->is_sub_map ||
				    VME_OBJECT(entry) == VM_OBJECT_NULL) {
					rc = KERN_INVALID_ARGUMENT;
					goto done;
				}

				object = VME_OBJECT(entry);
				offset = VME_OFFSET(entry);
				/* need exclusive lock to update m->dirty */
				if (entry->protection & VM_PROT_WRITE) {
					vm_object_lock(object);
				} else {
					vm_object_lock_shared(object);
				}
				m = vm_page_lookup(object, offset);
				assert(m != VM_PAGE_NULL);
				assert(VM_PAGE_WIRED(m));
				if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
					*physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
					if (entry->protection & VM_PROT_WRITE) {
						vm_object_lock_assert_exclusive(
							object);
						m->vmp_dirty = TRUE;
					}
				} else {
					/* not already wired !? */
					*physpage_p = 0;
				}
				vm_object_unlock(object);
			}

			/* map was not unlocked: no need to relookup */
			entry = entry->vme_next;
			s = entry->vme_start;
			continue;
		}

		/*
		 * Unwired entry or wire request transmitted via submap
		 */

		/*
		 * Wiring would copy the pages to the shadow object.
		 * The shadow object would not be code-signed so
		 * attempting to execute code from these copied pages
		 * would trigger a code-signing violation.
		 */

		if ((entry->protection & VM_PROT_EXECUTE)
#if XNU_TARGET_OS_OSX
		    &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    )
#endif /* XNU_TARGET_OS_OSX */
		    ) {
#if MACH_ASSERT
			printf("pid %d[%s] wiring executable range from "
			    "0x%llx to 0x%llx: rejected to preserve "
			    "code-signing\n",
			    proc_selfpid(),
			    (current_task()->bsd_info
			    ? proc_name_address(current_task()->bsd_info)
			    : "?"),
			    (uint64_t) entry->vme_start,
			    (uint64_t) entry->vme_end);
#endif /* MACH_ASSERT */
			DTRACE_VM2(cs_executable_wire,
			    uint64_t, (uint64_t)entry->vme_start,
			    uint64_t, (uint64_t)entry->vme_end);
			cs_executable_wire++;
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		/*
		 * Perform actions of vm_map_lookup that need the write
		 * lock on the map: create a shadow object for a
		 * copy-on-write region, or an object for a zero-fill
		 * region.
		 */
		size = entry->vme_end - entry->vme_start;
		/*
		 * If wiring a copy-on-write page, we need to copy it now
		 * even if we're only (currently) requesting read access.
		 * This is aggressive, but once it's wired we can't move it.
		 */
		if (entry->needs_copy) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should not be "needs_copy"
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			VME_OBJECT_SHADOW(entry, size);
			entry->needs_copy = FALSE;
		} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should already have an object.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}
			VME_OBJECT_SET(entry, vm_object_allocate(size));
			VME_OFFSET_SET(entry, (vm_object_offset_t)0);
			assert(entry->use_pmap);
		}

		vm_map_clip_start(map, entry, s);
		vm_map_clip_end(map, entry, end);

		/* re-compute "e" */
		e = entry->vme_end;
		if (e > end) {
			e = end;
		}

		/*
		 * Check for holes and protection mismatch.
		 * Holes: Next entry should be contiguous unless this
		 *	is the end of the region.
		 * Protection: Access requested must be allowed, unless
		 *	wiring is by protection class
		 */
		if ((entry->vme_end < end) &&
		    ((entry->vme_next == vm_map_to_entry(map)) ||
		    (entry->vme_next->vme_start > entry->vme_end))) {
			/* found a hole */
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}
		if ((entry->protection & access_type) != access_type) {
			/* found a protection problem */
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		assert(entry->wired_count == 0 && entry->user_wired_count == 0);

		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
			goto done;
		}

		entry->in_transition = TRUE;

		/*
		 * This entry might get split once we unlock the map.
		 * In vm_fault_wire(), we need the current range as
		 * defined by this entry.  In order for this to work
		 * along with a simultaneous clip operation, we make a
		 * temporary copy of this entry and use that for the
		 * wiring.  Note that the underlying objects do not
		 * change during a clip.
		 */
		tmp_entry = *entry;

		/*
		 * The in_transition state guarantees that the entry
		 * (or entries for this range, if split occurred) will be
		 * there when the map lock is acquired for the second time.
		 */
		vm_map_unlock(map);

		/* Kernel wirings must not be interrupted mid-fault. */
		if (!user_wire && cur_thread != THREAD_NULL) {
			interruptible_state = thread_interrupt_level(THREAD_UNINT);
		} else {
			interruptible_state = THREAD_UNINT;
		}

		if (map_pmap) {
			rc = vm_fault_wire(map,
			    &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
			    physpage_p);
		} else {
			rc = vm_fault_wire(map,
			    &tmp_entry, caller_prot, tag, map->pmap,
			    tmp_entry.vme_start,
			    physpage_p);
		}

		if (!user_wire && cur_thread != THREAD_NULL) {
			thread_interrupt_level(interruptible_state);
		}

		vm_map_lock(map);

		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
			    &first_entry)) {
				panic("vm_map_wire: re-lookup failed");
			}

			entry = first_entry;
		}

		last_timestamp = map->timestamp;

		while ((entry != vm_map_to_entry(map)) &&
		    (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
				subtract_wire_counts(map, entry, user_wire);
			}
			entry = entry->vme_next;
		}

		if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
			goto done;
		}

		if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
		    (tmp_entry.vme_end != end) &&       /* AND, we are not at the end of the requested range */
		    (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
			/* found a "new" hole */
			s = tmp_entry.vme_end;
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}

		s = entry->vme_start;
	} /* end while loop through map entries */

done:
	if (rc == KERN_SUCCESS) {
		/* repair any damage we may have made to the VM map */
		vm_map_simplify_range(map, start, end);
	}

	vm_map_unlock(map);

	/*
	 * wake up anybody waiting on entries we wired.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}

	if (rc != KERN_SUCCESS) {
		/* undo what has been wired so far */
		vm_map_unwire_nested(map, start, s, user_wire,
		    map_pmap, pmap_addr);
		if (physpage_p) {
			*physpage_p = 0;
		}
	}

	return rc;
}
7088
7089 kern_return_t
vm_map_wire_external(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,boolean_t user_wire)7090 vm_map_wire_external(
7091 vm_map_t map,
7092 vm_map_offset_t start,
7093 vm_map_offset_t end,
7094 vm_prot_t caller_prot,
7095 boolean_t user_wire)
7096 {
7097 kern_return_t kret;
7098
7099 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7100 user_wire, (pmap_t)NULL, 0, NULL);
7101 return kret;
7102 }
7103
7104 kern_return_t
vm_map_wire_kernel(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire)7105 vm_map_wire_kernel(
7106 vm_map_t map,
7107 vm_map_offset_t start,
7108 vm_map_offset_t end,
7109 vm_prot_t caller_prot,
7110 vm_tag_t tag,
7111 boolean_t user_wire)
7112 {
7113 kern_return_t kret;
7114
7115 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7116 user_wire, (pmap_t)NULL, 0, NULL);
7117 return kret;
7118 }
7119
7120 kern_return_t
vm_map_wire_and_extract_external(vm_map_t map,vm_map_offset_t start,vm_prot_t caller_prot,boolean_t user_wire,ppnum_t * physpage_p)7121 vm_map_wire_and_extract_external(
7122 vm_map_t map,
7123 vm_map_offset_t start,
7124 vm_prot_t caller_prot,
7125 boolean_t user_wire,
7126 ppnum_t *physpage_p)
7127 {
7128 kern_return_t kret;
7129
7130 kret = vm_map_wire_nested(map,
7131 start,
7132 start + VM_MAP_PAGE_SIZE(map),
7133 caller_prot,
7134 vm_tag_bt(),
7135 user_wire,
7136 (pmap_t)NULL,
7137 0,
7138 physpage_p);
7139 if (kret != KERN_SUCCESS &&
7140 physpage_p != NULL) {
7141 *physpage_p = 0;
7142 }
7143 return kret;
7144 }
7145
7146 kern_return_t
vm_map_wire_and_extract_kernel(vm_map_t map,vm_map_offset_t start,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire,ppnum_t * physpage_p)7147 vm_map_wire_and_extract_kernel(
7148 vm_map_t map,
7149 vm_map_offset_t start,
7150 vm_prot_t caller_prot,
7151 vm_tag_t tag,
7152 boolean_t user_wire,
7153 ppnum_t *physpage_p)
7154 {
7155 kern_return_t kret;
7156
7157 kret = vm_map_wire_nested(map,
7158 start,
7159 start + VM_MAP_PAGE_SIZE(map),
7160 caller_prot,
7161 tag,
7162 user_wire,
7163 (pmap_t)NULL,
7164 0,
7165 physpage_p);
7166 if (kret != KERN_SUCCESS &&
7167 physpage_p != NULL) {
7168 *physpage_p = 0;
7169 }
7170 return kret;
7171 }
7172
7173 /*
7174 * vm_map_unwire:
7175 *
 * Sets the pageability of the specified address range in the target
 * map as pageable. Regions specified must have been wired previously.
 *
 * The map must not be locked, but a reference must remain to the map
 * throughout the call.
 *
 * Kernel will panic on failures. User unwire ignores holes and
 * unwired and in-transition entries to avoid losing memory by leaving
 * it unwired.
7185 */
static kern_return_t
vm_map_unwire_nested(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t user_wire,
	pmap_t map_pmap,
	vm_map_offset_t pmap_addr)
{
	vm_map_entry_t entry;
	struct vm_map_entry *first_entry, tmp_entry;
	boolean_t need_wakeup;
	boolean_t main_map = FALSE;
	unsigned int last_timestamp;

	vm_map_lock(map);
	if (map_pmap == NULL) {
		main_map = TRUE;
	}
	/* Used to detect whether the map changed while we had it unlocked. */
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We unwired what the caller asked for: zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	if (vm_map_lookup_entry(map, start, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested sub maps here !
		 */
	} else {
		if (!user_wire) {
			panic("vm_map_unwire: start not found");
		}
		/*	Start address is not in map. */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	if (entry->superpage_size) {
		/* superpages are always wired */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	need_wakeup = FALSE;
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if (entry->in_transition) {
			/*
			 * 1)
			 * Another thread is wiring down this entry. Note
			 * that if it is not for the other thread we would
			 * be unwiring an unwired entry.  This is not
			 * permitted.  If we wait, we will be unwiring memory
			 * we did not wire.
			 *
			 * 2)
			 * Another thread is unwiring this entry.  We did not
			 * have a reference to it, because if we did, this
			 * entry will not be getting unwired now.
			 */
			if (!user_wire) {
				/*
				 * XXX FBDP
				 * This could happen: there could be some
				 * overlapping vslock/vsunlock operations
				 * going on.
				 * We should probably just wait and retry,
				 * but then we have to be careful that this
				 * entry could get "simplified" after
				 * "in_transition" gets unset and before
				 * we re-lookup the entry, so we would
				 * have to re-clip the entry to avoid
				 * re-unwiring what we have already unwired...
				 * See vm_map_wire_nested().
				 *
				 * Or we could just ignore "in_transition"
				 * here and proceed to decrement the wired
				 * count(s) on this entry.  That should be fine
				 * as long as "wired_count" doesn't drop all
				 * the way to 0 (and we should panic if THAT
				 * happens).
				 */
				panic("vm_map_unwire: in_transition entry");
			}

			entry = entry->vme_next;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;
			pmap_t pmap;

			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);

			/* Translate the clipped range into the submap's address space. */
			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end - entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			if (map_pmap == NULL) {
				if (entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					pmap_addr = sub_start;
				} else {
					pmap = map->pmap;
					pmap_addr = start;
				}
				if (entry->wired_count == 0 ||
				    (user_wire && entry->user_wired_count == 0)) {
					if (!user_wire) {
						panic("vm_map_unwire: entry is unwired");
					}
					entry = entry->vme_next;
					continue;
				}

				/*
				 * Check for holes
				 * Holes: Next entry should be contiguous unless
				 * this is the end of the region.
				 */
				if (((entry->vme_end < end) &&
				    ((entry->vme_next == vm_map_to_entry(map)) ||
				    (entry->vme_next->vme_start
				    > entry->vme_end)))) {
					if (!user_wire) {
						panic("vm_map_unwire: non-contiguous region");
					}
/*
 *				entry = entry->vme_next;
 *				continue;
 */
				}

				subtract_wire_counts(map, entry, user_wire);

				if (entry->wired_count != 0) {
					entry = entry->vme_next;
					continue;
				}

				entry->in_transition = TRUE;
				tmp_entry = *entry;/* see comment in vm_map_wire() */

				/*
				 * We can unlock the map now. The in_transition state
				 * guarantees existence of the entry.
				 */
				vm_map_unlock(map);
				vm_map_unwire_nested(VME_SUBMAP(entry),
				    sub_start, sub_end, user_wire, pmap, pmap_addr);
				vm_map_lock(map);

				if (last_timestamp + 1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 */
					if (!vm_map_lookup_entry(map,
					    tmp_entry.vme_start,
					    &first_entry)) {
						if (!user_wire) {
							panic("vm_map_unwire: re-lookup failed");
						}
						entry = first_entry->vme_next;
					} else {
						entry = first_entry;
					}
				}
				last_timestamp = map->timestamp;

				/*
				 * clear transition bit for all constituent entries
				 * that were in the original entry (saved in
				 * tmp_entry).  Also check for waiters.
				 */
				while ((entry != vm_map_to_entry(map)) &&
				    (entry->vme_start < tmp_entry.vme_end)) {
					assert(entry->in_transition);
					entry->in_transition = FALSE;
					if (entry->needs_wakeup) {
						entry->needs_wakeup = FALSE;
						need_wakeup = TRUE;
					}
					entry = entry->vme_next;
				}
				continue;
			} else {
				/*
				 * Caller supplied a pmap (nested unwire): recurse into
				 * the submap with the caller's pmap, then fall through
				 * to the common unwire path below.
				 */
				vm_map_unlock(map);
				vm_map_unwire_nested(VME_SUBMAP(entry),
				    sub_start, sub_end, user_wire, map_pmap,
				    pmap_addr);
				vm_map_lock(map);

				if (last_timestamp + 1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 *
					 * NOTE(review): on this path "tmp_entry" has not
					 * been assigned in this iteration — it may hold
					 * stale data from a previous iteration; confirm
					 * this re-lookup key is intended.
					 */
					if (!vm_map_lookup_entry(map,
					    tmp_entry.vme_start,
					    &first_entry)) {
						if (!user_wire) {
							panic("vm_map_unwire: re-lookup failed");
						}
						entry = first_entry->vme_next;
					} else {
						entry = first_entry;
					}
				}
				last_timestamp = map->timestamp;
			}
		}


		if ((entry->wired_count == 0) ||
		    (user_wire && entry->user_wired_count == 0)) {
			if (!user_wire) {
				panic("vm_map_unwire: entry is unwired");
			}

			entry = entry->vme_next;
			continue;
		}

		assert(entry->wired_count > 0 &&
		    (!user_wire || entry->user_wired_count > 0));

		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);

		/*
		 * Check for holes
		 * Holes: Next entry should be contiguous unless
		 *	  this is the end of the region.
		 */
		if (((entry->vme_end < end) &&
		    ((entry->vme_next == vm_map_to_entry(map)) ||
		    (entry->vme_next->vme_start > entry->vme_end)))) {
			if (!user_wire) {
				panic("vm_map_unwire: non-contiguous region");
			}
			entry = entry->vme_next;
			continue;
		}

		subtract_wire_counts(map, entry, user_wire);

		if (entry->wired_count != 0) {
			entry = entry->vme_next;
			continue;
		}

		if (entry->zero_wired_pages) {
			entry->zero_wired_pages = FALSE;
		}

		entry->in_transition = TRUE;
		tmp_entry = *entry;     /* see comment in vm_map_wire() */

		/*
		 * We can unlock the map now. The in_transition state
		 * guarantees existence of the entry.
		 */
		vm_map_unlock(map);
		if (map_pmap) {
			vm_fault_unwire(map,
			    &tmp_entry, FALSE, map_pmap, pmap_addr);
		} else {
			vm_fault_unwire(map,
			    &tmp_entry, FALSE, map->pmap,
			    tmp_entry.vme_start);
		}
		vm_map_lock(map);

		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * or deleted after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
			    &first_entry)) {
				if (!user_wire) {
					panic("vm_map_unwire: re-lookup failed");
				}
				entry = first_entry->vme_next;
			} else {
				entry = first_entry;
			}
		}
		last_timestamp = map->timestamp;

		/*
		 * clear transition bit for all constituent entries that
		 * were in the original entry (saved in tmp_entry).  Also
		 * check for waiters.
		 */
		while ((entry != vm_map_to_entry(map)) &&
		    (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			entry = entry->vme_next;
		}
	}

	/*
	 * We might have fragmented the address space when we wired this
	 * range of addresses.  Attempt to re-coalesce these VM map entries
	 * with their neighbors now that they're no longer wired.
	 * Under some circumstances, address space fragmentation can
	 * prevent VM object shadow chain collapsing, which can cause
	 * swap space leaks.
	 */
	vm_map_simplify_range(map, start, end);

	vm_map_unlock(map);
	/*
	 * wake up anybody waiting on entries that we have unwired.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}
	return KERN_SUCCESS;
}
7525
7526 kern_return_t
vm_map_unwire(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t user_wire)7527 vm_map_unwire(
7528 vm_map_t map,
7529 vm_map_offset_t start,
7530 vm_map_offset_t end,
7531 boolean_t user_wire)
7532 {
7533 return vm_map_unwire_nested(map, start, end,
7534 user_wire, (pmap_t)NULL, 0);
7535 }
7536
7537
7538 /*
7539 * vm_map_entry_delete: [ internal use only ]
7540 *
7541 * Deallocate the given entry from the target map.
7542 */
7543 static void
vm_map_entry_delete(vm_map_t map,vm_map_entry_t entry)7544 vm_map_entry_delete(
7545 vm_map_t map,
7546 vm_map_entry_t entry)
7547 {
7548 vm_map_offset_t s, e;
7549 vm_object_t object;
7550 vm_map_t submap;
7551
7552 s = entry->vme_start;
7553 e = entry->vme_end;
7554 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7555 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7556 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7557 assert(page_aligned(s));
7558 assert(page_aligned(e));
7559 }
7560 if (entry->map_aligned == TRUE) {
7561 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7562 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7563 }
7564 assert(entry->wired_count == 0);
7565 assert(entry->user_wired_count == 0);
7566 assert(!entry->permanent);
7567
7568 if (entry->is_sub_map) {
7569 object = NULL;
7570 submap = VME_SUBMAP(entry);
7571 } else {
7572 submap = NULL;
7573 object = VME_OBJECT(entry);
7574 }
7575
7576 vm_map_store_entry_unlink(map, entry);
7577 map->size -= e - s;
7578
7579 vm_map_entry_dispose(map, entry);
7580
7581 vm_map_unlock(map);
7582 /*
7583 * Deallocate the object only after removing all
7584 * pmap entries pointing to its pages.
7585 */
7586 if (submap) {
7587 vm_map_deallocate(submap);
7588 } else {
7589 vm_object_deallocate(object);
7590 }
7591 }
7592
/*
 * vm_map_submap_pmap_clean:
 *
 * Remove the pmap (hardware) translations backing the range
 * [start, end) of "map", where that range is mapped through
 * "sub_map" starting at "offset" within the submap.  Walks every
 * submap entry overlapping [offset, offset + (end - start)),
 * recursing into nested submaps as needed.  Takes the submap's
 * lock for reading only; does not modify any map entries.
 */
void
vm_map_submap_pmap_clean(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t        sub_map,
	vm_map_offset_t offset)
{
	vm_map_offset_t submap_start;
	vm_map_offset_t submap_end;
	vm_map_size_t   remove_size;
	vm_map_entry_t  entry;

	/* range to clean, expressed in the submap's address space */
	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if (vm_map_lookup_entry(sub_map, offset, &entry)) {
		/*
		 * First entry: trim the portions of the entry that fall
		 * outside [offset, submap_end) from the size to remove.
		 */
		remove_size = (entry->vme_end - entry->vme_start);
		if (offset > entry->vme_start) {
			remove_size -= offset - entry->vme_start;
		}


		if (submap_end < entry->vme_end) {
			remove_size -=
			    entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			/* nested submap: recurse one level deeper */
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				/*
				 * "map" may be visible through other pmaps,
				 * so flush the mappings at the object level
				 * rather than from just map->pmap.
				 */
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					(VME_OFFSET(entry) +
					offset -
					entry->vme_start),
					remove_size,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)start,
				    (addr64_t)(start + remove_size));
			}
		}
	}

	/*
	 * Advance to the next entry.  NOTE(review): when the lookup above
	 * fails, this presumably relies on vm_map_lookup_entry() leaving
	 * "entry" at the entry preceding "offset" — confirm against its
	 * contract before changing this.
	 */
	entry = entry->vme_next;

	/* Process the remaining entries that overlap the submap range. */
	while ((entry != vm_map_to_entry(sub_map))
	    && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (submap_end < entry->vme_end) {
			remove_size -= entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			/* translate back into the parent map's address space */
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					VME_OFFSET(entry),
					remove_size,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)((start + entry->vme_start)
				    - offset),
				    (addr64_t)(((start + entry->vme_start)
				    - offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
	return;
}
7692
7693 /*
7694 * virt_memory_guard_ast:
7695 *
7696 * Handle the AST callout for a virtual memory guard.
7697 * raise an EXC_GUARD exception and terminate the task
7698 * if configured to do so.
7699 */
void
virt_memory_guard_ast(
	thread_t thread,
	mach_exception_data_type_t code,
	mach_exception_data_type_t subcode)
{
	task_t task = get_threadtask(thread);
	assert(task != kernel_task);
	assert(task == current_task());
	kern_return_t sync_exception_result;
	uint32_t behavior;

	behavior = task->task_exc_guard;

	/* Is delivery enabled */
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return;
	}

	/*
	 * If only once, make sure we're that once.
	 * CAS loop: atomically clear the DELIVER bit so that exactly one
	 * thread proceeds to deliver.  On CAS failure, re-read the flags;
	 * if another thread already cleared DELIVER, bail out.
	 */
	while (behavior & TASK_EXC_GUARD_VM_ONCE) {
		uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;

		if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
			break;
		}
		behavior = task->task_exc_guard;
		if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
			return;
		}
	}

	/* Raise exception synchronously and see if handler claimed it */
	sync_exception_result = task_exception_notify(EXC_GUARD, code, subcode);

	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		/*
		 * If Synchronous EXC_GUARD delivery was successful then
		 * kill the process and return, else kill the process
		 * and deliver the exception via EXC_CORPSE_NOTIFY.
		 */
		if (sync_exception_result == KERN_SUCCESS) {
			task_bsdtask_kill(current_task());
		} else {
			exit_with_guard_exception(current_proc(), code, subcode);
		}
	} else if (task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) {
		/*
		 * If the synchronous EXC_GUARD delivery was not successful,
		 * raise a simulated crash.
		 */
		if (sync_exception_result != KERN_SUCCESS) {
			task_violated_guard(code, subcode, NULL);
		}
	}
}
7756
7757 /*
7758 * vm_map_guard_exception:
7759 *
7760 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7761 *
7762 * Right now, we do this when we find nothing mapped, or a
7763 * gap in the mapping when a user address space deallocate
7764 * was requested. We report the address of the first gap found.
7765 */
7766 static void
vm_map_guard_exception(vm_map_offset_t gap_start,unsigned reason)7767 vm_map_guard_exception(
7768 vm_map_offset_t gap_start,
7769 unsigned reason)
7770 {
7771 mach_exception_code_t code = 0;
7772 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7773 unsigned int target = 0; /* should we pass in pid associated with map? */
7774 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7775 boolean_t fatal = FALSE;
7776
7777 task_t task = current_task();
7778
7779 /* Can't deliver exceptions to kernel task */
7780 if (task == kernel_task) {
7781 return;
7782 }
7783
7784 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7785 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7786 EXC_GUARD_ENCODE_TARGET(code, target);
7787
7788 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7789 fatal = TRUE;
7790 }
7791 thread_guard_violation(current_thread(), code, subcode, fatal);
7792 }
7793
7794 /*
7795 * vm_map_delete: [ internal use only ]
7796 *
7797 * Deallocates the given address range from the target map.
7798 * Removes all user wirings. Unwires one kernel wiring if
7799 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7800 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7801 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7802 *
7803 * This routine is called with map locked and leaves map locked.
7804 */
static kern_return_t
vm_map_delete(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	int             flags,
	vm_map_t        zap_map)
{
	vm_map_entry_t          entry, next;
	struct vm_map_entry     *first_entry, tmp_entry;
	vm_map_offset_t         s;
	vm_object_t             object;
	boolean_t               need_wakeup;
	unsigned int            last_timestamp = ~0; /* unlikely value */
	int                     interruptible;
	vm_map_offset_t         gap_start;
	__unused vm_map_offset_t save_start = start;
	__unused vm_map_offset_t save_end = end;
	const vm_map_offset_t   FIND_GAP = 1;   /* a not page aligned value */
	const vm_map_offset_t   GAPS_OK = 2;    /* a different not page aligned value */

	/*
	 * "gap_start" starts as one of two sentinels (neither can be a real
	 * page-aligned address): FIND_GAP means record the first unmapped
	 * hole found in the range (reported at the end), GAPS_OK means
	 * holes are acceptable and go unreported.
	 */
	if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
		gap_start = FIND_GAP;
	} else {
		gap_start = GAPS_OK;
	}

	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
	    THREAD_ABORTSAFE : THREAD_UNINT;

	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;

	while (1) {
		/*
		 * Find the start of the region, and clip it
		 */
		if (vm_map_lookup_entry(map, start, &first_entry)) {
			entry = first_entry;
			/* kalloc-owned maps only free exact, whole entries */
			if (kalloc_owned_map(map) &&
			    (entry->vme_start != start ||
			    entry->vme_end != end)) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "mismatched entry %p [0x%llx:0x%llx]\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t)entry->vme_start,
				    (uint64_t)entry->vme_end);
			}

			/*
			 * If in a superpage, extend the range to include the start of the mapping.
			 */
			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
				start = SUPERPAGE_ROUND_DOWN(start);
				/* re-run the lookup with the extended start */
				continue;
			}

			if (start == entry->vme_start) {
				/*
				 * No need to clip.  We don't want to cause
				 * any unnecessary unnesting in this case...
				 */
			} else {
				if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
				    entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(map))) {
					/*
					 * The entry will no longer be
					 * map-aligned after clipping
					 * and the caller said it's OK.
					 */
					entry->map_aligned = FALSE;
				}
				if (kalloc_owned_map(map)) {
					panic("vm_map_delete(%p,0x%llx,0x%llx):"
					    " clipping %p at 0x%llx\n",
					    map,
					    (uint64_t)start,
					    (uint64_t)end,
					    entry,
					    (uint64_t)start);
				}
				vm_map_clip_start(map, entry, start);
			}

			/*
			 * Fix the lookup hint now, rather than each
			 * time through the loop.
			 */
			SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
		} else {
			/* nothing mapped at "start" */
			if (map->pmap == kernel_pmap &&
			    os_ref_get_count(&map->map_refcnt) != 0) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "no map entry at 0x%llx\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    (uint64_t)start);
			}
			entry = first_entry->vme_next;
			if (gap_start == FIND_GAP) {
				gap_start = start;
			}
		}
		break;
	}
	if (entry->superpage_size) {
		end = SUPERPAGE_ROUND_UP(end);
	}

	need_wakeup = FALSE;
	/*
	 * Step through all entries in this region
	 */
	s = entry->vme_start;
	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have deleted all the memory entries
		 * between "start" and "s".  We still need to delete
		 * all memory entries between "s" and "end".
		 * While we were blocked and the map was unlocked, some
		 * new memory entries could have been re-allocated between
		 * "start" and "s" and we don't want to mess with those.
		 * Some of those entries could even have been re-assembled
		 * with an entry after "s" (in vm_map_simplify_entry()), so
		 * we may have to vm_map_clip_start() again.
		 */

		if (entry->vme_start >= s) {
			/*
			 * This entry starts on or after "s"
			 * so no need to clip its start.
			 */
		} else {
			/*
			 * This entry has been re-assembled by a
			 * vm_map_simplify_entry().  We need to
			 * re-clip its start.
			 */
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(s,
			    VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be map-aligned
				 * after clipping and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (kalloc_owned_map(map)) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "clipping %p at 0x%llx\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t)s);
			}
			vm_map_clip_start(map, entry, s);
		}
		if (entry->vme_end <= end) {
			/*
			 * This entry is going away completely, so no need
			 * to clip and possibly cause an unnecessary unnesting.
			 */
		} else {
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(end,
			    VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be map-aligned
				 * after clipping and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (kalloc_owned_map(map)) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "clipping %p at 0x%llx\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t)end);
			}
			vm_map_clip_end(map, entry, end);
		}

		if (entry->permanent) {
			if (map->pmap == kernel_pmap) {
				panic("%s(%p,0x%llx,0x%llx): "
				    "attempt to remove permanent "
				    "VM map entry "
				    "%p [0x%llx:0x%llx]\n",
				    __FUNCTION__,
				    map,
				    (uint64_t) start,
				    (uint64_t) end,
				    entry,
				    (uint64_t) entry->vme_start,
				    (uint64_t) entry->vme_end);
			} else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
//				printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
				entry->permanent = FALSE;
			} else {
				if (vm_map_executable_immutable_verbose) {
					printf("%d[%s] %s(0x%llx,0x%llx): "
					    "permanent entry [0x%llx:0x%llx] "
					    "prot 0x%x/0x%x\n",
					    proc_selfpid(),
					    (current_task()->bsd_info
					    ? proc_name_address(current_task()->bsd_info)
					    : "?"),
					    __FUNCTION__,
					    (uint64_t) start,
					    (uint64_t) end,
					    (uint64_t)entry->vme_start,
					    (uint64_t)entry->vme_end,
					    entry->protection,
					    entry->max_protection);
				}
				/*
				 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
				 */
				DTRACE_VM5(vm_map_delete_permanent,
				    vm_map_offset_t, entry->vme_start,
				    vm_map_offset_t, entry->vme_end,
				    vm_prot_t, entry->protection,
				    vm_prot_t, entry->max_protection,
				    int, VME_ALIAS(entry));
			}
		}


		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * Another thread is wiring/unwiring this entry.
			 * Let the other thread know we are waiting.
			 */
			assert(s == entry->vme_start);
			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already unwired/deleted.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}

			wait_result = vm_map_entry_wait(map, interruptible);

			if (interruptible &&
			    wait_result == THREAD_INTERRUPTED) {
				/*
				 * We do not clear the needs_wakeup flag,
				 * since we cannot tell if we were the only one.
				 */
				return KERN_ABORTED;
			}

			/*
			 * The entry could have been clipped or it
			 * may not exist anymore.  Look it up again.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: use the next entry
				 */
				if (gap_start == FIND_GAP) {
					gap_start = s;
				}
				entry = first_entry->vme_next;
				s = entry->vme_start;
			} else {
				entry = first_entry;
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			last_timestamp = map->timestamp;
			continue;
		} /* end in_transition */

		if (entry->wired_count) {
			boolean_t       user_wire;

			user_wire = entry->user_wired_count > 0;

			/*
			 * Remove a kernel wiring if requested
			 */
			if (flags & VM_MAP_REMOVE_KUNWIRE) {
				entry->wired_count--;
			}

			/*
			 * Remove all user wirings for proper accounting
			 */
			if (entry->user_wired_count > 0) {
				while (entry->user_wired_count) {
					subtract_wire_counts(map, entry, user_wire);
				}
			}

			if (entry->wired_count != 0) {
				assert(map != kernel_map);
				/*
				 * Cannot continue.  Typical case is when
				 * a user thread has physical io pending on
				 * this page.  Either wait for the
				 * kernel wiring to go away or return an
				 * error.
				 */
				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
					wait_result_t wait_result;

					assert(s == entry->vme_start);
					entry->needs_wakeup = TRUE;
					wait_result = vm_map_entry_wait(map,
					    interruptible);

					if (interruptible &&
					    wait_result == THREAD_INTERRUPTED) {
						/*
						 * We do not clear the
						 * needs_wakeup flag, since we
						 * cannot tell if we were the
						 * only one.
						 */
						return KERN_ABORTED;
					}

					/*
					 * The entry could have been clipped or
					 * it may not exist anymore.  Look it
					 * up again.
					 */
					if (!vm_map_lookup_entry(map, s,
					    &first_entry)) {
						assert(map != kernel_map);
						/*
						 * User: use the next entry
						 */
						if (gap_start == FIND_GAP) {
							gap_start = s;
						}
						entry = first_entry->vme_next;
						s = entry->vme_start;
					} else {
						entry = first_entry;
						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
					}
					last_timestamp = map->timestamp;
					continue;
				} else {
					return KERN_FAILURE;
				}
			}

			entry->in_transition = TRUE;
			/*
			 * copy current entry.  see comment in vm_map_wire()
			 */
			tmp_entry = *entry;
			assert(s == entry->vme_start);

			/*
			 * We can unlock the map now.  The in_transition
			 * state guarantees existence of the entry.
			 */
			vm_map_unlock(map);

			if (tmp_entry.is_sub_map) {
				vm_map_t sub_map;
				vm_map_offset_t sub_start, sub_end;
				pmap_t pmap;
				vm_map_offset_t pmap_addr;


				sub_map = VME_SUBMAP(&tmp_entry);
				sub_start = VME_OFFSET(&tmp_entry);
				sub_end = sub_start + (tmp_entry.vme_end -
				    tmp_entry.vme_start);
				if (tmp_entry.use_pmap) {
					pmap = sub_map->pmap;
					pmap_addr = tmp_entry.vme_start;
				} else {
					pmap = map->pmap;
					pmap_addr = tmp_entry.vme_start;
				}
				(void) vm_map_unwire_nested(sub_map,
				    sub_start, sub_end,
				    user_wire,
				    pmap, pmap_addr);
			} else {
				if (VME_OBJECT(&tmp_entry) == kernel_object) {
					pmap_protect_options(
						map->pmap,
						tmp_entry.vme_start,
						tmp_entry.vme_end,
						VM_PROT_NONE,
						PMAP_OPTIONS_REMOVE,
						NULL);
				}
				vm_fault_unwire(map, &tmp_entry,
				    VME_OBJECT(&tmp_entry) == kernel_object,
				    map->pmap, tmp_entry.vme_start);
			}

			vm_map_lock(map);

			/*
			 * "last_timestamp + 1 != map->timestamp" means some
			 * other thread took the map lock while we had it
			 * dropped, so our entry pointers may be stale.
			 */
			if (last_timestamp + 1 != map->timestamp) {
				/*
				 * Find the entry again.  It could have
				 * been clipped after we unlocked the map.
				 */
				if (!vm_map_lookup_entry(map, s, &first_entry)) {
					assert((map != kernel_map) &&
					    (!entry->is_sub_map));
					if (gap_start == FIND_GAP) {
						gap_start = s;
					}
					first_entry = first_entry->vme_next;
					s = first_entry->vme_start;
				} else {
					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				}
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				first_entry = entry;
			}

			last_timestamp = map->timestamp;

			/*
			 * Clear in_transition (and collect pending waiters)
			 * on every constituent entry of the original range
			 * saved in tmp_entry.
			 */
			entry = first_entry;
			while ((entry != vm_map_to_entry(map)) &&
			    (entry->vme_start < tmp_entry.vme_end)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				entry = entry->vme_next;
			}
			/*
			 * We have unwired the entry(s).  Go back and
			 * delete them.
			 */
			entry = first_entry;
			continue;
		}

		/* entry is unwired */
		assert(entry->wired_count == 0);
		assert(entry->user_wired_count == 0);

		assert(s == entry->vme_start);

		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
			/*
			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
			 * vm_map_delete(), some map entries might have been
			 * transferred to a "zap_map", which doesn't have a
			 * pmap.  The original pmap has already been flushed
			 * in the vm_map_delete() call targeting the original
			 * map, but when we get to destroying the "zap_map",
			 * we don't have any pmap to flush, so let's just skip
			 * all this.
			 */
		} else if (entry->is_sub_map) {
			assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
			    "map %p (%d) entry %p submap %p (%d)\n",
			    map, VM_MAP_PAGE_SHIFT(map), entry,
			    VME_SUBMAP(entry),
			    VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
			if (entry->use_pmap) {
				assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
				    "map %p (%d) entry %p submap %p (%d)\n",
				    map, VM_MAP_PAGE_SHIFT(map), entry,
				    VME_SUBMAP(entry),
				    VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
#ifndef NO_NESTED_PMAP
				int pmap_flags;

				if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
					/*
					 * This is the final cleanup of the
					 * address space being terminated.
					 * No new mappings are expected and
					 * we don't really need to unnest the
					 * shared region (and lose the "global"
					 * pmap mappings, if applicable).
					 *
					 * Tell the pmap layer that we're
					 * "clean" wrt nesting.
					 */
					pmap_flags = PMAP_UNNEST_CLEAN;
				} else {
					/*
					 * We're unmapping part of the nested
					 * shared region, so we can't keep the
					 * nested pmap.
					 */
					pmap_flags = 0;
				}
				pmap_unnest_options(
					map->pmap,
					(addr64_t)entry->vme_start,
					entry->vme_end - entry->vme_start,
					pmap_flags);
#endif  /* NO_NESTED_PMAP */
				if (map->mapped_in_other_pmaps &&
				    os_ref_get_count(&map->map_refcnt) != 0) {
					/* clean up parent map/maps */
					vm_map_submap_pmap_clean(
						map, entry->vme_start,
						entry->vme_end,
						VME_SUBMAP(entry),
						VME_OFFSET(entry));
				}
			} else {
				vm_map_submap_pmap_clean(
					map, entry->vme_start, entry->vme_end,
					VME_SUBMAP(entry),
					VME_OFFSET(entry));
			}
		} else if (VME_OBJECT(entry) != kernel_object &&
		    VME_OBJECT(entry) != compressor_object) {
			object = VME_OBJECT(entry);
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0) {
				vm_object_pmap_protect_options(
					object, VME_OFFSET(entry),
					entry->vme_end - entry->vme_start,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
			    (map->pmap == kernel_pmap)) {
				/* Remove translations associated
				 * with this range unless the entry
				 * does not have an object, or
				 * it's the kernel map or a descendant
				 * since the platform could potentially
				 * create "backdoor" mappings invisible
				 * to the VM. It is expected that
				 * objectless, non-kernel ranges
				 * do not have such VM invisible
				 * translations.
				 */
				pmap_remove_options(map->pmap,
				    (addr64_t)entry->vme_start,
				    (addr64_t)entry->vme_end,
				    PMAP_OPTIONS_REMOVE);
			}
		}

		if (entry->iokit_acct) {
			/* alternate accounting */
			DTRACE_VM4(vm_map_iokit_unmapped_region,
			    vm_map_t, map,
			    vm_map_offset_t, entry->vme_start,
			    vm_map_offset_t, entry->vme_end,
			    int, VME_ALIAS(entry));
			vm_map_iokit_unmapped_region(map,
			    (entry->vme_end -
			    entry->vme_start));
			entry->iokit_acct = FALSE;
			entry->use_pmap = FALSE;
		}

		/*
		 * All pmap mappings for this map entry must have been
		 * cleared by now.
		 */
#if DEBUG
		assert(pmap_is_empty(map->pmap,
		    entry->vme_start,
		    entry->vme_end));
#endif /* DEBUG */

		next = entry->vme_next;

		if (map->pmap == kernel_pmap &&
		    os_ref_get_count(&map->map_refcnt) != 0) {
			/* the kernel map must not have holes in the range */
			if (entry->vme_end < end && (next == vm_map_to_entry(map) || next->vme_start != entry->vme_end)) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "hole after %p at 0x%llx\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t)entry->vme_end);
			}

			if (entry->vme_atomic && (entry->vme_start != start || entry->vme_end != end)) {
				/*
				 * In the kernel map and its submaps, the removal of
				 * an atomic entry is strict.  An atomic entry is
				 * processed only if it was specifically targeted.  We
				 * might have deleted non-atomic entries before it but
				 * we won't remove this atomic entry OR anything after it.
				 */
#if DEVELOPMENT || DEBUG
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "request loosely encompasses atomic entry %p at (0x%llx,0x%llx)\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t)entry->vme_start,
				    (uint64_t)entry->vme_end);
#endif /* DEVELOPMENT || DEBUG */

				break;
			}
		}

		/*
		 * If the desired range didn't end with "entry", then there is a gap if
		 * we wrapped around to the start of the map or if "entry" and "next"
		 * aren't contiguous.
		 *
		 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
		 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
		 */
		if (gap_start == FIND_GAP &&
		    vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
		    (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
			gap_start = entry->vme_end;
		}
		s = next->vme_start;
		last_timestamp = map->timestamp;

		if (entry->permanent) {
			/*
			 * A permanent entry can not be removed, so leave it
			 * in place but remove all access permissions.
			 */
			entry->protection = VM_PROT_NONE;
			entry->max_protection = VM_PROT_NONE;
		} else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
		    zap_map != VM_MAP_NULL) {
			vm_map_size_t entry_size;
			/*
			 * The caller wants to save the affected VM map entries
			 * into the "zap_map".  The caller will take care of
			 * these entries.
			 */
			/* unlink the entry from "map" ... */
			vm_map_store_entry_unlink(map, entry);
			/* ... and add it to the end of the "zap_map" */
			vm_map_store_entry_link(zap_map,
			    vm_map_last_entry(zap_map),
			    entry,
			    VM_MAP_KERNEL_FLAGS_NONE);
			entry_size = entry->vme_end - entry->vme_start;
			map->size -= entry_size;
			zap_map->size += entry_size;
			/* we didn't unlock the map, so no timestamp increase */
			last_timestamp--;
		} else {
			vm_map_entry_delete(map, entry);
			/* vm_map_entry_delete unlocks the map */
			vm_map_lock(map);
		}

		entry = next;

		if (entry == vm_map_to_entry(map)) {
			break;
		}
		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * We are responsible for deleting everything
			 * from the given space.  If someone has interfered,
			 * we pick up where we left off.  Back fills should
			 * be all right for anyone, except map_delete, and
			 * we have to assume that the task has been fully
			 * disabled before we get here
			 */
			if (!vm_map_lookup_entry(map, s, &entry)) {
				entry = entry->vme_next;

				/*
				 * Nothing found for s.  If we weren't already done, then there is a gap.
				 */
				if (gap_start == FIND_GAP && s < end) {
					gap_start = s;
				}
				s = entry->vme_start;
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			/*
			 * others can not only allocate behind us, we can
			 * also see coalesce while we don't have the map lock
			 */
			if (entry == vm_map_to_entry(map)) {
				break;
			}
		}
		last_timestamp = map->timestamp;
	}

	if (map->wait_for_space) {
		thread_wakeup((event_t) map);
	}
	/*
	 * wake up anybody waiting on entries that we have already deleted.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}

	/* Report the first gap found (gap_start no longer a sentinel). */
	if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
		DTRACE_VM3(kern_vm_deallocate_gap,
		    vm_map_offset_t, gap_start,
		    vm_map_offset_t, save_start,
		    vm_map_offset_t, save_end);
		if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
			vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
		}
	}

	return KERN_SUCCESS;
}
8553
8554
8555 /*
8556 * vm_map_terminate:
8557 *
8558 * Clean out a task's map.
8559 */
8560 kern_return_t
vm_map_terminate(vm_map_t map)8561 vm_map_terminate(
8562 vm_map_t map)
8563 {
8564 vm_map_lock(map);
8565 map->terminated = TRUE;
8566 vm_map_unlock(map);
8567
8568 return vm_map_remove(map,
8569 map->min_offset,
8570 map->max_offset,
8571 /*
8572 * Final cleanup:
8573 * + no unnesting
8574 * + remove immutable mappings
8575 * + allow gaps in range
8576 */
8577 (VM_MAP_REMOVE_NO_UNNESTING |
8578 VM_MAP_REMOVE_IMMUTABLE |
8579 VM_MAP_REMOVE_GAPS_OK));
8580 }
8581
8582 /*
8583 * vm_map_remove:
8584 *
8585 * Remove the given address range from the target map.
8586 * This is the exported form of vm_map_delete.
8587 */
8588 kern_return_t
vm_map_remove(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t flags)8589 vm_map_remove(
8590 vm_map_t map,
8591 vm_map_offset_t start,
8592 vm_map_offset_t end,
8593 boolean_t flags)
8594 {
8595 kern_return_t result;
8596
8597 vm_map_lock(map);
8598 VM_MAP_RANGE_CHECK(map, start, end);
8599 /*
8600 * For the zone maps, the kernel controls the allocation/freeing of memory.
8601 * Any free to the zone maps should be within the bounds of the map and
8602 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8603 * free to the zone maps into a no-op, there is a problem and we should
8604 * panic.
8605 */
8606 if ((start == end) && zone_maps_owned(start, 1)) {
8607 panic("Nothing being freed to a zone map. start = end = %p", (void *)start);
8608 }
8609 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8610 vm_map_unlock(map);
8611
8612 return result;
8613 }
8614
8615 /*
8616 * vm_map_remove_locked:
8617 *
8618 * Remove the given address range from the target locked map.
8619 * This is the exported form of vm_map_delete.
8620 */
8621 kern_return_t
vm_map_remove_locked(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t flags)8622 vm_map_remove_locked(
8623 vm_map_t map,
8624 vm_map_offset_t start,
8625 vm_map_offset_t end,
8626 boolean_t flags)
8627 {
8628 kern_return_t result;
8629
8630 VM_MAP_RANGE_CHECK(map, start, end);
8631 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8632 return result;
8633 }
8634
8635
8636 /*
8637 * Routine: vm_map_copy_allocate
8638 *
8639 * Description:
8640 * Allocates and initializes a map copy object.
8641 */
8642 static vm_map_copy_t
vm_map_copy_allocate(void)8643 vm_map_copy_allocate(void)
8644 {
8645 vm_map_copy_t new_copy;
8646
8647 new_copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
8648 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8649 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8650 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8651 return new_copy;
8652 }
8653
8654 /*
8655 * Routine: vm_map_copy_discard
8656 *
8657 * Description:
8658 * Dispose of a map copy object (returned by
8659 * vm_map_copyin).
8660 */
8661 void
vm_map_copy_discard(vm_map_copy_t copy)8662 vm_map_copy_discard(
8663 vm_map_copy_t copy)
8664 {
8665 if (copy == VM_MAP_COPY_NULL) {
8666 return;
8667 }
8668
8669 switch (copy->type) {
8670 case VM_MAP_COPY_ENTRY_LIST:
8671 while (vm_map_copy_first_entry(copy) !=
8672 vm_map_copy_to_entry(copy)) {
8673 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8674
8675 vm_map_copy_entry_unlink(copy, entry);
8676 if (entry->is_sub_map) {
8677 vm_map_deallocate(VME_SUBMAP(entry));
8678 } else {
8679 vm_object_deallocate(VME_OBJECT(entry));
8680 }
8681 vm_map_copy_entry_dispose(copy, entry);
8682 }
8683 break;
8684 case VM_MAP_COPY_OBJECT:
8685 vm_object_deallocate(copy->cpy_object);
8686 break;
8687 case VM_MAP_COPY_KERNEL_BUFFER:
8688
8689 /*
8690 * The vm_map_copy_t and possibly the data buffer were
8691 * allocated by a single call to kalloc_data(), i.e. the
8692 * vm_map_copy_t was not allocated out of the zone.
8693 */
8694 if (copy->size > msg_ool_size_small || copy->offset) {
8695 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8696 (long long)copy->size, (long long)copy->offset);
8697 }
8698 kfree_data(copy->cpy_kdata, copy->size);
8699 }
8700 zfree(vm_map_copy_zone, copy);
8701 }
8702
8703 /*
8704 * Routine: vm_map_copy_copy
8705 *
8706 * Description:
8707 * Move the information in a map copy object to
8708 * a new map copy object, leaving the old one
8709 * empty.
8710 *
8711 * This is used by kernel routines that need
8712 * to look at out-of-line data (in copyin form)
8713 * before deciding whether to return SUCCESS.
8714 * If the routine returns FAILURE, the original
8715 * copy object will be deallocated; therefore,
8716 * these routines must make a copy of the copy
8717 * object and leave the original empty so that
8718 * deallocation will not fail.
8719 */
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t copy)
{
	vm_map_copy_t new_copy;

	if (copy == VM_MAP_COPY_NULL) {
		return VM_MAP_COPY_NULL;
	}

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */

	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
#if __has_feature(ptrauth_calls)
	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		/*
		 * NOTE(review): explicit re-assignment after the raw memcpy
		 * above — presumably so that cpy_kdata, if it is a signed
		 * (address-diversified) pointer, gets re-signed for its new
		 * storage location under pointer authentication.  Confirm
		 * against the ptrauth qualifier on cpy_kdata's declaration.
		 */
		new_copy->cpy_kdata = copy->cpy_kdata;
	}
#endif

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 * (The entries themselves are shared with the original,
		 * which is emptied below, so ownership moves wholesale.)
		 */
		vm_map_copy_first_entry(copy)->vme_prev
		        = vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
		        = vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated: a later vm_map_copy_discard()
	 * on it only sees a NULL object reference.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
8766
8767 static boolean_t
vm_map_entry_is_overwritable(vm_map_t dst_map __unused,vm_map_entry_t entry)8768 vm_map_entry_is_overwritable(
8769 vm_map_t dst_map __unused,
8770 vm_map_entry_t entry)
8771 {
8772 if (!(entry->protection & VM_PROT_WRITE)) {
8773 /* can't overwrite if not writable */
8774 return FALSE;
8775 }
8776 #if !__x86_64__
8777 if (entry->used_for_jit &&
8778 vm_map_cs_enforcement(dst_map) &&
8779 !dst_map->cs_debugged) {
8780 /*
8781 * Can't overwrite a JIT region while cs_enforced
8782 * and not cs_debugged.
8783 */
8784 return FALSE;
8785 }
8786 #endif /* !__x86_64__ */
8787 return TRUE;
8788 }
8789
/*
 * vm_map_overwrite_submap_recurse:
 *
 * Verify that the range [dst_addr, dst_addr + dst_size) of "dst_map"
 * can be overwritten: every entry covering the range must be writable
 * and overwritable, the range must be backed by contiguous entries,
 * and nested submaps are validated recursively.  Only performs checks
 * (plus entry clipping at the start address); it does not copy data.
 *
 * The map is entered unlocked and left unlocked; the lock is dropped
 * around each recursion, so checks restart at "start_pass_1" whenever
 * an entry is found in transition.
 */
static kern_return_t
vm_map_overwrite_submap_recurse(
	vm_map_t        dst_map,
	vm_map_offset_t dst_addr,
	vm_map_size_t   dst_size)
{
	vm_map_offset_t dst_end;
	vm_map_entry_t  tmp_entry;
	vm_map_entry_t  entry;
	kern_return_t   result;
	boolean_t       encountered_sub_map = FALSE;



	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */

	dst_end = vm_map_round_page(dst_addr + dst_size,
	    VM_MAP_PAGE_MASK(dst_map));
	vm_map_lock(dst_map);

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}

	vm_map_clip_start(dst_map,
	    tmp_entry,
	    vm_map_trunc_page(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)));
	if (tmp_entry->is_sub_map) {
		/* clipping did unnest if needed */
		assert(!tmp_entry->use_pmap);
	}

	/* Walk the entries covering [dst_addr, dst_end). */
	for (entry = tmp_entry;;) {
		vm_map_entry_t  next;

		next = entry->vme_next;
		while (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			encountered_sub_map = TRUE;
			sub_start = VME_OFFSET(entry);

			/*
			 * Translate the overlap of [dst_addr, dst_end)
			 * with this entry into submap offsets.
			 */
			if (entry->vme_end < dst_end) {
				sub_end = entry->vme_end;
			} else {
				sub_end = dst_end;
			}
			sub_end -= entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			/* Lock is dropped across the recursion. */
			vm_map_unlock(dst_map);

			result = vm_map_overwrite_submap_recurse(
				VME_SUBMAP(entry),
				sub_start,
				sub_end - sub_start);

			if (result != KERN_SUCCESS) {
				return result;
			}
			if (dst_end <= entry->vme_end) {
				return KERN_SUCCESS;
			}
			/* Re-take the lock and re-lookup: map may have changed. */
			vm_map_lock(dst_map);
			if (!vm_map_lookup_entry(dst_map, local_end,
			    &tmp_entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			entry = tmp_entry;
			next = entry->vme_next;
		}

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 * our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			vm_map_unlock(dst_map);
			return KERN_SUCCESS;
		}
		/*
		 * check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		    (VME_OBJECT(entry)->true_share))) {
			if (encountered_sub_map) {
				/*
				 * A permanent object below a submap cannot be
				 * handled by the deconstructing overwrite path.
				 */
				vm_map_unlock(dst_map);
				return KERN_FAILURE;
			}
		}


		entry = next;
	}/* for */
	/* NOTREACHED: the loop above exits only via the returns within it. */
	vm_map_unlock(dst_map);
	return KERN_SUCCESS;
}
8942
8943 /*
8944 * Routine: vm_map_copy_overwrite
8945 *
8946 * Description:
8947 * Copy the memory described by the map copy
8948 * object (copy; returned by vm_map_copyin) onto
8949 * the specified destination region (dst_map, dst_addr).
8950 * The destination must be writeable.
8951 *
8952 * Unlike vm_map_copyout, this routine actually
8953 * writes over previously-mapped memory. If the
8954 * previous mapping was to a permanent (user-supplied)
8955 * memory object, it is preserved.
8956 *
8957 * The attributes (protection and inheritance) of the
8958 * destination region are preserved.
8959 *
8960 * If successful, consumes the copy object.
8961 * Otherwise, the caller is responsible for it.
8962 *
8963 * Implementation notes:
8964 * To overwrite aligned temporary virtual memory, it is
8965 * sufficient to remove the previous mapping and insert
8966 * the new copy. This replacement is done either on
8967 * the whole region (if no permanent virtual memory
8968 * objects are embedded in the destination region) or
8969 * in individual map entries.
8970 *
8971 * To overwrite permanent virtual memory , it is necessary
8972 * to copy each page, as the external memory management
8973 * interface currently does not provide any optimizations.
8974 *
8975 * Unaligned memory also has to be copied. It is possible
8976 * to use 'vm_trickery' to copy the aligned data. This is
8977 * not done but not hard to implement.
8978 *
8979 * Once a page of permanent memory has been overwritten,
8980 * it is impossible to interrupt this function; otherwise,
8981 * the call would be neither atomic nor location-independent.
8982 * The kernel-state portion of a user thread must be
8983 * interruptible.
8984 *
8985 * It may be expensive to forward all requests that might
8986 * overwrite permanent memory (vm_write, vm_copy) to
8987 * uninterruptible kernel threads. This routine may be
8988 * called by interruptible threads; however, success is
8989 * not guaranteed -- if the request cannot be performed
8990 * atomically and interruptibly, an error indication is
8991 * returned.
8992 */
8993
/*
 * vm_map_copy_overwrite_nested:
 *
 * Workhorse behind vm_map_copy_overwrite(): overwrite the destination
 * range [dst_addr, dst_addr + copy->size) of "dst_map" with the
 * contents of "copy".  Operates in two passes: pass 1 validates that
 * the whole destination is writable/overwritable/contiguous (recursing
 * into submaps); pass 2 performs the overwrite, deconstructing "copy"
 * into pieces when submaps are encountered so each piece can be
 * applied to the proper (sub)map.
 *
 * "pmap" optionally overrides the pmap used for the aligned copy path;
 * when NULL, the destination (sub)map's pmap is used.  On success the
 * copy object is consumed iff "discard_on_success" is TRUE.
 */
static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t                dst_map,
	vm_map_address_t        dst_addr,
	vm_map_copy_t           copy,
	boolean_t               interruptible,
	pmap_t                  pmap,
	boolean_t               discard_on_success)
{
	vm_map_offset_t         dst_end;
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	kern_return_t           kr;
	boolean_t               aligned = TRUE;
	boolean_t               contains_permanent_objects = FALSE;
	boolean_t               encountered_sub_map = FALSE;
	vm_map_offset_t         base_addr;
	vm_map_size_t           copy_size;
	vm_map_size_t           total_size;
	int                     copy_page_shift;


	/*
	 *	Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		return KERN_SUCCESS;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(
			dst_map, &dst_addr,
			copy, copy->size, TRUE, discard_on_success);
	}

	/*
	 *      Only works for entry lists at the moment.  Will
	 *	support page lists later.
	 */

	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);

	if (copy->size == 0) {
		/* Nothing to overwrite; consume the copy if requested. */
		if (discard_on_success) {
			vm_map_copy_discard(copy);
		}
		return KERN_SUCCESS;
	}

	copy_page_shift = copy->cpy_hdr.page_shift;

	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */

	if (!VM_MAP_PAGE_ALIGNED(copy->size,
	    VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(copy->offset,
	    VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)) ||
	    copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
		/* Misaligned: the slower unaligned copy path will be used. */
		aligned = FALSE;
		dst_end = vm_map_round_page(dst_addr + copy->size,
		    VM_MAP_PAGE_MASK(dst_map));
	} else {
		dst_end = dst_addr + copy->size;
	}

	vm_map_lock(dst_map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (dst_addr >= dst_map->max_offset) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Pass 1: validate the whole destination range before touching it.
	 * Restarted from here whenever an in-transition entry forces a wait.
	 */
start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}
	vm_map_clip_start(dst_map,
	    tmp_entry,
	    vm_map_trunc_page(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)));
	for (entry = tmp_entry;;) {
		vm_map_entry_t  next = entry->vme_next;

		while (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			local_end = entry->vme_end;
			if (!(entry->needs_copy)) {
				/* if needs_copy we are a COW submap */
				/* in such a case we just replace so */
				/* there is no need for the follow-  */
				/* ing check.                        */
				encountered_sub_map = TRUE;
				sub_start = VME_OFFSET(entry);

				if (entry->vme_end < dst_end) {
					sub_end = entry->vme_end;
				} else {
					sub_end = dst_end;
				}
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				/* Lock dropped across the recursive check. */
				vm_map_unlock(dst_map);

				kr = vm_map_overwrite_submap_recurse(
					VME_SUBMAP(entry),
					sub_start,
					sub_end - sub_start);
				if (kr != KERN_SUCCESS) {
					return kr;
				}
				vm_map_lock(dst_map);
			}

			if (dst_end <= entry->vme_end) {
				goto start_overwrite;
			}
			if (!vm_map_lookup_entry(dst_map, local_end,
			    &entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			next = entry->vme_next;
		}

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 * our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			break;
		}
		/*
		 * check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}


		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		    (VME_OBJECT(entry)->true_share))) {
			contains_permanent_objects = TRUE;
		}

		entry = next;
	}/* for */

start_overwrite:
	/*
	 *	If there are permanent objects in the destination, then
	 *	the copy cannot be interrupted.
	 */

	if (interruptible && contains_permanent_objects) {
		vm_map_unlock(dst_map);
		return KERN_FAILURE;    /* XXX */
	}

	/*
	 *
	 *	Make a second pass, overwriting the data
	 *	At the beginning of each loop iteration,
	 *	the next entry to be overwritten is "tmp_entry"
	 *	(initially, the value returned from the lookup above),
	 *	and the starting address expected in that entry
	 *	is "start".
	 */

	total_size = copy->size;
	if (encountered_sub_map) {
		copy_size = 0;
		/* re-calculate tmp_entry since we've had the map */
		/* unlocked */
		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
	} else {
		copy_size = copy->size;
	}

	base_addr = dst_addr;
	while (TRUE) {
		/* deconstruct the copy object and do in parts */
		/* only in sub_map, interruptable case */
		vm_map_entry_t  copy_entry;
		vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
		vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
		int             nentries;
		int             remaining_entries = 0;
		vm_map_offset_t new_offset = 0;

		/*
		 * Determine "copy_size": how far we can go from base_addr
		 * before hitting a submap boundary (or the end of the range).
		 * Submaps encountered here are handled by recursion.
		 */
		for (entry = tmp_entry; copy_size == 0;) {
			vm_map_entry_t  next;

			next = entry->vme_next;

			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpase tmp_entry, and the copy_size */
			/* may reflect the distance between them */
			/* if the current entry is found to be in transition */
			/* we will start over at the beginning or the last */
			/* encounter of a submap as dictated by base_addr */
			/* we will zero copy_size accordingly. */
			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				if (!vm_map_lookup_entry(dst_map, base_addr,
				    &tmp_entry)) {
					vm_map_unlock(dst_map);
					return KERN_INVALID_ADDRESS;
				}
				copy_size = 0;
				entry = tmp_entry;
				continue;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;
				vm_map_offset_t local_end;

				if (entry->needs_copy) {
					/* if this is a COW submap */
					/* just back the range with a */
					/* anonymous entry */
					if (entry->vme_end < dst_end) {
						sub_end = entry->vme_end;
					} else {
						sub_end = dst_end;
					}
					if (entry->vme_start < base_addr) {
						sub_start = base_addr;
					} else {
						sub_start = entry->vme_start;
					}
					vm_map_clip_end(
						dst_map, entry, sub_end);
					vm_map_clip_start(
						dst_map, entry, sub_start);
					assert(!entry->use_pmap);
					assert(!entry->iokit_acct);
					/*
					 * Convert the clipped submap entry in
					 * place into a plain anonymous entry.
					 */
					entry->use_pmap = TRUE;
					entry->is_sub_map = FALSE;
					vm_map_deallocate(
						VME_SUBMAP(entry));
					VME_OBJECT_SET(entry, VM_OBJECT_NULL);
					VME_OFFSET_SET(entry, 0);
					entry->is_shared = FALSE;
					entry->needs_copy = FALSE;
					entry->protection = VM_PROT_DEFAULT;
					entry->max_protection = VM_PROT_ALL;
					entry->wired_count = 0;
					entry->user_wired_count = 0;
					if (entry->inheritance
					    == VM_INHERIT_SHARE) {
						entry->inheritance = VM_INHERIT_COPY;
					}
					continue;
				}
				/* first take care of any non-sub_map */
				/* entries to send */
				if (base_addr < entry->vme_start) {
					/* stuff to send */
					copy_size =
					    entry->vme_start - base_addr;
					break;
				}
				sub_start = VME_OFFSET(entry);

				if (entry->vme_end < dst_end) {
					sub_end = entry->vme_end;
				} else {
					sub_end = dst_end;
				}
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				local_end = entry->vme_end;
				vm_map_unlock(dst_map);
				copy_size = sub_end - sub_start;

				/* adjust the copy object */
				/*
				 * If the copy is bigger than this piece, clip
				 * it: leave the first copy_size bytes' worth of
				 * entries on "copy" and stash the tail
				 * (next_copy .. previous_prev) for later.
				 */
				if (total_size > copy_size) {
					vm_map_size_t   local_size = 0;
					vm_map_size_t   entry_size;

					nentries = 1;
					new_offset = copy->offset;
					copy_entry = vm_map_copy_first_entry(copy);
					while (copy_entry !=
					    vm_map_copy_to_entry(copy)) {
						entry_size = copy_entry->vme_end -
						    copy_entry->vme_start;
						if ((local_size < copy_size) &&
						    ((local_size + entry_size)
						    >= copy_size)) {
							vm_map_copy_clip_end(copy,
							    copy_entry,
							    copy_entry->vme_start +
							    (copy_size - local_size));
							entry_size = copy_entry->vme_end -
							    copy_entry->vme_start;
							local_size += entry_size;
							new_offset += entry_size;
						}
						if (local_size >= copy_size) {
							next_copy = copy_entry->vme_next;
							copy_entry->vme_next =
							    vm_map_copy_to_entry(copy);
							previous_prev =
							    copy->cpy_hdr.links.prev;
							copy->cpy_hdr.links.prev = copy_entry;
							copy->size = copy_size;
							remaining_entries =
							    copy->cpy_hdr.nentries;
							remaining_entries -= nentries;
							copy->cpy_hdr.nentries = nentries;
							break;
						} else {
							local_size += entry_size;
							new_offset += entry_size;
							nentries++;
						}
						copy_entry = copy_entry->vme_next;
					}
				}

				/*
				 * Recurse into the submap; the pmap passed
				 * down selects which physical map the aligned
				 * path will operate on.
				 */
				if ((entry->use_pmap) && (pmap == NULL)) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						VME_SUBMAP(entry)->pmap,
						TRUE);
				} else if (pmap != NULL) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible, pmap,
						TRUE);
				} else {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						dst_map->pmap,
						TRUE);
				}
				if (kr != KERN_SUCCESS) {
					/* Reattach the stashed tail before failing. */
					if (next_copy != NULL) {
						copy->cpy_hdr.nentries +=
						    remaining_entries;
						copy->cpy_hdr.links.prev->vme_next =
						    next_copy;
						copy->cpy_hdr.links.prev
						        = previous_prev;
						copy->size = total_size;
					}
					return kr;
				}
				if (dst_end <= local_end) {
					return KERN_SUCCESS;
				}
				/* otherwise copy no longer exists, it was */
				/* destroyed after successful copy_overwrite */
				copy = vm_map_copy_allocate();
				copy->type = VM_MAP_COPY_ENTRY_LIST;
				copy->offset = new_offset;
				copy->cpy_hdr.page_shift = copy_page_shift;

				/*
				 * XXX FBDP
				 * this does not seem to deal with
				 * the VM map store (R&B tree)
				 */

				total_size -= copy_size;
				copy_size = 0;
				/* put back remainder of copy in container */
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries = remaining_entries;
					copy->cpy_hdr.links.next = next_copy;
					copy->cpy_hdr.links.prev = previous_prev;
					copy->size = total_size;
					next_copy->vme_prev =
					    vm_map_copy_to_entry(copy);
					next_copy = NULL;
				}
				base_addr = local_end;
				vm_map_lock(dst_map);
				if (!vm_map_lookup_entry(dst_map,
				    local_end, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return KERN_INVALID_ADDRESS;
				}
				entry = tmp_entry;
				continue;
			}
			if (dst_end <= entry->vme_end) {
				copy_size = dst_end - base_addr;
				break;
			}

			if ((next == vm_map_to_entry(dst_map)) ||
			    (next->vme_start != entry->vme_end)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}

			entry = next;
		}/* for */

		next_copy = NULL;
		nentries = 1;

		/* adjust the copy object */
		/* (same clip-and-stash scheme as above, for the flat case) */
		if (total_size > copy_size) {
			vm_map_size_t   local_size = 0;
			vm_map_size_t   entry_size;

			new_offset = copy->offset;
			copy_entry = vm_map_copy_first_entry(copy);
			while (copy_entry != vm_map_copy_to_entry(copy)) {
				entry_size = copy_entry->vme_end -
				    copy_entry->vme_start;
				if ((local_size < copy_size) &&
				    ((local_size + entry_size)
				    >= copy_size)) {
					vm_map_copy_clip_end(copy, copy_entry,
					    copy_entry->vme_start +
					    (copy_size - local_size));
					entry_size = copy_entry->vme_end -
					    copy_entry->vme_start;
					local_size += entry_size;
					new_offset += entry_size;
				}
				if (local_size >= copy_size) {
					next_copy = copy_entry->vme_next;
					copy_entry->vme_next =
					    vm_map_copy_to_entry(copy);
					previous_prev =
					    copy->cpy_hdr.links.prev;
					copy->cpy_hdr.links.prev = copy_entry;
					copy->size = copy_size;
					remaining_entries =
					    copy->cpy_hdr.nentries;
					remaining_entries -= nentries;
					copy->cpy_hdr.nentries = nentries;
					break;
				} else {
					local_size += entry_size;
					new_offset += entry_size;
					nentries++;
				}
				copy_entry = copy_entry->vme_next;
			}
		}

		if (aligned) {
			pmap_t  local_pmap;

			if (pmap) {
				local_pmap = pmap;
			} else {
				local_pmap = dst_map->pmap;
			}

			if ((kr = vm_map_copy_overwrite_aligned(
				    dst_map, tmp_entry, copy,
				    base_addr, local_pmap)) != KERN_SUCCESS) {
				/* Reattach the stashed tail before failing. */
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
					    remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
					    next_copy;
					copy->cpy_hdr.links.prev =
					    previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
			vm_map_unlock(dst_map);
		} else {
			/*
			 * Performance gain:
			 *
			 * if the copy and dst address are misaligned but the same
			 * offset within the page we can copy_not_aligned the
			 * misaligned parts and copy aligned the rest.  If they are
			 * aligned but len is unaligned we simply need to copy
			 * the end bit unaligned.  We'll need to split the misaligned
			 * bits of the region in this case !
			 */
			/* ALWAYS UNLOCKS THE dst_map MAP */
			kr = vm_map_copy_overwrite_unaligned(
				dst_map,
				tmp_entry,
				copy,
				base_addr,
				discard_on_success);
			if (kr != KERN_SUCCESS) {
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
					    remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
					    next_copy;
					copy->cpy_hdr.links.prev =
					    previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
		}
		total_size -= copy_size;
		if (total_size == 0) {
			break;
		}
		/* Advance to the next piece and restore the stashed tail. */
		base_addr += copy_size;
		copy_size = 0;
		copy->offset = new_offset;
		if (next_copy != NULL) {
			copy->cpy_hdr.nentries = remaining_entries;
			copy->cpy_hdr.links.next = next_copy;
			copy->cpy_hdr.links.prev = previous_prev;
			next_copy->vme_prev = vm_map_copy_to_entry(copy);
			copy->size = total_size;
		}
		vm_map_lock(dst_map);
		while (TRUE) {
			if (!vm_map_lookup_entry(dst_map,
			    base_addr, &tmp_entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			if (tmp_entry->in_transition) {
				/*
				 * NOTE(review): this flags "entry" (the last
				 * entry processed) for wakeup while waiting on
				 * "tmp_entry" — looks like it should be
				 * tmp_entry->needs_wakeup; confirm intent.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);
			} else {
				break;
			}
		}
		vm_map_clip_start(dst_map,
		    tmp_entry,
		    vm_map_trunc_page(base_addr,
		    VM_MAP_PAGE_MASK(dst_map)));

		entry = tmp_entry;
	} /* while */

	/*
	 *	Throw away the vm_map_copy object
	 */
	if (discard_on_success) {
		vm_map_copy_discard(copy);
	}

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite */
9626
9627 kern_return_t
vm_map_copy_overwrite(vm_map_t dst_map,vm_map_offset_t dst_addr,vm_map_copy_t copy,vm_map_size_t copy_size,boolean_t interruptible)9628 vm_map_copy_overwrite(
9629 vm_map_t dst_map,
9630 vm_map_offset_t dst_addr,
9631 vm_map_copy_t copy,
9632 vm_map_size_t copy_size,
9633 boolean_t interruptible)
9634 {
9635 vm_map_size_t head_size, tail_size;
9636 vm_map_copy_t head_copy, tail_copy;
9637 vm_map_offset_t head_addr, tail_addr;
9638 vm_map_entry_t entry;
9639 kern_return_t kr;
9640 vm_map_offset_t effective_page_mask, effective_page_size;
9641 int copy_page_shift;
9642
9643 head_size = 0;
9644 tail_size = 0;
9645 head_copy = NULL;
9646 tail_copy = NULL;
9647 head_addr = 0;
9648 tail_addr = 0;
9649
9650 if (interruptible ||
9651 copy == VM_MAP_COPY_NULL ||
9652 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9653 /*
9654 * We can't split the "copy" map if we're interruptible
9655 * or if we don't have a "copy" map...
9656 */
9657 blunt_copy:
9658 return vm_map_copy_overwrite_nested(dst_map,
9659 dst_addr,
9660 copy,
9661 interruptible,
9662 (pmap_t) NULL,
9663 TRUE);
9664 }
9665
9666 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9667 if (copy_page_shift < PAGE_SHIFT ||
9668 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9669 goto blunt_copy;
9670 }
9671
9672 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9673 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9674 } else {
9675 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9676 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9677 effective_page_mask);
9678 }
9679 effective_page_size = effective_page_mask + 1;
9680
9681 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9682 /*
9683 * Too small to bother with optimizing...
9684 */
9685 goto blunt_copy;
9686 }
9687
9688 if ((dst_addr & effective_page_mask) !=
9689 (copy->offset & effective_page_mask)) {
9690 /*
9691 * Incompatible mis-alignment of source and destination...
9692 */
9693 goto blunt_copy;
9694 }
9695
9696 /*
9697 * Proper alignment or identical mis-alignment at the beginning.
9698 * Let's try and do a small unaligned copy first (if needed)
9699 * and then an aligned copy for the rest.
9700 */
9701 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9702 head_addr = dst_addr;
9703 head_size = (effective_page_size -
9704 (copy->offset & effective_page_mask));
9705 head_size = MIN(head_size, copy_size);
9706 }
9707 if (!vm_map_page_aligned(copy->offset + copy_size,
9708 effective_page_mask)) {
9709 /*
9710 * Mis-alignment at the end.
9711 * Do an aligned copy up to the last page and
9712 * then an unaligned copy for the remaining bytes.
9713 */
9714 tail_size = ((copy->offset + copy_size) &
9715 effective_page_mask);
9716 tail_size = MIN(tail_size, copy_size);
9717 tail_addr = dst_addr + copy_size - tail_size;
9718 assert(tail_addr >= head_addr + head_size);
9719 }
9720 assert(head_size + tail_size <= copy_size);
9721
9722 if (head_size + tail_size == copy_size) {
9723 /*
9724 * It's all unaligned, no optimization possible...
9725 */
9726 goto blunt_copy;
9727 }
9728
9729 /*
9730 * Can't optimize if there are any submaps in the
9731 * destination due to the way we free the "copy" map
9732 * progressively in vm_map_copy_overwrite_nested()
9733 * in that case.
9734 */
9735 vm_map_lock_read(dst_map);
9736 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9737 vm_map_unlock_read(dst_map);
9738 goto blunt_copy;
9739 }
9740 for (;
9741 (entry != vm_map_copy_to_entry(copy) &&
9742 entry->vme_start < dst_addr + copy_size);
9743 entry = entry->vme_next) {
9744 if (entry->is_sub_map) {
9745 vm_map_unlock_read(dst_map);
9746 goto blunt_copy;
9747 }
9748 }
9749 vm_map_unlock_read(dst_map);
9750
9751 if (head_size) {
9752 /*
9753 * Unaligned copy of the first "head_size" bytes, to reach
9754 * a page boundary.
9755 */
9756
9757 /*
9758 * Extract "head_copy" out of "copy".
9759 */
9760 head_copy = vm_map_copy_allocate();
9761 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9762 head_copy->cpy_hdr.entries_pageable =
9763 copy->cpy_hdr.entries_pageable;
9764 vm_map_store_init(&head_copy->cpy_hdr);
9765 head_copy->cpy_hdr.page_shift = copy_page_shift;
9766
9767 entry = vm_map_copy_first_entry(copy);
9768 if (entry->vme_end < copy->offset + head_size) {
9769 head_size = entry->vme_end - copy->offset;
9770 }
9771
9772 head_copy->offset = copy->offset;
9773 head_copy->size = head_size;
9774 copy->offset += head_size;
9775 copy->size -= head_size;
9776 copy_size -= head_size;
9777 assert(copy_size > 0);
9778
9779 vm_map_copy_clip_end(copy, entry, copy->offset);
9780 vm_map_copy_entry_unlink(copy, entry);
9781 vm_map_copy_entry_link(head_copy,
9782 vm_map_copy_to_entry(head_copy),
9783 entry);
9784
9785 /*
9786 * Do the unaligned copy.
9787 */
9788 kr = vm_map_copy_overwrite_nested(dst_map,
9789 head_addr,
9790 head_copy,
9791 interruptible,
9792 (pmap_t) NULL,
9793 FALSE);
9794 if (kr != KERN_SUCCESS) {
9795 goto done;
9796 }
9797 }
9798
9799 if (tail_size) {
9800 /*
9801 * Extract "tail_copy" out of "copy".
9802 */
9803 tail_copy = vm_map_copy_allocate();
9804 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9805 tail_copy->cpy_hdr.entries_pageable =
9806 copy->cpy_hdr.entries_pageable;
9807 vm_map_store_init(&tail_copy->cpy_hdr);
9808 tail_copy->cpy_hdr.page_shift = copy_page_shift;
9809
9810 tail_copy->offset = copy->offset + copy_size - tail_size;
9811 tail_copy->size = tail_size;
9812
9813 copy->size -= tail_size;
9814 copy_size -= tail_size;
9815 assert(copy_size > 0);
9816
9817 entry = vm_map_copy_last_entry(copy);
9818 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9819 entry = vm_map_copy_last_entry(copy);
9820 vm_map_copy_entry_unlink(copy, entry);
9821 vm_map_copy_entry_link(tail_copy,
9822 vm_map_copy_last_entry(tail_copy),
9823 entry);
9824 }
9825
9826 /*
9827 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9828 * we want to avoid TOCTOU issues w.r.t copy->size but
9829 * we don't need to change vm_map_copy_overwrite_nested()
9830 * and all other vm_map_copy_overwrite variants.
9831 *
9832 * So we assign the original copy_size that was passed into
9833 * this routine back to copy.
9834 *
9835 * This use of local 'copy_size' passed into this routine is
9836 * to try and protect against TOCTOU attacks where the kernel
9837 * has been exploited. We don't expect this to be an issue
9838 * during normal system operation.
9839 */
9840 assertf(copy->size == copy_size,
9841 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9842 copy->size = copy_size;
9843
9844 /*
9845 * Copy most (or possibly all) of the data.
9846 */
9847 kr = vm_map_copy_overwrite_nested(dst_map,
9848 dst_addr + head_size,
9849 copy,
9850 interruptible,
9851 (pmap_t) NULL,
9852 FALSE);
9853 if (kr != KERN_SUCCESS) {
9854 goto done;
9855 }
9856
9857 if (tail_size) {
9858 kr = vm_map_copy_overwrite_nested(dst_map,
9859 tail_addr,
9860 tail_copy,
9861 interruptible,
9862 (pmap_t) NULL,
9863 FALSE);
9864 }
9865
9866 done:
9867 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9868 if (kr == KERN_SUCCESS) {
9869 /*
9870 * Discard all the copy maps.
9871 */
9872 if (head_copy) {
9873 vm_map_copy_discard(head_copy);
9874 head_copy = NULL;
9875 }
9876 vm_map_copy_discard(copy);
9877 if (tail_copy) {
9878 vm_map_copy_discard(tail_copy);
9879 tail_copy = NULL;
9880 }
9881 } else {
9882 /*
9883 * Re-assemble the original copy map.
9884 */
9885 if (head_copy) {
9886 entry = vm_map_copy_first_entry(head_copy);
9887 vm_map_copy_entry_unlink(head_copy, entry);
9888 vm_map_copy_entry_link(copy,
9889 vm_map_copy_to_entry(copy),
9890 entry);
9891 copy->offset -= head_size;
9892 copy->size += head_size;
9893 vm_map_copy_discard(head_copy);
9894 head_copy = NULL;
9895 }
9896 if (tail_copy) {
9897 entry = vm_map_copy_last_entry(tail_copy);
9898 vm_map_copy_entry_unlink(tail_copy, entry);
9899 vm_map_copy_entry_link(copy,
9900 vm_map_copy_last_entry(copy),
9901 entry);
9902 copy->size += tail_size;
9903 vm_map_copy_discard(tail_copy);
9904 tail_copy = NULL;
9905 }
9906 }
9907 return kr;
9908 }
9909
9910
9911 /*
9912 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9913 *
 *	Description:
9915 * Physically copy unaligned data
9916 *
9917 * Implementation:
9918 * Unaligned parts of pages have to be physically copied. We use
 *	a modified form of vm_fault_copy (which understands non-aligned
 *	page offsets and sizes) to do the copy.  We attempt to copy as
 *	much memory in one go as possible, however vm_fault_copy copies
9922 * within 1 memory object so we have to find the smaller of "amount left"
9923 * "source object data size" and "target object data size". With
9924 * unaligned data we don't need to split regions, therefore the source
9925 * (copy) object should be one map entry, the target range may be split
9926 * over multiple map entries however. In any event we are pessimistic
9927 * about these assumptions.
9928 *
9929 * Assumptions:
9930 * dst_map is locked on entry and is return locked on success,
9931 * unlocked on error.
9932 */
9933
/*
 * Physically copy the contents of "copy" over the destination range
 * beginning at "start" in "dst_map", without requiring page alignment.
 *
 * "entry" is the destination map entry containing "start"; dst_map is
 * locked for writing on entry (immediately downgraded to a read lock),
 * left read-locked on success, and unlocked on error.  When
 * "discard_on_success" is TRUE, consumed copy entries are unlinked and
 * disposed of as the copy progresses.
 */
static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t        dst_map,
	vm_map_entry_t  entry,
	vm_map_copy_t   copy,
	vm_map_offset_t start,
	boolean_t       discard_on_success)
{
	vm_map_entry_t          copy_entry;
	vm_map_entry_t          copy_entry_next;
	vm_map_version_t        version;
	vm_object_t             dst_object;
	vm_object_offset_t      dst_offset;
	vm_object_offset_t      src_offset;
	vm_object_offset_t      entry_offset;
	vm_map_offset_t         entry_end;
	vm_map_size_t           src_size,
	    dst_size,
	    copy_size,
	    amount_left;
	kern_return_t           kr = KERN_SUCCESS;


	copy_entry = vm_map_copy_first_entry(copy);

	vm_map_lock_write_to_read(dst_map);

	/* offset of copy->offset within its first page, per the copy's page size */
	src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
	amount_left = copy->size;
	/*
	 * unaligned so we never clipped this entry, we need the offset into
	 * the vm_object not just the data.
	 */
	while (amount_left > 0) {
		if (entry == vm_map_to_entry(dst_map)) {
			/* ran off the end of the destination map */
			vm_map_unlock_read(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/* "start" must be within the current map entry */
		assert((start >= entry->vme_start) && (start < entry->vme_end));

		dst_offset = start - entry->vme_start;

		dst_size = entry->vme_end - start;

		src_size = copy_entry->vme_end -
		    (copy_entry->vme_start + src_offset);

		/* copy the smaller of what's left in the source entry and the
		 * destination entry */
		if (dst_size < src_size) {
			/*
			 * we can only copy dst_size bytes before
			 * we have to get the next destination entry
			 */
			copy_size = dst_size;
		} else {
			/*
			 * we can only copy src_size bytes before
			 * we have to get the next source copy entry
			 */
			copy_size = src_size;
		}

		if (copy_size > amount_left) {
			copy_size = amount_left;
		}
		/*
		 * Entry needs copy, create a shadow shadow object for
		 * Copy on write region.
		 */
		if (entry->needs_copy &&
		    ((entry->protection & VM_PROT_WRITE) != 0)) {
			if (vm_map_lock_read_to_write(dst_map)) {
				/* lost the lock during upgrade: "entry" may be
				 * stale, re-lookup it below */
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			VME_OBJECT_SHADOW(entry,
			    (vm_map_size_t)(entry->vme_end
			    - entry->vme_start));
			entry->needs_copy = FALSE;
			vm_map_lock_write_to_read(dst_map);
		}
		dst_object = VME_OBJECT(entry);
		/*
		 * unlike with the virtual (aligned) copy we're going
		 * to fault on it therefore we need a target object.
		 */
		if (dst_object == VM_OBJECT_NULL) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			dst_object = vm_object_allocate((vm_map_size_t)
			    entry->vme_end - entry->vme_start);
			VME_OBJECT_SET(entry, dst_object);
			VME_OFFSET_SET(entry, 0);
			assert(entry->use_pmap);
			vm_map_lock_write_to_read(dst_map);
		}
		/*
		 * Take an object reference and unlock map. The "entry" may
		 * disappear or change when the map is unlocked.
		 */
		vm_object_reference(dst_object);
		version.main_timestamp = dst_map->timestamp;
		entry_offset = VME_OFFSET(entry);
		entry_end = entry->vme_end;
		vm_map_unlock_read(dst_map);
		/*
		 * Copy as much as possible in one pass
		 */
		kr = vm_fault_copy(
			VME_OBJECT(copy_entry),
			VME_OFFSET(copy_entry) + src_offset,
			&copy_size,
			dst_object,
			entry_offset + dst_offset,
			dst_map,
			&version,
			THREAD_UNINT );

		start += copy_size;
		src_offset += copy_size;
		amount_left -= copy_size;
		/*
		 * Release the object reference
		 */
		vm_object_deallocate(dst_object);
		/*
		 * If a hard error occurred, return it now
		 */
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
		    || amount_left == 0) {
			/*
			 * all done with this copy entry, dispose.
			 */
			copy_entry_next = copy_entry->vme_next;

			if (discard_on_success) {
				vm_map_copy_entry_unlink(copy, copy_entry);
				assert(!copy_entry->is_sub_map);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			if (copy_entry_next == vm_map_copy_to_entry(copy) &&
			    amount_left) {
				/*
				 * not finished copying but run out of source
				 */
				return KERN_INVALID_ADDRESS;
			}

			copy_entry = copy_entry_next;

			src_offset = 0;
		}

		if (amount_left == 0) {
			return KERN_SUCCESS;
		}

		vm_map_lock_read(dst_map);
		if (version.main_timestamp == dst_map->timestamp) {
			/* map unchanged while unlocked: "entry" is still valid */
			if (start == entry_end) {
				/*
				 * destination region is split.  Use the version
				 * information to avoid a lookup in the normal
				 * case.
				 */
				entry = entry->vme_next;
				/*
				 * should be contiguous. Fail if we encounter
				 * a hole in the destination.
				 */
				if (start != entry->vme_start) {
					vm_map_unlock_read(dst_map);
					return KERN_INVALID_ADDRESS;
				}
			}
		} else {
			/*
			 * Map version check failed.
			 * we must lookup the entry because somebody
			 * might have changed the map behind our backs.
			 */
RetryLookup:
			if (!vm_map_lookup_entry(dst_map, start, &entry)) {
				vm_map_unlock_read(dst_map);
				return KERN_INVALID_ADDRESS;
			}
		}
	}/* while */

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_unaligned */
10134
10135 /*
10136 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10137 *
10138 * Description:
10139 * Does all the vm_trickery possible for whole pages.
10140 *
10141 * Implementation:
10142 *
10143 * If there are no permanent objects in the destination,
10144 * and the source and destination map entry zones match,
10145 * and the destination map entry is not shared,
10146 * then the map entries can be deleted and replaced
10147 * with those from the copy. The following code is the
10148 * basic idea of what to do, but there are lots of annoying
10149 * little details about getting protection and inheritance
10150 * right. Should add protection, inheritance, and sharing checks
10151 * to the above pass and make sure that no wiring is involved.
10152 */
10153
/*
 * Counters for vm_map_copy_overwrite_aligned(): number of times the
 * optimized entry-substitution path was abandoned in favor of a
 * physical copy, broken down by reason.  Debug/telemetry only.
 */
int vm_map_copy_overwrite_aligned_src_not_internal = 0;  /* source backed by non-anonymous memory */
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0; /* true_share or non-SYMMETRIC copy strategy */
int vm_map_copy_overwrite_aligned_src_large = 0;         /* small copy out of a very large object */
10157
/*
 * Overwrite the destination range starting at "start" (whose first map
 * entry is "tmp_entry") with the page-aligned "copy".
 *
 * For each copy entry, either substitute the copy entry's object for the
 * destination entry's object (the optimized "virtual copy" path, when the
 * destination is temporary, unshared, internal memory) or fall back to a
 * physical copy via vm_fault_copy() ("slow_copy").
 *
 * dst_map is locked for writing on entry, left locked on success and
 * unlocked on error.  Copy entries are consumed as they are processed.
 */
static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t        dst_map,
	vm_map_entry_t  tmp_entry,
	vm_map_copy_t   copy,
	vm_map_offset_t start,
	__unused pmap_t pmap)
{
	vm_object_t     object;
	vm_map_entry_t  copy_entry;
	vm_map_size_t   copy_size;
	vm_map_size_t   size;
	vm_map_entry_t  entry;

	while ((copy_entry = vm_map_copy_first_entry(copy))
	    != vm_map_copy_to_entry(copy)) {
		copy_size = (copy_entry->vme_end - copy_entry->vme_start);

		entry = tmp_entry;
		if (entry->is_sub_map) {
			/* unnested when clipped earlier */
			assert(!entry->use_pmap);
		}
		if (entry == vm_map_to_entry(dst_map)) {
			/* ran off the end of the destination map */
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
		size = (entry->vme_end - entry->vme_start);
		/*
		 * Make sure that no holes popped up in the
		 * address map, and that the protection is
		 * still valid, in case the map was unlocked
		 * earlier.
		 */

		if ((entry->vme_start != start) || ((entry->is_sub_map)
		    && !entry->needs_copy)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
		assert(entry != vm_map_to_entry(dst_map));

		/*
		 * Check protection again
		 */

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * Adjust to source size first
		 */

		if (copy_size < size) {
			/* destination entry is larger than this copy entry:
			 * clip the destination down to match */
			if (entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
			    VM_MAP_PAGE_MASK(dst_map))) {
				/* no longer map-aligned */
				entry->map_aligned = FALSE;
			}
			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
			size = copy_size;
		}

		/*
		 * Adjust to destination size
		 */

		if (size < copy_size) {
			/* copy entry is larger: clip it down to the
			 * destination entry's size */
			vm_map_copy_clip_end(copy, copy_entry,
			    copy_entry->vme_start + size);
			copy_size = size;
		}

		/* after clipping, source and destination cover the same size */
		assert((entry->vme_end - entry->vme_start) == size);
		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
		assert((copy_entry->vme_end - copy_entry->vme_start) == size);

		/*
		 * If the destination contains temporary unshared memory,
		 * we can perform the copy by throwing it away and
		 * installing the source data.
		 */

		object = VME_OBJECT(entry);
		if ((!entry->is_shared &&
		    ((object == VM_OBJECT_NULL) ||
		    (object->internal && !object->true_share))) ||
		    entry->needs_copy) {
			vm_object_t     old_object = VME_OBJECT(entry);
			vm_object_offset_t      old_offset = VME_OFFSET(entry);
			vm_object_offset_t      offset;

			/*
			 * Ensure that the source and destination aren't
			 * identical
			 */
			if (old_object == VME_OBJECT(copy_entry) &&
			    old_offset == VME_OFFSET(copy_entry)) {
				/* same object+offset: nothing to copy, just
				 * consume the copy entry and move on */
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_map_copy_entry_dispose(copy, copy_entry);

				if (old_object != VM_OBJECT_NULL) {
					vm_object_deallocate(old_object);
				}

				start = tmp_entry->vme_end;
				tmp_entry = tmp_entry->vme_next;
				continue;
			}

#if XNU_TARGET_OS_OSX
#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
#define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
			if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
			    VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
			    copy_size <= __TRADEOFF1_COPY_SIZE) {
				/*
				 * Virtual vs. Physical copy tradeoff #1.
				 *
				 * Copying only a few pages out of a large
				 * object:  do a physical copy instead of
				 * a virtual copy, to avoid possibly keeping
				 * the entire large object alive because of
				 * those few copy-on-write pages.
				 */
				vm_map_copy_overwrite_aligned_src_large++;
				goto slow_copy;
			}
#endif /* XNU_TARGET_OS_OSX */

			if ((dst_map->pmap != kernel_pmap) &&
			    (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
			    (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
				vm_object_t new_object, new_shadow;

				/*
				 * We're about to map something over a mapping
				 * established by malloc()...
				 */
				new_object = VME_OBJECT(copy_entry);
				if (new_object != VM_OBJECT_NULL) {
					vm_object_lock_shared(new_object);
				}
				/* walk down the shadow chain, holding a shared
				 * lock on the object being examined */
				while (new_object != VM_OBJECT_NULL &&
#if XNU_TARGET_OS_OSX
				    !new_object->true_share &&
				    new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
#endif /* XNU_TARGET_OS_OSX */
				    new_object->internal) {
					new_shadow = new_object->shadow;
					if (new_shadow == VM_OBJECT_NULL) {
						break;
					}
					vm_object_lock_shared(new_shadow);
					vm_object_unlock(new_object);
					new_object = new_shadow;
				}
				if (new_object != VM_OBJECT_NULL) {
					if (!new_object->internal) {
						/*
						 * The new mapping is backed
						 * by an external object.  We
						 * don't want malloc'ed memory
						 * to be replaced with such a
						 * non-anonymous mapping, so
						 * let's go off the optimized
						 * path...
						 */
						vm_map_copy_overwrite_aligned_src_not_internal++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
#if XNU_TARGET_OS_OSX
					if (new_object->true_share ||
					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
						/*
						 * Same if there's a "true_share"
						 * object in the shadow chain, or
						 * an object with a non-default
						 * (SYMMETRIC) copy strategy.
						 */
						vm_map_copy_overwrite_aligned_src_not_symmetric++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
#endif /* XNU_TARGET_OS_OSX */
					vm_object_unlock(new_object);
				}
				/*
				 * The new mapping is still backed by
				 * anonymous (internal) memory, so it's
				 * OK to substitute it for the original
				 * malloc() mapping.
				 */
			}

			if (old_object != VM_OBJECT_NULL) {
				/* tear down the old mapping (submap or
				 * object) before installing the new one */
				if (entry->is_sub_map) {
					if (entry->use_pmap) {
#ifndef NO_NESTED_PMAP
						pmap_unnest(dst_map->pmap,
						    (addr64_t)entry->vme_start,
						    entry->vme_end - entry->vme_start);
#endif  /* NO_NESTED_PMAP */
						if (dst_map->mapped_in_other_pmaps) {
							/* clean up parent */
							/* map/maps */
							vm_map_submap_pmap_clean(
								dst_map, entry->vme_start,
								entry->vme_end,
								VME_SUBMAP(entry),
								VME_OFFSET(entry));
						}
					} else {
						vm_map_submap_pmap_clean(
							dst_map, entry->vme_start,
							entry->vme_end,
							VME_SUBMAP(entry),
							VME_OFFSET(entry));
					}
					vm_map_deallocate(VME_SUBMAP(entry));
				} else {
					if (dst_map->mapped_in_other_pmaps) {
						vm_object_pmap_protect_options(
							VME_OBJECT(entry),
							VME_OFFSET(entry),
							entry->vme_end
							- entry->vme_start,
							PMAP_NULL,
							PAGE_SIZE,
							entry->vme_start,
							VM_PROT_NONE,
							PMAP_OPTIONS_REMOVE);
					} else {
						pmap_remove_options(
							dst_map->pmap,
							(addr64_t)(entry->vme_start),
							(addr64_t)(entry->vme_end),
							PMAP_OPTIONS_REMOVE);
					}
					vm_object_deallocate(old_object);
				}
			}

			if (entry->iokit_acct) {
				/* keep using iokit accounting */
				entry->use_pmap = FALSE;
			} else {
				/* use pmap accounting */
				entry->use_pmap = TRUE;
			}
			/* install the copy entry's object in the destination
			 * entry, transferring the reference */
			entry->is_sub_map = FALSE;
			VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
			object = VME_OBJECT(entry);
			entry->needs_copy = copy_entry->needs_copy;
			entry->wired_count = 0;
			entry->user_wired_count = 0;
			offset = VME_OFFSET(copy_entry);
			VME_OFFSET_SET(entry, offset);

			vm_map_copy_entry_unlink(copy, copy_entry);
			vm_map_copy_entry_dispose(copy, copy_entry);

			/*
			 * we could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped
			 * and ALL the pages in the dest were touched, if there were fewer
			 * than 2/3 of the pages touched, this optimization actually cost more cycles
			 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
			 */

			/*
			 * Set up for the next iteration.  The map
			 * has not been unlocked, so the next
			 * address should be at the end of this
			 * entry, and the next map entry should be
			 * the one following it.
			 */

			start = tmp_entry->vme_end;
			tmp_entry = tmp_entry->vme_next;
		} else {
			vm_map_version_t        version;
			vm_object_t             dst_object;
			vm_object_offset_t      dst_offset;
			kern_return_t           r;

slow_copy:
			if (entry->needs_copy) {
				VME_OBJECT_SHADOW(entry,
				    (entry->vme_end -
				    entry->vme_start));
				entry->needs_copy = FALSE;
			}

			dst_object = VME_OBJECT(entry);
			dst_offset = VME_OFFSET(entry);

			/*
			 * Take an object reference, and record
			 * the map version information so that the
			 * map can be safely unlocked.
			 */

			if (dst_object == VM_OBJECT_NULL) {
				/*
				 * We would usually have just taken the
				 * optimized path above if the destination
				 * object has not been allocated yet.  But we
				 * now disable that optimization if the copy
				 * entry's object is not backed by anonymous
				 * memory to avoid replacing malloc'ed
				 * (i.e. re-usable) anonymous memory with a
				 * not-so-anonymous mapping.
				 * So we have to handle this case here and
				 * allocate a new VM object for this map entry.
				 */
				dst_object = vm_object_allocate(
					entry->vme_end - entry->vme_start);
				dst_offset = 0;
				VME_OBJECT_SET(entry, dst_object);
				VME_OFFSET_SET(entry, dst_offset);
				assert(entry->use_pmap);
			}

			vm_object_reference(dst_object);

			/* account for unlock bumping up timestamp */
			version.main_timestamp = dst_map->timestamp + 1;

			vm_map_unlock(dst_map);

			/*
			 * Copy as much as possible in one pass
			 */

			copy_size = size;
			r = vm_fault_copy(
				VME_OBJECT(copy_entry),
				VME_OFFSET(copy_entry),
				&copy_size,
				dst_object,
				dst_offset,
				dst_map,
				&version,
				THREAD_UNINT );

			/*
			 * Release the object reference
			 */

			vm_object_deallocate(dst_object);

			/*
			 * If a hard error occurred, return it now
			 */

			if (r != KERN_SUCCESS) {
				return r;
			}

			if (copy_size != 0) {
				/*
				 * Dispose of the copied region
				 */

				vm_map_copy_clip_end(copy, copy_entry,
				    copy_entry->vme_start + copy_size);
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			/*
			 * Pick up in the destination map where we left off.
			 *
			 * Use the version information to avoid a lookup
			 * in the normal case.
			 */

			start += copy_size;
			vm_map_lock(dst_map);
			if (version.main_timestamp == dst_map->timestamp &&
			    copy_size != 0) {
				/* We can safely use saved tmp_entry value */

				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_end(dst_map, tmp_entry, start);
				tmp_entry = tmp_entry->vme_next;
			} else {
				/* Must do lookup of tmp_entry */

				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return KERN_INVALID_ADDRESS;
				}
				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_start(dst_map, tmp_entry, start);
			}
		}
	}/* while */

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_aligned */
10583
10584 /*
10585 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10586 *
10587 * Description:
10588 * Copy in data to a kernel buffer from space in the
10589 * source map. The original space may be optionally
10590 * deallocated.
10591 *
10592 * If successful, returns a new copy object.
10593 */
10594 static kern_return_t
vm_map_copyin_kernel_buffer(vm_map_t src_map,vm_map_offset_t src_addr,vm_map_size_t len,boolean_t src_destroy,vm_map_copy_t * copy_result)10595 vm_map_copyin_kernel_buffer(
10596 vm_map_t src_map,
10597 vm_map_offset_t src_addr,
10598 vm_map_size_t len,
10599 boolean_t src_destroy,
10600 vm_map_copy_t *copy_result)
10601 {
10602 kern_return_t kr;
10603 vm_map_copy_t copy;
10604
10605 if (len > msg_ool_size_small) {
10606 return KERN_INVALID_ARGUMENT;
10607 }
10608
10609 copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
10610 copy->cpy_kdata = kalloc_data(len, Z_WAITOK);
10611 if (copy->cpy_kdata == NULL) {
10612 zfree(vm_map_copy_zone, copy);
10613 return KERN_RESOURCE_SHORTAGE;
10614 }
10615
10616 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10617 copy->size = len;
10618 copy->offset = 0;
10619
10620 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10621 if (kr != KERN_SUCCESS) {
10622 kfree_data(copy->cpy_kdata, len);
10623 zfree(vm_map_copy_zone, copy);
10624 return kr;
10625 }
10626 if (src_destroy) {
10627 (void) vm_map_remove(
10628 src_map,
10629 vm_map_trunc_page(src_addr,
10630 VM_MAP_PAGE_MASK(src_map)),
10631 vm_map_round_page(src_addr + len,
10632 VM_MAP_PAGE_MASK(src_map)),
10633 (VM_MAP_REMOVE_INTERRUPTIBLE |
10634 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10635 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10636 }
10637 *copy_result = copy;
10638 return KERN_SUCCESS;
10639 }
10640
10641 /*
10642 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10643 *
10644 * Description:
10645 * Copy out data from a kernel buffer into space in the
 *		destination map.  The space may be optionally dynamically
10647 * allocated.
10648 *
10649 * If successful, consumes the copy object.
10650 * Otherwise, the caller is responsible for it.
10651 */
/* number of times the copyout to the target map failed (telemetry only) */
static int vm_map_copyout_kernel_buffer_failures = 0;

/*
 * Copy the kernel-buffer-backed "copy" object out to "*addr" in "map".
 * If "overwrite" is FALSE, fresh pageable space is allocated in "map"
 * and its address returned through "*addr"; otherwise the data is
 * written over the existing mapping at "*addr".
 *
 * "copy_size" must equal copy->size (caller pre-validated).  On success
 * the copy object is freed when "consume_on_success" is TRUE; on failure
 * the caller keeps ownership of "copy".
 */
static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t                map,
	vm_map_address_t        *addr,  /* IN/OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size,
	boolean_t               overwrite,
	boolean_t               consume_on_success)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	assert(copy->size == copy_size);

	/*
	 * check for corrupted vm_map_copy structure
	 */
	if (copy_size > msg_ool_size_small || copy->offset) {
		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
		    (long long)copy->size, (long long)copy->offset);
	}

	if (!overwrite) {
		/*
		 * Allocate space in the target map for the data
		 */
		*addr = 0;
		kr = vm_map_enter(map,
		    addr,
		    vm_map_round_page(copy_size,
		    VM_MAP_PAGE_MASK(map)),
		    (vm_map_offset_t) 0,
		    VM_FLAGS_ANYWHERE,
		    VM_MAP_KERNEL_FLAGS_NONE,
		    VM_KERN_MEMORY_NONE,
		    VM_OBJECT_NULL,
		    (vm_object_offset_t) 0,
		    FALSE,
		    VM_PROT_DEFAULT,
		    VM_PROT_ALL,
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
#if KASAN
		if (map->pmap == kernel_pmap) {
			kasan_notify_address(*addr, copy->size);
		}
#endif
	}

	/*
	 * Copyout the data from the kernel buffer to the target map.
	 */
	if (thread->map == map) {
		/*
		 * If the target map is the current map, just do
		 * the copy.
		 */
		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t oldmap;

		/*
		 * If the target map is another map, assume the
		 * target's address space identity for the duration
		 * of the copy.
		 */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);

		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			vm_map_copyout_kernel_buffer_failures++;
			kr = KERN_INVALID_ADDRESS;
		}

		/* restore our own address space and drop the map ref */
		(void) vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}

	if (kr != KERN_SUCCESS) {
		/* the copy failed, clean up */
		if (!overwrite) {
			/*
			 * Deallocate the space we allocated in the target map.
			 */
			(void) vm_map_remove(
				map,
				vm_map_trunc_page(*addr,
				VM_MAP_PAGE_MASK(map)),
				vm_map_round_page((*addr +
				vm_map_round_page(copy_size,
				VM_MAP_PAGE_MASK(map))),
				VM_MAP_PAGE_MASK(map)),
				VM_MAP_REMOVE_NO_FLAGS);
			*addr = 0;
		}
	} else {
		/* copy was successful, discard the copy structure */
		if (consume_on_success) {
			kfree_data(copy->cpy_kdata, copy_size);
			zfree(vm_map_copy_zone, copy);
		}
	}

	return kr;
}
10764
10765 /*
10766 * Routine: vm_map_copy_insert [internal use only]
10767 *
10768 * Description:
10769 * Link a copy chain ("copy") into a map at the
10770 * specified location (after "where").
10771 * Side effects:
10772 * The copy chain is destroyed.
10773 */
10774 static void
vm_map_copy_insert(vm_map_t map,vm_map_entry_t after_where,vm_map_copy_t copy)10775 vm_map_copy_insert(
10776 vm_map_t map,
10777 vm_map_entry_t after_where,
10778 vm_map_copy_t copy)
10779 {
10780 vm_map_entry_t entry;
10781
10782 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10783 entry = vm_map_copy_first_entry(copy);
10784 vm_map_copy_entry_unlink(copy, entry);
10785 vm_map_store_entry_link(map, after_where, entry,
10786 VM_MAP_KERNEL_FLAGS_NONE);
10787 after_where = entry;
10788 }
10789 zfree(vm_map_copy_zone, copy);
10790 }
10791
10792 void
vm_map_copy_remap(vm_map_t map,vm_map_entry_t where,vm_map_copy_t copy,vm_map_offset_t adjustment,vm_prot_t cur_prot,vm_prot_t max_prot,vm_inherit_t inheritance)10793 vm_map_copy_remap(
10794 vm_map_t map,
10795 vm_map_entry_t where,
10796 vm_map_copy_t copy,
10797 vm_map_offset_t adjustment,
10798 vm_prot_t cur_prot,
10799 vm_prot_t max_prot,
10800 vm_inherit_t inheritance)
10801 {
10802 vm_map_entry_t copy_entry, new_entry;
10803
10804 for (copy_entry = vm_map_copy_first_entry(copy);
10805 copy_entry != vm_map_copy_to_entry(copy);
10806 copy_entry = copy_entry->vme_next) {
10807 /* get a new VM map entry for the map */
10808 new_entry = vm_map_entry_create(map,
10809 !map->hdr.entries_pageable);
10810 /* copy the "copy entry" to the new entry */
10811 vm_map_entry_copy(map, new_entry, copy_entry);
10812 /* adjust "start" and "end" */
10813 new_entry->vme_start += adjustment;
10814 new_entry->vme_end += adjustment;
10815 /* clear some attributes */
10816 new_entry->inheritance = inheritance;
10817 new_entry->protection = cur_prot;
10818 new_entry->max_protection = max_prot;
10819 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10820 /* take an extra reference on the entry's "object" */
10821 if (new_entry->is_sub_map) {
10822 assert(!new_entry->use_pmap); /* not nested */
10823 vm_map_lock(VME_SUBMAP(new_entry));
10824 vm_map_reference(VME_SUBMAP(new_entry));
10825 vm_map_unlock(VME_SUBMAP(new_entry));
10826 } else {
10827 vm_object_reference(VME_OBJECT(new_entry));
10828 }
10829 /* insert the new entry in the map */
10830 vm_map_store_entry_link(map, where, new_entry,
10831 VM_MAP_KERNEL_FLAGS_NONE);
10832 /* continue inserting the "copy entries" after the new entry */
10833 where = new_entry;
10834 }
10835 }
10836
10837
10838 /*
10839 * Returns true if *size matches (or is in the range of) copy->size.
10840 * Upon returning true, the *size field is updated with the actual size of the
10841 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10842 */
10843 boolean_t
vm_map_copy_validate_size(vm_map_t dst_map,vm_map_copy_t copy,vm_map_size_t * size)10844 vm_map_copy_validate_size(
10845 vm_map_t dst_map,
10846 vm_map_copy_t copy,
10847 vm_map_size_t *size)
10848 {
10849 if (copy == VM_MAP_COPY_NULL) {
10850 return FALSE;
10851 }
10852 vm_map_size_t copy_sz = copy->size;
10853 vm_map_size_t sz = *size;
10854 switch (copy->type) {
10855 case VM_MAP_COPY_OBJECT:
10856 case VM_MAP_COPY_KERNEL_BUFFER:
10857 if (sz == copy_sz) {
10858 return TRUE;
10859 }
10860 break;
10861 case VM_MAP_COPY_ENTRY_LIST:
10862 /*
10863 * potential page-size rounding prevents us from exactly
10864 * validating this flavor of vm_map_copy, but we can at least
10865 * assert that it's within a range.
10866 */
10867 if (copy_sz >= sz &&
10868 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10869 *size = copy_sz;
10870 return TRUE;
10871 }
10872 break;
10873 default:
10874 break;
10875 }
10876 return FALSE;
10877 }
10878
10879 /*
10880 * Routine: vm_map_copyout_size
10881 *
10882 * Description:
10883 * Copy out a copy chain ("copy") into newly-allocated
10884 * space in the destination map. Uses a prevalidated
10885 * size for the copy object (vm_map_copy_validate_size).
10886 *
10887 * If successful, consumes the copy object.
10888 * Otherwise, the caller is responsible for it.
10889 */
10890 kern_return_t
vm_map_copyout_size(vm_map_t dst_map,vm_map_address_t * dst_addr,vm_map_copy_t copy,vm_map_size_t copy_size)10891 vm_map_copyout_size(
10892 vm_map_t dst_map,
10893 vm_map_address_t *dst_addr, /* OUT */
10894 vm_map_copy_t copy,
10895 vm_map_size_t copy_size)
10896 {
10897 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10898 TRUE, /* consume_on_success */
10899 VM_PROT_DEFAULT,
10900 VM_PROT_ALL,
10901 VM_INHERIT_DEFAULT);
10902 }
10903
10904 /*
10905 * Routine: vm_map_copyout
10906 *
10907 * Description:
10908 * Copy out a copy chain ("copy") into newly-allocated
10909 * space in the destination map.
10910 *
10911 * If successful, consumes the copy object.
10912 * Otherwise, the caller is responsible for it.
10913 */
10914 kern_return_t
vm_map_copyout(vm_map_t dst_map,vm_map_address_t * dst_addr,vm_map_copy_t copy)10915 vm_map_copyout(
10916 vm_map_t dst_map,
10917 vm_map_address_t *dst_addr, /* OUT */
10918 vm_map_copy_t copy)
10919 {
10920 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10921 TRUE, /* consume_on_success */
10922 VM_PROT_DEFAULT,
10923 VM_PROT_ALL,
10924 VM_INHERIT_DEFAULT);
10925 }
10926
/*
 * Routine:	vm_map_copyout_internal
 *
 * Description:
 *	Copy out a copy chain ("copy") into newly-allocated space in the
 *	destination map, applying the requested protections and
 *	inheritance to the new mappings.
 *
 *	If "consume_on_success" is TRUE and the operation succeeds, the
 *	copy object is consumed (its entries are linked directly into
 *	dst_map); otherwise dst_map receives duplicated entries and the
 *	caller remains responsible for "copy".
 */
kern_return_t
vm_map_copyout_internal(
	vm_map_t                dst_map,
	vm_map_address_t        *dst_addr,      /* OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size,
	boolean_t               consume_on_success,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_size_t           size;
	vm_map_size_t           adjustment;
	vm_map_offset_t         start;
	vm_object_offset_t      vm_copy_start;
	vm_map_entry_t          last;
	vm_map_entry_t          entry;
	vm_map_entry_t          hole_entry;
	vm_map_copy_t           original_copy;

	/*
	 * Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		*dst_addr = 0;
		return KERN_SUCCESS;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	/* the caller-supplied size must match the copy object exactly */
	if (copy->size != copy_size) {
		*dst_addr = 0;
		return KERN_FAILURE;
	}

	/*
	 * Check for special copy object, created
	 * by vm_map_copyin_object.
	 */

	if (copy->type == VM_MAP_COPY_OBJECT) {
		vm_object_t             object = copy->cpy_object;
		kern_return_t           kr;
		vm_object_offset_t      offset;

		offset = vm_object_trunc_page(copy->offset);
		size = vm_map_round_page((copy_size +
		    (vm_map_size_t)(copy->offset -
		    offset)),
		    VM_MAP_PAGE_MASK(dst_map));
		*dst_addr = 0;
		kr = vm_map_enter(dst_map, dst_addr, size,
		    (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
		    VM_MAP_KERNEL_FLAGS_NONE,
		    VM_KERN_MEMORY_NONE,
		    object, offset, FALSE,
		    VM_PROT_DEFAULT, VM_PROT_ALL,
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		/* Account for non-pagealigned copy object */
		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
		if (consume_on_success) {
			zfree(vm_map_copy_zone, copy);
		}
		return KERN_SUCCESS;
	}

	/*
	 * Check for special kernel buffer allocated
	 * by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
		           copy, copy_size, FALSE,
		           consume_on_success);
	}

	original_copy = copy;
	if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
		/*
		 * The copy was made with a different page size than the
		 * destination map uses: re-cut it to the target map's page
		 * boundaries before copying it out.
		 */
		kern_return_t kr;
		vm_map_copy_t target_copy;
		vm_map_offset_t overmap_start, overmap_end, trimmed_start;

		target_copy = VM_MAP_COPY_NULL;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy,
			0, /* offset */
			copy->size, /* size */
			dst_map,
			TRUE, /* copy */
			&target_copy,
			&overmap_start,
			&overmap_end,
			&trimmed_start);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("adjust failed 0x%x\n", kr);
			return kr;
		}
		DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
		if (target_copy != copy) {
			/* from here on we work on the adjusted copy */
			copy = target_copy;
		}
		copy_size = copy->size;
	}

	/*
	 * Find space for the data
	 */

	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
	    VM_MAP_COPY_PAGE_MASK(copy));
	size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
	    VM_MAP_COPY_PAGE_MASK(copy))
	    - vm_copy_start;


StartAgain:;

	vm_map_lock(dst_map);
	if (dst_map->disable_vmentry_reuse == TRUE) {
		/* entry reuse disabled: always allocate above the highest entry */
		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
		last = entry;
	} else {
		if (dst_map->holelistenabled) {
			hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);

			if (hole_entry == NULL) {
				/*
				 * No more space in the map?
				 */
				vm_map_unlock(dst_map);
				return KERN_NO_SPACE;
			}

			last = hole_entry;
			start = last->vme_start;
		} else {
			assert(first_free_is_valid(dst_map));
			start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
			    vm_map_min(dst_map) : last->vme_end;
		}
		start = vm_map_round_page(start,
		    VM_MAP_PAGE_MASK(dst_map));
	}

	/* scan forward for a hole large enough for "size" bytes */
	while (TRUE) {
		vm_map_entry_t  next = last->vme_next;
		vm_map_offset_t end = start + size;

		if ((end > dst_map->max_offset) || (end < start)) {
			/* ran past the map's limit (or arithmetic wrapped) */
			if (dst_map->wait_for_space) {
				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
					/* block until space frees up, then retry */
					assert_wait((event_t) dst_map,
					    THREAD_INTERRUPTIBLE);
					vm_map_unlock(dst_map);
					thread_block(THREAD_CONTINUE_NULL);
					goto StartAgain;
				}
			}
			vm_map_unlock(dst_map);
			return KERN_NO_SPACE;
		}

		if (dst_map->holelistenabled) {
			/* current hole is big enough? */
			if (last->vme_end >= end) {
				break;
			}
		} else {
			/*
			 * If there are no more entries, we must win.
			 *
			 * OR
			 *
			 * If there is another entry, it must be
			 * after the end of the potential new region.
			 */

			if (next == vm_map_to_entry(dst_map)) {
				break;
			}

			if (next->vme_start >= end) {
				break;
			}
		}

		last = next;

		if (dst_map->holelistenabled) {
			if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
				/*
				 * Wrapped around
				 */
				vm_map_unlock(dst_map);
				return KERN_NO_SPACE;
			}
			start = last->vme_start;
		} else {
			start = last->vme_end;
		}
		start = vm_map_round_page(start,
		    VM_MAP_PAGE_MASK(dst_map));
	}

	if (dst_map->holelistenabled) {
		/* sanity: the chosen address must really be a hole */
		if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", last, (unsigned long long)last->vme_start);
		}
	}


	/* delta to relocate each copy entry into the destination range */
	adjustment = start - vm_copy_start;
	if (!consume_on_success) {
		/*
		 * We're not allowed to consume "copy", so we'll have to
		 * copy its map entries into the destination map below.
		 * No need to re-allocate map entries from the correct
		 * (pageable or not) zone, since we'll get new map entries
		 * during the transfer.
		 * We'll also adjust the map entries's "start" and "end"
		 * during the transfer, to keep "copy"'s entries consistent
		 * with its "offset".
		 */
		goto after_adjustments;
	}

	/*
	 * Since we're going to just drop the map
	 * entries from the copy into the destination
	 * map, they must come from the same pool.
	 */

	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
		/*
		 * Mismatches occur when dealing with the default
		 * pager.
		 */
		vm_map_entry_t  next, new;

		/*
		 * Find the zone that the copies were allocated from
		 */

		entry = vm_map_copy_first_entry(copy);

		/*
		 * Reinitialize the copy so that vm_map_copy_entry_link
		 * will work.
		 */
		vm_map_store_copy_reset(copy, entry);
		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;

		/*
		 * Copy each entry.
		 */
		while (entry != vm_map_copy_to_entry(copy)) {
			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
			vm_map_entry_copy_full(new, entry);
			new->vme_no_copy_on_read = FALSE;
			assert(!new->iokit_acct);
			if (new->is_sub_map) {
				/* clr address space specifics */
				new->use_pmap = FALSE;
			}
			vm_map_copy_entry_link(copy,
			    vm_map_copy_last_entry(copy),
			    new);
			next = entry->vme_next;
			/* dispose of the old, wrong-zone entry */
			_vm_map_entry_dispose(NULL, entry);
			entry = next;
		}
	}

	/*
	 * Adjust the addresses in the copy chain, and
	 * reset the region attributes.
	 */

	for (entry = vm_map_copy_first_entry(copy);
	    entry != vm_map_copy_to_entry(copy);
	    entry = entry->vme_next) {
		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
			/*
			 * We're injecting this copy entry into a map that
			 * has the standard page alignment, so clear
			 * "map_aligned" (which might have been inherited
			 * from the original map entry).
			 */
			entry->map_aligned = FALSE;
		}

		entry->vme_start += adjustment;
		entry->vme_end += adjustment;

		if (entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
			    VM_MAP_PAGE_MASK(dst_map)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
			    VM_MAP_PAGE_MASK(dst_map)));
		}

		entry->inheritance = VM_INHERIT_DEFAULT;
		entry->protection = VM_PROT_DEFAULT;
		entry->max_protection = VM_PROT_ALL;
		entry->behavior = VM_BEHAVIOR_DEFAULT;

		/*
		 * If the entry is now wired,
		 * map the pages into the destination map.
		 */
		if (entry->wired_count != 0) {
			vm_map_offset_t va;
			vm_object_offset_t offset;
			vm_object_t object;
			vm_prot_t prot;
			int type_of_fault;

			/* TODO4K would need to use actual page size */
			assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);

			object = VME_OBJECT(entry);
			offset = VME_OFFSET(entry);
			va = entry->vme_start;

			pmap_pageable(dst_map->pmap,
			    entry->vme_start,
			    entry->vme_end,
			    TRUE);

			/* pre-fault every wired page into the destination pmap */
			while (va < entry->vme_end) {
				vm_page_t m;
				struct vm_object_fault_info fault_info = {};

				/*
				 * Look up the page in the object.
				 * Assert that the page will be found in the
				 * top object:
				 * either
				 *	the object was newly created by
				 *	vm_object_copy_slowly, and has
				 *	copies of all of the pages from
				 *	the source object
				 * or
				 *	the object was moved from the old
				 *	map entry; because the old map
				 *	entry was wired, all of the pages
				 *	were in the top-level object.
				 *	(XXX not true if we wire pages for
				 *	 reading)
				 */
				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
				    m->vmp_absent) {
					panic("vm_map_copyout: wiring %p", m);
				}

				prot = entry->protection;

				if (override_nx(dst_map, VME_ALIAS(entry)) &&
				    prot) {
					prot |= VM_PROT_EXECUTE;
				}

				type_of_fault = DBG_CACHE_HIT_FAULT;

				fault_info.user_tag = VME_ALIAS(entry);
				fault_info.pmap_options = 0;
				if (entry->iokit_acct ||
				    (!entry->is_sub_map && !entry->use_pmap)) {
					fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
				}

				vm_fault_enter(m,
				    dst_map->pmap,
				    va,
				    PAGE_SIZE, 0,
				    prot,
				    prot,
				    VM_PAGE_WIRED(m),
				    FALSE, /* change_wiring */
				    VM_KERN_MEMORY_NONE, /* tag - not wiring */
				    &fault_info,
				    NULL, /* need_retry */
				    &type_of_fault);

				vm_object_unlock(object);

				offset += PAGE_SIZE_64;
				va += PAGE_SIZE;
			}
		}
	}

after_adjustments:

	/*
	 * Correct the page alignment for the result
	 */

	*dst_addr = start + (copy->offset - vm_copy_start);

#if KASAN
	kasan_notify_address(*dst_addr, size);
#endif

	/*
	 * Update the hints and the map size
	 */

	if (consume_on_success) {
		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
	} else {
		SAVE_HINT_MAP_WRITE(dst_map, last);
	}

	dst_map->size += size;

	/*
	 * Link in the copy
	 */

	if (consume_on_success) {
		vm_map_copy_insert(dst_map, last, copy);
		if (copy != original_copy) {
			/*
			 * The page-shift adjustment above produced a new
			 * copy object; the original is no longer needed.
			 */
			vm_map_copy_discard(original_copy);
			original_copy = VM_MAP_COPY_NULL;
		}
	} else {
		vm_map_copy_remap(dst_map, last, copy, adjustment,
		    cur_protection, max_protection,
		    inheritance);
		if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
			/* drop the adjusted copy; hand the original back */
			vm_map_copy_discard(copy);
			copy = original_copy;
		}
	}


	vm_map_unlock(dst_map);

	/*
	 * XXX	If wiring_required, call vm_map_pageable
	 */

	return KERN_SUCCESS;
}
11384
11385 /*
11386 * Routine: vm_map_copyin
11387 *
11388 * Description:
11389 * see vm_map_copyin_common. Exported via Unsupported.exports.
11390 *
11391 */
11392
11393 #undef vm_map_copyin
11394
11395 kern_return_t
vm_map_copyin(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t src_destroy,vm_map_copy_t * copy_result)11396 vm_map_copyin(
11397 vm_map_t src_map,
11398 vm_map_address_t src_addr,
11399 vm_map_size_t len,
11400 boolean_t src_destroy,
11401 vm_map_copy_t *copy_result) /* OUT */
11402 {
11403 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11404 FALSE, copy_result, FALSE);
11405 }
11406
11407 /*
11408 * Routine: vm_map_copyin_common
11409 *
11410 * Description:
11411 * Copy the specified region (src_addr, len) from the
11412 * source address space (src_map), possibly removing
11413 * the region from the source address space (src_destroy).
11414 *
11415 * Returns:
11416 * A vm_map_copy_t object (copy_result), suitable for
11417 * insertion into another address space (using vm_map_copyout),
11418 * copying over another address space region (using
11419 * vm_map_copy_overwrite). If the copy is unused, it
11420 * should be destroyed (using vm_map_copy_discard).
11421 *
11422 * In/out conditions:
11423 * The source map should not be locked on entry.
11424 */
11425
/*
 * Bookkeeping node used by vm_map_copyin_internal() when it descends
 * into submaps: one node is pushed per parent map so the traversal can
 * be unwound (and references released) on the way back up.
 */
typedef struct submap_map {
	vm_map_t        parent_map;     /* map that contains the submap entry */
	vm_map_offset_t base_start;     /* copy range start in parent_map */
	vm_map_offset_t base_end;       /* copy range end in parent_map */
	vm_map_size_t   base_len;       /* length covered at this level */
	struct submap_map *next;        /* next (shallower) level, or NULL */
} submap_map_t;
11433
11434 kern_return_t
vm_map_copyin_common(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t src_destroy,__unused boolean_t src_volatile,vm_map_copy_t * copy_result,boolean_t use_maxprot)11435 vm_map_copyin_common(
11436 vm_map_t src_map,
11437 vm_map_address_t src_addr,
11438 vm_map_size_t len,
11439 boolean_t src_destroy,
11440 __unused boolean_t src_volatile,
11441 vm_map_copy_t *copy_result, /* OUT */
11442 boolean_t use_maxprot)
11443 {
11444 int flags;
11445
11446 flags = 0;
11447 if (src_destroy) {
11448 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11449 }
11450 if (use_maxprot) {
11451 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11452 }
11453 return vm_map_copyin_internal(src_map,
11454 src_addr,
11455 len,
11456 flags,
11457 copy_result);
11458 }
11459 kern_return_t
vm_map_copyin_internal(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,int flags,vm_map_copy_t * copy_result)11460 vm_map_copyin_internal(
11461 vm_map_t src_map,
11462 vm_map_address_t src_addr,
11463 vm_map_size_t len,
11464 int flags,
11465 vm_map_copy_t *copy_result) /* OUT */
11466 {
11467 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11468 * in multi-level lookup, this
11469 * entry contains the actual
11470 * vm_object/offset.
11471 */
11472 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11473
11474 vm_map_offset_t src_start; /* Start of current entry --
11475 * where copy is taking place now
11476 */
11477 vm_map_offset_t src_end; /* End of entire region to be
11478 * copied */
11479 vm_map_offset_t src_base;
11480 vm_map_t base_map = src_map;
11481 boolean_t map_share = FALSE;
11482 submap_map_t *parent_maps = NULL;
11483
11484 vm_map_copy_t copy; /* Resulting copy */
11485 vm_map_address_t copy_addr;
11486 vm_map_size_t copy_size;
11487 boolean_t src_destroy;
11488 boolean_t use_maxprot;
11489 boolean_t preserve_purgeable;
11490 boolean_t entry_was_shared;
11491 vm_map_entry_t saved_src_entry;
11492
11493 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11494 return KERN_INVALID_ARGUMENT;
11495 }
11496
11497 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11498 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11499 preserve_purgeable =
11500 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11501
11502 /*
11503 * Check for copies of zero bytes.
11504 */
11505
11506 if (len == 0) {
11507 *copy_result = VM_MAP_COPY_NULL;
11508 return KERN_SUCCESS;
11509 }
11510
11511 /*
11512 * Check that the end address doesn't overflow
11513 */
11514 src_end = src_addr + len;
11515 if (src_end < src_addr) {
11516 return KERN_INVALID_ADDRESS;
11517 }
11518
11519 /*
11520 * Compute (page aligned) start and end of region
11521 */
11522 src_start = vm_map_trunc_page(src_addr,
11523 VM_MAP_PAGE_MASK(src_map));
11524 src_end = vm_map_round_page(src_end,
11525 VM_MAP_PAGE_MASK(src_map));
11526
11527 /*
11528 * If the copy is sufficiently small, use a kernel buffer instead
11529 * of making a virtual copy. The theory being that the cost of
11530 * setting up VM (and taking C-O-W faults) dominates the copy costs
11531 * for small regions.
11532 */
11533 if ((len < msg_ool_size_small) &&
11534 !use_maxprot &&
11535 !preserve_purgeable &&
11536 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11537 /*
11538 * Since the "msg_ool_size_small" threshold was increased and
11539 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11540 * address space limits, we revert to doing a virtual copy if the
11541 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11542 * of the commpage would now fail when it used to work.
11543 */
11544 (src_start >= vm_map_min(src_map) &&
11545 src_start < vm_map_max(src_map) &&
11546 src_end >= vm_map_min(src_map) &&
11547 src_end < vm_map_max(src_map))) {
11548 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11549 src_destroy, copy_result);
11550 }
11551
11552 /*
11553 * Allocate a header element for the list.
11554 *
11555 * Use the start and end in the header to
11556 * remember the endpoints prior to rounding.
11557 */
11558
11559 copy = vm_map_copy_allocate();
11560 copy->type = VM_MAP_COPY_ENTRY_LIST;
11561 copy->cpy_hdr.entries_pageable = TRUE;
11562 copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);
11563
11564 vm_map_store_init( &(copy->cpy_hdr));
11565
11566 copy->offset = src_addr;
11567 copy->size = len;
11568
11569 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11570
11571 #define RETURN(x) \
11572 MACRO_BEGIN \
11573 vm_map_unlock(src_map); \
11574 if(src_map != base_map) \
11575 vm_map_deallocate(src_map); \
11576 if (new_entry != VM_MAP_ENTRY_NULL) \
11577 vm_map_copy_entry_dispose(copy,new_entry); \
11578 vm_map_copy_discard(copy); \
11579 { \
11580 submap_map_t *_ptr; \
11581 \
11582 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11583 parent_maps=parent_maps->next; \
11584 if (_ptr->parent_map != base_map) \
11585 vm_map_deallocate(_ptr->parent_map); \
11586 kfree_type(submap_map_t, _ptr); \
11587 } \
11588 } \
11589 MACRO_RETURN(x); \
11590 MACRO_END
11591
11592 /*
11593 * Find the beginning of the region.
11594 */
11595
11596 vm_map_lock(src_map);
11597
11598 /*
11599 * Lookup the original "src_addr" rather than the truncated
11600 * "src_start", in case "src_start" falls in a non-map-aligned
11601 * map entry *before* the map entry that contains "src_addr"...
11602 */
11603 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11604 RETURN(KERN_INVALID_ADDRESS);
11605 }
11606 if (!tmp_entry->is_sub_map) {
11607 /*
11608 * ... but clip to the map-rounded "src_start" rather than
11609 * "src_addr" to preserve map-alignment. We'll adjust the
11610 * first copy entry at the end, if needed.
11611 */
11612 vm_map_clip_start(src_map, tmp_entry, src_start);
11613 }
11614 if (src_start < tmp_entry->vme_start) {
11615 /*
11616 * Move "src_start" up to the start of the
11617 * first map entry to copy.
11618 */
11619 src_start = tmp_entry->vme_start;
11620 }
11621 /* set for later submap fix-up */
11622 copy_addr = src_start;
11623
11624 /*
11625 * Go through entries until we get to the end.
11626 */
11627
11628 while (TRUE) {
11629 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11630 vm_map_size_t src_size; /* Size of source
11631 * map entry (in both
11632 * maps)
11633 */
11634
11635 vm_object_t src_object; /* Object to copy */
11636 vm_object_offset_t src_offset;
11637
11638 boolean_t src_needs_copy; /* Should source map
11639 * be made read-only
11640 * for copy-on-write?
11641 */
11642
11643 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11644
11645 boolean_t was_wired; /* Was source wired? */
11646 vm_map_version_t version; /* Version before locks
11647 * dropped to make copy
11648 */
11649 kern_return_t result; /* Return value from
11650 * copy_strategically.
11651 */
11652 while (tmp_entry->is_sub_map) {
11653 vm_map_size_t submap_len;
11654 submap_map_t *ptr;
11655
11656 ptr = kalloc_type(submap_map_t, Z_WAITOK);
11657 ptr->next = parent_maps;
11658 parent_maps = ptr;
11659 ptr->parent_map = src_map;
11660 ptr->base_start = src_start;
11661 ptr->base_end = src_end;
11662 submap_len = tmp_entry->vme_end - src_start;
11663 if (submap_len > (src_end - src_start)) {
11664 submap_len = src_end - src_start;
11665 }
11666 ptr->base_len = submap_len;
11667
11668 src_start -= tmp_entry->vme_start;
11669 src_start += VME_OFFSET(tmp_entry);
11670 src_end = src_start + submap_len;
11671 src_map = VME_SUBMAP(tmp_entry);
11672 vm_map_lock(src_map);
11673 /* keep an outstanding reference for all maps in */
11674 /* the parents tree except the base map */
11675 vm_map_reference(src_map);
11676 vm_map_unlock(ptr->parent_map);
11677 if (!vm_map_lookup_entry(
11678 src_map, src_start, &tmp_entry)) {
11679 RETURN(KERN_INVALID_ADDRESS);
11680 }
11681 map_share = TRUE;
11682 if (!tmp_entry->is_sub_map) {
11683 vm_map_clip_start(src_map, tmp_entry, src_start);
11684 }
11685 src_entry = tmp_entry;
11686 }
11687 /* we are now in the lowest level submap... */
11688
11689 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11690 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
		/* This is not supported for now. In the future */
11692 /* we will need to detect the phys_contig */
11693 /* condition and then upgrade copy_slowly */
11694 /* to do physical copy from the device mem */
11695 /* based object. We can piggy-back off of */
11696 /* the was wired boolean to set-up the */
11697 /* proper handling */
11698 RETURN(KERN_PROTECTION_FAILURE);
11699 }
11700 /*
11701 * Create a new address map entry to hold the result.
11702 * Fill in the fields from the appropriate source entries.
11703 * We must unlock the source map to do this if we need
11704 * to allocate a map entry.
11705 */
11706 if (new_entry == VM_MAP_ENTRY_NULL) {
11707 version.main_timestamp = src_map->timestamp;
11708 vm_map_unlock(src_map);
11709
11710 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11711
11712 vm_map_lock(src_map);
11713 if ((version.main_timestamp + 1) != src_map->timestamp) {
11714 if (!vm_map_lookup_entry(src_map, src_start,
11715 &tmp_entry)) {
11716 RETURN(KERN_INVALID_ADDRESS);
11717 }
11718 if (!tmp_entry->is_sub_map) {
11719 vm_map_clip_start(src_map, tmp_entry, src_start);
11720 }
11721 continue; /* restart w/ new tmp_entry */
11722 }
11723 }
11724
11725 /*
11726 * Verify that the region can be read.
11727 */
11728 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11729 !use_maxprot) ||
11730 (src_entry->max_protection & VM_PROT_READ) == 0) {
11731 RETURN(KERN_PROTECTION_FAILURE);
11732 }
11733
11734 /*
11735 * Clip against the endpoints of the entire region.
11736 */
11737
11738 vm_map_clip_end(src_map, src_entry, src_end);
11739
11740 src_size = src_entry->vme_end - src_start;
11741 src_object = VME_OBJECT(src_entry);
11742 src_offset = VME_OFFSET(src_entry);
11743 was_wired = (src_entry->wired_count != 0);
11744
11745 vm_map_entry_copy(src_map, new_entry, src_entry);
11746 if (new_entry->is_sub_map) {
11747 /* clr address space specifics */
11748 new_entry->use_pmap = FALSE;
11749 } else {
11750 /*
11751 * We're dealing with a copy-on-write operation,
11752 * so the resulting mapping should not inherit the
11753 * original mapping's accounting settings.
11754 * "iokit_acct" should have been cleared in
11755 * vm_map_entry_copy().
11756 * "use_pmap" should be reset to its default (TRUE)
11757 * so that the new mapping gets accounted for in
11758 * the task's memory footprint.
11759 */
11760 assert(!new_entry->iokit_acct);
11761 new_entry->use_pmap = TRUE;
11762 }
11763
11764 /*
11765 * Attempt non-blocking copy-on-write optimizations.
11766 */
11767
11768 /*
11769 * If we are destroying the source, and the object
11770 * is internal, we could move the object reference
11771 * from the source to the copy. The copy is
11772 * copy-on-write only if the source is.
11773 * We make another reference to the object, because
11774 * destroying the source entry will deallocate it.
11775 *
11776 * This memory transfer has to be atomic, (to prevent
11777 * the VM object from being shared or copied while
11778 * it's being moved here), so we could only do this
11779 * if we won't have to unlock the VM map until the
11780 * original mapping has been fully removed.
11781 */
11782
11783 RestartCopy:
11784 if ((src_object == VM_OBJECT_NULL ||
11785 (!was_wired && !map_share && !tmp_entry->is_shared
11786 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11787 vm_object_copy_quickly(
11788 VME_OBJECT_PTR(new_entry),
11789 src_offset,
11790 src_size,
11791 &src_needs_copy,
11792 &new_entry_needs_copy)) {
11793 new_entry->needs_copy = new_entry_needs_copy;
11794
11795 /*
11796 * Handle copy-on-write obligations
11797 */
11798
11799 if (src_needs_copy && !tmp_entry->needs_copy) {
11800 vm_prot_t prot;
11801
11802 prot = src_entry->protection & ~VM_PROT_WRITE;
11803
11804 if (override_nx(src_map, VME_ALIAS(src_entry))
11805 && prot) {
11806 prot |= VM_PROT_EXECUTE;
11807 }
11808
11809 vm_object_pmap_protect(
11810 src_object,
11811 src_offset,
11812 src_size,
11813 (src_entry->is_shared ?
11814 PMAP_NULL
11815 : src_map->pmap),
11816 VM_MAP_PAGE_SIZE(src_map),
11817 src_entry->vme_start,
11818 prot);
11819
11820 assert(tmp_entry->wired_count == 0);
11821 tmp_entry->needs_copy = TRUE;
11822 }
11823
11824 /*
11825 * The map has never been unlocked, so it's safe
11826 * to move to the next entry rather than doing
11827 * another lookup.
11828 */
11829
11830 goto CopySuccessful;
11831 }
11832
11833 entry_was_shared = tmp_entry->is_shared;
11834
11835 /*
11836 * Take an object reference, so that we may
11837 * release the map lock(s).
11838 */
11839
11840 assert(src_object != VM_OBJECT_NULL);
11841 vm_object_reference(src_object);
11842
11843 /*
11844 * Record the timestamp for later verification.
11845 * Unlock the map.
11846 */
11847
11848 version.main_timestamp = src_map->timestamp;
11849 vm_map_unlock(src_map); /* Increments timestamp once! */
11850 saved_src_entry = src_entry;
11851 tmp_entry = VM_MAP_ENTRY_NULL;
11852 src_entry = VM_MAP_ENTRY_NULL;
11853
11854 /*
11855 * Perform the copy
11856 */
11857
11858 if (was_wired ||
11859 (debug4k_no_cow_copyin &&
11860 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11861 CopySlowly:
11862 vm_object_lock(src_object);
11863 result = vm_object_copy_slowly(
11864 src_object,
11865 src_offset,
11866 src_size,
11867 THREAD_UNINT,
11868 VME_OBJECT_PTR(new_entry));
11869 VME_OFFSET_SET(new_entry,
11870 src_offset - vm_object_trunc_page(src_offset));
11871 new_entry->needs_copy = FALSE;
11872 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11873 (entry_was_shared || map_share)) {
11874 vm_object_t new_object;
11875
11876 vm_object_lock_shared(src_object);
11877 new_object = vm_object_copy_delayed(
11878 src_object,
11879 src_offset,
11880 src_size,
11881 TRUE);
11882 if (new_object == VM_OBJECT_NULL) {
11883 goto CopySlowly;
11884 }
11885
11886 VME_OBJECT_SET(new_entry, new_object);
11887 assert(new_entry->wired_count == 0);
11888 new_entry->needs_copy = TRUE;
11889 assert(!new_entry->iokit_acct);
11890 assert(new_object->purgable == VM_PURGABLE_DENY);
11891 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11892 result = KERN_SUCCESS;
11893 } else {
11894 vm_object_offset_t new_offset;
11895 new_offset = VME_OFFSET(new_entry);
11896 result = vm_object_copy_strategically(src_object,
11897 src_offset,
11898 src_size,
11899 VME_OBJECT_PTR(new_entry),
11900 &new_offset,
11901 &new_entry_needs_copy);
11902 if (new_offset != VME_OFFSET(new_entry)) {
11903 VME_OFFSET_SET(new_entry, new_offset);
11904 }
11905
11906 new_entry->needs_copy = new_entry_needs_copy;
11907 }
11908
11909 if (result == KERN_SUCCESS &&
11910 ((preserve_purgeable &&
11911 src_object->purgable != VM_PURGABLE_DENY) ||
11912 new_entry->used_for_jit)) {
11913 /*
11914 * Purgeable objects should be COPY_NONE, true share;
11915 * this should be propogated to the copy.
11916 *
11917 * Also force mappings the pmap specially protects to
11918 * be COPY_NONE; trying to COW these mappings would
11919 * change the effective protections, which could have
11920 * side effects if the pmap layer relies on the
11921 * specified protections.
11922 */
11923
11924 vm_object_t new_object;
11925
11926 new_object = VME_OBJECT(new_entry);
11927 assert(new_object != src_object);
11928 vm_object_lock(new_object);
11929 assert(new_object->ref_count == 1);
11930 assert(new_object->shadow == VM_OBJECT_NULL);
11931 assert(new_object->copy == VM_OBJECT_NULL);
11932 assert(new_object->vo_owner == NULL);
11933
11934 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11935
11936 if (preserve_purgeable &&
11937 src_object->purgable != VM_PURGABLE_DENY) {
11938 new_object->true_share = TRUE;
11939
11940 /* start as non-volatile with no owner... */
11941 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11942 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11943 /* ... and move to src_object's purgeable state */
11944 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11945 int state;
11946 state = src_object->purgable;
11947 vm_object_purgable_control(
11948 new_object,
11949 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11950 &state);
11951 }
11952 /* no pmap accounting for purgeable objects */
11953 new_entry->use_pmap = FALSE;
11954 }
11955
11956 vm_object_unlock(new_object);
11957 new_object = VM_OBJECT_NULL;
11958 }
11959
11960 if (result != KERN_SUCCESS &&
11961 result != KERN_MEMORY_RESTART_COPY) {
11962 vm_map_lock(src_map);
11963 RETURN(result);
11964 }
11965
11966 /*
11967 * Throw away the extra reference
11968 */
11969
11970 vm_object_deallocate(src_object);
11971
11972 /*
11973 * Verify that the map has not substantially
11974 * changed while the copy was being made.
11975 */
11976
11977 vm_map_lock(src_map);
11978
11979 if ((version.main_timestamp + 1) == src_map->timestamp) {
11980 /* src_map hasn't changed: src_entry is still valid */
11981 src_entry = saved_src_entry;
11982 goto VerificationSuccessful;
11983 }
11984
11985 /*
11986 * Simple version comparison failed.
11987 *
11988 * Retry the lookup and verify that the
11989 * same object/offset are still present.
11990 *
11991 * [Note: a memory manager that colludes with
11992 * the calling task can detect that we have
11993 * cheated. While the map was unlocked, the
11994 * mapping could have been changed and restored.]
11995 */
11996
11997 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11998 if (result != KERN_MEMORY_RESTART_COPY) {
11999 vm_object_deallocate(VME_OBJECT(new_entry));
12000 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
12001 /* reset accounting state */
12002 new_entry->iokit_acct = FALSE;
12003 new_entry->use_pmap = TRUE;
12004 }
12005 RETURN(KERN_INVALID_ADDRESS);
12006 }
12007
12008 src_entry = tmp_entry;
12009 vm_map_clip_start(src_map, src_entry, src_start);
12010
12011 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12012 !use_maxprot) ||
12013 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12014 goto VerificationFailed;
12015 }
12016
12017 if (src_entry->vme_end < new_entry->vme_end) {
12018 /*
12019 * This entry might have been shortened
12020 * (vm_map_clip_end) or been replaced with
12021 * an entry that ends closer to "src_start"
12022 * than before.
12023 * Adjust "new_entry" accordingly; copying
12024 * less memory would be correct but we also
12025 * redo the copy (see below) if the new entry
12026 * no longer points at the same object/offset.
12027 */
12028 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12029 VM_MAP_COPY_PAGE_MASK(copy)));
12030 new_entry->vme_end = src_entry->vme_end;
12031 src_size = new_entry->vme_end - src_start;
12032 } else if (src_entry->vme_end > new_entry->vme_end) {
12033 /*
12034 * This entry might have been extended
12035 * (vm_map_entry_simplify() or coalesce)
12036 * or been replaced with an entry that ends farther
12037 * from "src_start" than before.
12038 *
12039 * We've called vm_object_copy_*() only on
12040 * the previous <start:end> range, so we can't
12041 * just extend new_entry. We have to re-do
12042 * the copy based on the new entry as if it was
12043 * pointing at a different object/offset (see
12044 * "Verification failed" below).
12045 */
12046 }
12047
12048 if ((VME_OBJECT(src_entry) != src_object) ||
12049 (VME_OFFSET(src_entry) != src_offset) ||
12050 (src_entry->vme_end > new_entry->vme_end)) {
12051 /*
12052 * Verification failed.
12053 *
12054 * Start over with this top-level entry.
12055 */
12056
12057 VerificationFailed: ;
12058
12059 vm_object_deallocate(VME_OBJECT(new_entry));
12060 tmp_entry = src_entry;
12061 continue;
12062 }
12063
12064 /*
12065 * Verification succeeded.
12066 */
12067
12068 VerificationSuccessful:;
12069
12070 if (result == KERN_MEMORY_RESTART_COPY) {
12071 goto RestartCopy;
12072 }
12073
12074 /*
12075 * Copy succeeded.
12076 */
12077
12078 CopySuccessful: ;
12079
12080 /*
12081 * Link in the new copy entry.
12082 */
12083
12084 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12085 new_entry);
12086
12087 /*
12088 * Determine whether the entire region
12089 * has been copied.
12090 */
12091 src_base = src_start;
12092 src_start = new_entry->vme_end;
12093 new_entry = VM_MAP_ENTRY_NULL;
12094 while ((src_start >= src_end) && (src_end != 0)) {
12095 submap_map_t *ptr;
12096
12097 if (src_map == base_map) {
12098 /* back to the top */
12099 break;
12100 }
12101
12102 ptr = parent_maps;
12103 assert(ptr != NULL);
12104 parent_maps = parent_maps->next;
12105
12106 /* fix up the damage we did in that submap */
12107 vm_map_simplify_range(src_map,
12108 src_base,
12109 src_end);
12110
12111 vm_map_unlock(src_map);
12112 vm_map_deallocate(src_map);
12113 vm_map_lock(ptr->parent_map);
12114 src_map = ptr->parent_map;
12115 src_base = ptr->base_start;
12116 src_start = ptr->base_start + ptr->base_len;
12117 src_end = ptr->base_end;
12118 if (!vm_map_lookup_entry(src_map,
12119 src_start,
12120 &tmp_entry) &&
12121 (src_end > src_start)) {
12122 RETURN(KERN_INVALID_ADDRESS);
12123 }
12124 kfree_type(submap_map_t, ptr);
12125 if (parent_maps == NULL) {
12126 map_share = FALSE;
12127 }
12128 src_entry = tmp_entry->vme_prev;
12129 }
12130
12131 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12132 (src_start >= src_addr + len) &&
12133 (src_addr + len != 0)) {
12134 /*
12135 * Stop copying now, even though we haven't reached
12136 * "src_end". We'll adjust the end of the last copy
12137 * entry at the end, if needed.
12138 *
12139 * If src_map's aligment is different from the
12140 * system's page-alignment, there could be
12141 * extra non-map-aligned map entries between
12142 * the original (non-rounded) "src_addr + len"
12143 * and the rounded "src_end".
12144 * We do not want to copy those map entries since
12145 * they're not part of the copied range.
12146 */
12147 break;
12148 }
12149
12150 if ((src_start >= src_end) && (src_end != 0)) {
12151 break;
12152 }
12153
12154 /*
12155 * Verify that there are no gaps in the region
12156 */
12157
12158 tmp_entry = src_entry->vme_next;
12159 if ((tmp_entry->vme_start != src_start) ||
12160 (tmp_entry == vm_map_to_entry(src_map))) {
12161 RETURN(KERN_INVALID_ADDRESS);
12162 }
12163 }
12164
12165 /*
12166 * If the source should be destroyed, do it now, since the
12167 * copy was successful.
12168 */
12169 if (src_destroy) {
12170 (void) vm_map_delete(
12171 src_map,
12172 vm_map_trunc_page(src_addr,
12173 VM_MAP_PAGE_MASK(src_map)),
12174 src_end,
12175 ((src_map == kernel_map) ?
12176 VM_MAP_REMOVE_KUNWIRE :
12177 VM_MAP_REMOVE_NO_FLAGS),
12178 VM_MAP_NULL);
12179 } else {
12180 /* fix up the damage we did in the base map */
12181 vm_map_simplify_range(
12182 src_map,
12183 vm_map_trunc_page(src_addr,
12184 VM_MAP_PAGE_MASK(src_map)),
12185 vm_map_round_page(src_end,
12186 VM_MAP_PAGE_MASK(src_map)));
12187 }
12188
12189 vm_map_unlock(src_map);
12190 tmp_entry = VM_MAP_ENTRY_NULL;
12191
12192 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12193 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12194 vm_map_offset_t original_start, original_offset, original_end;
12195
12196 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12197
12198 /* adjust alignment of first copy_entry's "vme_start" */
12199 tmp_entry = vm_map_copy_first_entry(copy);
12200 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12201 vm_map_offset_t adjustment;
12202
12203 original_start = tmp_entry->vme_start;
12204 original_offset = VME_OFFSET(tmp_entry);
12205
12206 /* map-align the start of the first copy entry... */
12207 adjustment = (tmp_entry->vme_start -
12208 vm_map_trunc_page(
12209 tmp_entry->vme_start,
12210 VM_MAP_PAGE_MASK(src_map)));
12211 tmp_entry->vme_start -= adjustment;
12212 VME_OFFSET_SET(tmp_entry,
12213 VME_OFFSET(tmp_entry) - adjustment);
12214 copy_addr -= adjustment;
12215 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12216 /* ... adjust for mis-aligned start of copy range */
12217 adjustment =
12218 (vm_map_trunc_page(copy->offset,
12219 PAGE_MASK) -
12220 vm_map_trunc_page(copy->offset,
12221 VM_MAP_PAGE_MASK(src_map)));
12222 if (adjustment) {
12223 assert(page_aligned(adjustment));
12224 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12225 tmp_entry->vme_start += adjustment;
12226 VME_OFFSET_SET(tmp_entry,
12227 (VME_OFFSET(tmp_entry) +
12228 adjustment));
12229 copy_addr += adjustment;
12230 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12231 }
12232
12233 /*
12234 * Assert that the adjustments haven't exposed
12235 * more than was originally copied...
12236 */
12237 assert(tmp_entry->vme_start >= original_start);
12238 assert(VME_OFFSET(tmp_entry) >= original_offset);
12239 /*
12240 * ... and that it did not adjust outside of a
12241 * a single 16K page.
12242 */
12243 assert(vm_map_trunc_page(tmp_entry->vme_start,
12244 VM_MAP_PAGE_MASK(src_map)) ==
12245 vm_map_trunc_page(original_start,
12246 VM_MAP_PAGE_MASK(src_map)));
12247 }
12248
12249 /* adjust alignment of last copy_entry's "vme_end" */
12250 tmp_entry = vm_map_copy_last_entry(copy);
12251 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12252 vm_map_offset_t adjustment;
12253
12254 original_end = tmp_entry->vme_end;
12255
12256 /* map-align the end of the last copy entry... */
12257 tmp_entry->vme_end =
12258 vm_map_round_page(tmp_entry->vme_end,
12259 VM_MAP_PAGE_MASK(src_map));
12260 /* ... adjust for mis-aligned end of copy range */
12261 adjustment =
12262 (vm_map_round_page((copy->offset +
12263 copy->size),
12264 VM_MAP_PAGE_MASK(src_map)) -
12265 vm_map_round_page((copy->offset +
12266 copy->size),
12267 PAGE_MASK));
12268 if (adjustment) {
12269 assert(page_aligned(adjustment));
12270 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12271 tmp_entry->vme_end -= adjustment;
12272 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12273 }
12274
12275 /*
12276 * Assert that the adjustments haven't exposed
12277 * more than was originally copied...
12278 */
12279 assert(tmp_entry->vme_end <= original_end);
12280 /*
12281 * ... and that it did not adjust outside of a
12282 * a single 16K page.
12283 */
12284 assert(vm_map_round_page(tmp_entry->vme_end,
12285 VM_MAP_PAGE_MASK(src_map)) ==
12286 vm_map_round_page(original_end,
12287 VM_MAP_PAGE_MASK(src_map)));
12288 }
12289 }
12290
12291 /* Fix-up start and end points in copy. This is necessary */
12292 /* when the various entries in the copy object were picked */
12293 /* up from different sub-maps */
12294
12295 tmp_entry = vm_map_copy_first_entry(copy);
12296 copy_size = 0; /* compute actual size */
12297 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12298 assert(VM_MAP_PAGE_ALIGNED(
12299 copy_addr + (tmp_entry->vme_end -
12300 tmp_entry->vme_start),
12301 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12302 assert(VM_MAP_PAGE_ALIGNED(
12303 copy_addr,
12304 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12305
12306 /*
12307 * The copy_entries will be injected directly into the
12308 * destination map and might not be "map aligned" there...
12309 */
12310 tmp_entry->map_aligned = FALSE;
12311
12312 tmp_entry->vme_end = copy_addr +
12313 (tmp_entry->vme_end - tmp_entry->vme_start);
12314 tmp_entry->vme_start = copy_addr;
12315 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12316 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12317 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12318 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12319 }
12320
12321 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12322 copy_size < copy->size) {
12323 /*
12324 * The actual size of the VM map copy is smaller than what
12325 * was requested by the caller. This must be because some
12326 * PAGE_SIZE-sized pages are missing at the end of the last
12327 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12328 * The caller might not have been aware of those missing
12329 * pages and might not want to be aware of it, which is
12330 * fine as long as they don't try to access (and crash on)
12331 * those missing pages.
12332 * Let's adjust the size of the "copy", to avoid failing
12333 * in vm_map_copyout() or vm_map_copy_overwrite().
12334 */
12335 assert(vm_map_round_page(copy_size,
12336 VM_MAP_PAGE_MASK(src_map)) ==
12337 vm_map_round_page(copy->size,
12338 VM_MAP_PAGE_MASK(src_map)));
12339 copy->size = copy_size;
12340 }
12341
12342 *copy_result = copy;
12343 return KERN_SUCCESS;
12344
12345 #undef RETURN
12346 }
12347
12348 kern_return_t
vm_map_copy_extract(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t do_copy,vm_map_copy_t * copy_result,vm_prot_t * cur_prot,vm_prot_t * max_prot,vm_inherit_t inheritance,vm_map_kernel_flags_t vmk_flags)12349 vm_map_copy_extract(
12350 vm_map_t src_map,
12351 vm_map_address_t src_addr,
12352 vm_map_size_t len,
12353 boolean_t do_copy,
12354 vm_map_copy_t *copy_result, /* OUT */
12355 vm_prot_t *cur_prot, /* IN/OUT */
12356 vm_prot_t *max_prot, /* IN/OUT */
12357 vm_inherit_t inheritance,
12358 vm_map_kernel_flags_t vmk_flags)
12359 {
12360 vm_map_copy_t copy;
12361 kern_return_t kr;
12362 vm_prot_t required_cur_prot, required_max_prot;
12363
12364 /*
12365 * Check for copies of zero bytes.
12366 */
12367
12368 if (len == 0) {
12369 *copy_result = VM_MAP_COPY_NULL;
12370 return KERN_SUCCESS;
12371 }
12372
12373 /*
12374 * Check that the end address doesn't overflow
12375 */
12376 if (src_addr + len < src_addr) {
12377 return KERN_INVALID_ADDRESS;
12378 }
12379
12380 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12381 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12382 }
12383
12384 required_cur_prot = *cur_prot;
12385 required_max_prot = *max_prot;
12386
12387 /*
12388 * Allocate a header element for the list.
12389 *
12390 * Use the start and end in the header to
12391 * remember the endpoints prior to rounding.
12392 */
12393
12394 copy = vm_map_copy_allocate();
12395 copy->type = VM_MAP_COPY_ENTRY_LIST;
12396 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12397
12398 vm_map_store_init(©->cpy_hdr);
12399
12400 copy->offset = 0;
12401 copy->size = len;
12402
12403 kr = vm_map_remap_extract(src_map,
12404 src_addr,
12405 len,
12406 do_copy, /* copy */
12407 ©->cpy_hdr,
12408 cur_prot, /* IN/OUT */
12409 max_prot, /* IN/OUT */
12410 inheritance,
12411 vmk_flags);
12412 if (kr != KERN_SUCCESS) {
12413 vm_map_copy_discard(copy);
12414 return kr;
12415 }
12416 if (required_cur_prot != VM_PROT_NONE) {
12417 assert((*cur_prot & required_cur_prot) == required_cur_prot);
12418 assert((*max_prot & required_max_prot) == required_max_prot);
12419 }
12420
12421 *copy_result = copy;
12422 return KERN_SUCCESS;
12423 }
12424
12425 /*
12426 * vm_map_copyin_object:
12427 *
12428 * Create a copy object from an object.
12429 * Our caller donates an object reference.
12430 */
12431
12432 kern_return_t
vm_map_copyin_object(vm_object_t object,vm_object_offset_t offset,vm_object_size_t size,vm_map_copy_t * copy_result)12433 vm_map_copyin_object(
12434 vm_object_t object,
12435 vm_object_offset_t offset, /* offset of region in object */
12436 vm_object_size_t size, /* size of region in object */
12437 vm_map_copy_t *copy_result) /* OUT */
12438 {
12439 vm_map_copy_t copy; /* Resulting copy */
12440
12441 /*
12442 * We drop the object into a special copy object
12443 * that contains the object directly.
12444 */
12445
12446 copy = vm_map_copy_allocate();
12447 copy->type = VM_MAP_COPY_OBJECT;
12448 copy->cpy_object = object;
12449 copy->offset = offset;
12450 copy->size = size;
12451
12452 *copy_result = copy;
12453 return KERN_SUCCESS;
12454 }
12455
/*
 *	vm_map_fork_share:
 *
 *	Fork helper for a VM_INHERIT_SHARE entry: clone "old_entry"
 *	from "old_map" into "new_map" so that parent and child share
 *	the same backing object (or submap).  Both maps are expected
 *	to be locked by the caller (vm_map_fork).  May replace the
 *	entry's object with a shadow first, so that sharing does not
 *	break earlier or future symmetric copies of that object.
 */
static void
vm_map_fork_share(
	vm_map_t        old_map,
	vm_map_entry_t  old_entry,
	vm_map_t        new_map)
{
	vm_object_t     object;
	vm_map_entry_t  new_entry;

	/*
	 *	New sharing code.  New map entry
	 *	references original object.  Internal
	 *	objects use asynchronous copy algorithm for
	 *	future copies.  First make sure we have
	 *	the right object.  If we need a shadow,
	 *	or someone else already has one, then
	 *	make a new shadow and share it.
	 */

	object = VME_OBJECT(old_entry);
	if (old_entry->is_sub_map) {
		/* submaps are never wired; nest the submap's pmap if used */
		assert(old_entry->wired_count == 0);
#ifndef NO_NESTED_PMAP
		if (old_entry->use_pmap) {
			kern_return_t   result;

			/* share the submap's translations directly in the child's pmap */
			result = pmap_nest(new_map->pmap,
			    (VME_SUBMAP(old_entry))->pmap,
			    (addr64_t)old_entry->vme_start,
			    (uint64_t)(old_entry->vme_end - old_entry->vme_start));
			if (result) {
				panic("vm_map_fork_share: pmap_nest failed!");
			}
		}
#endif  /* NO_NESTED_PMAP */
	} else if (object == VM_OBJECT_NULL) {
		/* no backing object yet: create one so there is something to share */
		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
		    old_entry->vme_start));
		VME_OFFSET_SET(old_entry, 0);
		VME_OBJECT_SET(old_entry, object);
		old_entry->use_pmap = TRUE;
//		assert(!old_entry->needs_copy);
	} else if (object->copy_strategy !=
	    MEMORY_OBJECT_COPY_SYMMETRIC) {
		/*
		 *	We are already using an asymmetric
		 *	copy, and therefore we already have
		 *	the right object.
		 */

		assert(!old_entry->needs_copy);
	} else if (old_entry->needs_copy ||     /* case 1 */
	    object->shadowed ||                 /* case 2 */
	    (!object->true_share &&             /* case 3 */
	    !old_entry->is_shared &&
	    (object->vo_size >
	    (vm_map_size_t)(old_entry->vme_end -
	    old_entry->vme_start)))) {
		/*
		 *	We need to create a shadow.
		 *	There are three cases here.
		 *	In the first case, we need to
		 *	complete a deferred symmetrical
		 *	copy that we participated in.
		 *	In the second and third cases,
		 *	we need to create the shadow so
		 *	that changes that we make to the
		 *	object do not interfere with
		 *	any symmetrical copies which
		 *	have occured (case 2) or which
		 *	might occur (case 3).
		 *
		 *	The first case is when we had
		 *	deferred shadow object creation
		 *	via the entry->needs_copy mechanism.
		 *	This mechanism only works when
		 *	only one entry points to the source
		 *	object, and we are about to create
		 *	a second entry pointing to the
		 *	same object. The problem is that
		 *	there is no way of mapping from
		 *	an object to the entries pointing
		 *	to it. (Deferred shadow creation
		 *	works with one entry because occurs
		 *	at fault time, and we walk from the
		 *	entry to the object when handling
		 *	the fault.)
		 *
		 *	The second case is when the object
		 *	to be shared has already been copied
		 *	with a symmetric copy, but we point
		 *	directly to the object without
		 *	needs_copy set in our entry. (This
		 *	can happen because different ranges
		 *	of an object can be pointed to by
		 *	different entries. In particular,
		 *	a single entry pointing to an object
		 *	can be split by a call to vm_inherit,
		 *	which, combined with task_create, can
		 *	result in the different entries
		 *	having different needs_copy values.)
		 *	The shadowed flag in the object allows
		 *	us to detect this case. The problem
		 *	with this case is that if this object
		 *	has or will have shadows, then we
		 *	must not perform an asymmetric copy
		 *	of this object, since such a copy
		 *	allows the object to be changed, which
		 *	will break the previous symmetrical
		 *	copies (which rely upon the object
		 *	not changing). In a sense, the shadowed
		 *	flag says "don't change this object".
		 *	We fix this by creating a shadow
		 *	object for this object, and sharing
		 *	that. This works because we are free
		 *	to change the shadow object (and thus
		 *	to use an asymmetric copy strategy);
		 *	this is also semantically correct,
		 *	since this object is temporary, and
		 *	therefore a copy of the object is
		 *	as good as the object itself. (This
		 *	is not true for permanent objects,
		 *	since the pager needs to see changes,
		 *	which won't happen if the changes
		 *	are made to a copy.)
		 *
		 *	The third case is when the object
		 *	to be shared has parts sticking
		 *	outside of the entry we're working
		 *	with, and thus may in the future
		 *	be subject to a symmetrical copy.
		 *	(This is a preemptive version of
		 *	case 2.)
		 */
		VME_OBJECT_SHADOW(old_entry,
		    (vm_map_size_t) (old_entry->vme_end -
		    old_entry->vme_start));

		/*
		 *	If we're making a shadow for other than
		 *	copy on write reasons, then we have
		 *	to remove write permission.
		 */

		if (!old_entry->needs_copy &&
		    (old_entry->protection & VM_PROT_WRITE)) {
			vm_prot_t prot;

			/* pmap-enforced protection policies must not be silently altered */
			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));

			prot = old_entry->protection & ~VM_PROT_WRITE;

			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));

			/* re-grant execute where the platform overrides NX for this alias */
			if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
				prot |= VM_PROT_EXECUTE;
			}


			if (old_map->mapped_in_other_pmaps) {
				/*
				 * The object may be mapped in other pmaps too,
				 * so downgrade protections at the object level
				 * rather than in just this map's pmap.
				 */
				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					old_entry->vme_start),
					PMAP_NULL,
					PAGE_SIZE,
					old_entry->vme_start,
					prot);
			} else {
				pmap_protect(old_map->pmap,
				    old_entry->vme_start,
				    old_entry->vme_end,
				    prot);
			}
		}

		old_entry->needs_copy = FALSE;
		object = VME_OBJECT(old_entry);
	}


	/*
	 *	If object was using a symmetric copy strategy,
	 *	change its copy strategy to the default
	 *	asymmetric copy strategy, which is copy_delay
	 *	in the non-norma case and copy_call in the
	 *	norma case. Bump the reference count for the
	 *	new entry.
	 */

	if (old_entry->is_sub_map) {
		vm_map_lock(VME_SUBMAP(old_entry));
		vm_map_reference(VME_SUBMAP(old_entry));
		vm_map_unlock(VME_SUBMAP(old_entry));
	} else {
		vm_object_lock(object);
		vm_object_reference_locked(object);
		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		}
		vm_object_unlock(object);
	}

	/*
	 *	Clone the entry, using object ref from above.
	 *	Mark both entries as shared.
	 */

	new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
	                                                  * map or descendants */
	vm_map_entry_copy(old_map, new_entry, old_entry);
	old_entry->is_shared = TRUE;
	new_entry->is_shared = TRUE;

	/*
	 *	We're dealing with a shared mapping, so the resulting mapping
	 *	should inherit some of the original mapping's accounting settings.
	 *	"iokit_acct" should have been cleared in vm_map_entry_copy().
	 *	"use_pmap" should stay the same as before (if it hasn't been reset
	 *	to TRUE when we cleared "iokit_acct").
	 */
	assert(!new_entry->iokit_acct);

	/*
	 *	If old entry's inheritence is VM_INHERIT_NONE,
	 *	the new entry is for corpse fork, remove the
	 *	write permission from the new entry.
	 */
	if (old_entry->inheritance == VM_INHERIT_NONE) {
		new_entry->protection &= ~VM_PROT_WRITE;
		new_entry->max_protection &= ~VM_PROT_WRITE;
	}

	/*
	 *	Insert the entry into the new map -- we
	 *	know we're inserting at the end of the new
	 *	map.
	 */

	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
	    VM_MAP_KERNEL_FLAGS_NONE);

	/*
	 *	Update the physical map
	 */

	if (old_entry->is_sub_map) {
		/* Bill Angell pmap support goes here */
	} else {
		/* pre-copy existing translations so the child avoids soft faults */
		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
		    old_entry->vme_end - old_entry->vme_start,
		    old_entry->vme_start);
	}
}
12711
/*
 *	vm_map_fork_copy:
 *
 *	Fork helper for a VM_INHERIT_COPY entry that could not be
 *	handled by the quick symmetric-copy path: copy the range
 *	covered by "*old_entry_p" from "old_map" into "new_map" via
 *	vm_map_copyin_internal().
 *
 *	Called (and returns) with "old_map" locked, but the lock is
 *	dropped and re-taken internally, so the map may have changed.
 *	On return, "*old_entry_p" is updated to where the caller
 *	should resume its traversal of "old_map".
 *
 *	Returns TRUE if the copy succeeded and was inserted into
 *	"new_map"; FALSE if the copyin failed (the failed region is
 *	skipped and the caller should continue with the new entry).
 */
static boolean_t
vm_map_fork_copy(
	vm_map_t        old_map,
	vm_map_entry_t  *old_entry_p,
	vm_map_t        new_map,
	int             vm_map_copyin_flags)
{
	vm_map_entry_t old_entry = *old_entry_p;
	/* capture the range now: old_entry may be clipped/freed once unlocked */
	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
	vm_map_offset_t start = old_entry->vme_start;
	vm_map_copy_t copy;
	/* remember the insertion point in the (still private) new map */
	vm_map_entry_t last = vm_map_last_entry(new_map);

	vm_map_unlock(old_map);
	/*
	 *	Use maxprot version of copyin because we
	 *	care about whether this memory can ever
	 *	be accessed, not just whether it's accessible
	 *	right now.
	 */
	vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
	if (vm_map_copyin_internal(old_map, start, entry_size,
	    vm_map_copyin_flags, &copy)
	    != KERN_SUCCESS) {
		/*
		 *	The map might have changed while it
		 *	was unlocked, check it again.  Skip
		 *	any blank space or permanently
		 *	unreadable region.
		 */
		vm_map_lock(old_map);
		if (!vm_map_lookup_entry(old_map, start, &last) ||
		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
			last = last->vme_next;
		}
		*old_entry_p = last;

		/*
		 * XXX	For some error returns, want to
		 * XXX	skip to the next element.  Note
		 *	that INVALID_ADDRESS and
		 *	PROTECTION_FAILURE are handled above.
		 */

		return FALSE;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	/*
	 *	Insert the copy into the new map
	 */
	vm_map_copy_insert(new_map, last, copy);

	/*
	 *	Pick up the traversal at the end of
	 *	the copied region.
	 */

	vm_map_lock(old_map);
	start += entry_size;
	if (!vm_map_lookup_entry(old_map, start, &last)) {
		/* "start" falls in a hole: resume at the next entry after it */
		last = last->vme_next;
	} else {
		if (last->vme_start == start) {
			/*
			 * No need to clip here and we don't
			 * want to cause any unnecessary
			 * unnesting...
			 */
		} else {
			vm_map_clip_start(old_map, last, start);
		}
	}
	*old_entry_p = last;

	return TRUE;
}
12794
12795 /*
12796 * vm_map_fork:
12797 *
12798 * Create and return a new map based on the old
12799 * map, according to the inheritance values on the
12800 * regions in that map and the options.
12801 *
12802 * The source map must not be locked.
12803 */
12804 vm_map_t
vm_map_fork(ledger_t ledger,vm_map_t old_map,int options)12805 vm_map_fork(
12806 ledger_t ledger,
12807 vm_map_t old_map,
12808 int options)
12809 {
12810 pmap_t new_pmap;
12811 vm_map_t new_map;
12812 vm_map_entry_t old_entry;
12813 vm_map_size_t new_size = 0, entry_size;
12814 vm_map_entry_t new_entry;
12815 boolean_t src_needs_copy;
12816 boolean_t new_entry_needs_copy;
12817 boolean_t pmap_is64bit;
12818 int vm_map_copyin_flags;
12819 vm_inherit_t old_entry_inheritance;
12820 int map_create_options;
12821 kern_return_t footprint_collect_kr;
12822
12823 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12824 VM_MAP_FORK_PRESERVE_PURGEABLE |
12825 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12826 /* unsupported option */
12827 return VM_MAP_NULL;
12828 }
12829
12830 pmap_is64bit =
12831 #if defined(__i386__) || defined(__x86_64__)
12832 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12833 #elif defined(__arm64__)
12834 old_map->pmap->is_64bit;
12835 #elif defined(__arm__)
12836 FALSE;
12837 #else
12838 #error Unknown architecture.
12839 #endif
12840
12841 unsigned int pmap_flags = 0;
12842 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12843 #if defined(HAS_APPLE_PAC)
12844 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12845 #endif
12846 #if PMAP_CREATE_FORCE_4K_PAGES
12847 if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
12848 PAGE_SIZE != FOURK_PAGE_SIZE) {
12849 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
12850 }
12851 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12852 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12853 if (new_pmap == NULL) {
12854 return VM_MAP_NULL;
12855 }
12856
12857 vm_map_reference(old_map);
12858 vm_map_lock(old_map);
12859
12860 map_create_options = 0;
12861 if (old_map->hdr.entries_pageable) {
12862 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12863 }
12864 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12865 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12866 footprint_collect_kr = KERN_SUCCESS;
12867 }
12868 new_map = vm_map_create_options(new_pmap,
12869 old_map->min_offset,
12870 old_map->max_offset,
12871 map_create_options);
12872 /* inherit cs_enforcement */
12873 vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
12874 vm_map_lock(new_map);
12875 vm_commit_pagezero_status(new_map);
12876 /* inherit the parent map's page size */
12877 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12878
12879 /* ensure PMAP_CS structures are prepared for the fork */
12880 pmap_cs_fork_prepare(old_map->pmap, new_pmap);
12881
12882 for (old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map);) {
12883 /*
12884 * Abort any corpse collection if the system is shutting down.
12885 */
12886 if ((options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12887 get_system_inshutdown()) {
12888 vm_map_corpse_footprint_collect_done(new_map);
12889 vm_map_unlock(new_map);
12890 vm_map_unlock(old_map);
12891 vm_map_deallocate(new_map);
12892 vm_map_deallocate(old_map);
12893 printf("Aborting corpse map due to system shutdown\n");
12894 return VM_MAP_NULL;
12895 }
12896
12897 entry_size = old_entry->vme_end - old_entry->vme_start;
12898
12899 old_entry_inheritance = old_entry->inheritance;
12900 /*
12901 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12902 * share VM_INHERIT_NONE entries that are not backed by a
12903 * device pager.
12904 */
12905 if (old_entry_inheritance == VM_INHERIT_NONE &&
12906 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12907 (old_entry->protection & VM_PROT_READ) &&
12908 !(!old_entry->is_sub_map &&
12909 VME_OBJECT(old_entry) != NULL &&
12910 VME_OBJECT(old_entry)->pager != NULL &&
12911 is_device_pager_ops(
12912 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12913 old_entry_inheritance = VM_INHERIT_SHARE;
12914 }
12915
12916 if (old_entry_inheritance != VM_INHERIT_NONE &&
12917 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12918 footprint_collect_kr == KERN_SUCCESS) {
12919 /*
12920 * The corpse won't have old_map->pmap to query
12921 * footprint information, so collect that data now
12922 * and store it in new_map->vmmap_corpse_footprint
12923 * for later autopsy.
12924 */
12925 footprint_collect_kr =
12926 vm_map_corpse_footprint_collect(old_map,
12927 old_entry,
12928 new_map);
12929 }
12930
12931 switch (old_entry_inheritance) {
12932 case VM_INHERIT_NONE:
12933 break;
12934
12935 case VM_INHERIT_SHARE:
12936 vm_map_fork_share(old_map, old_entry, new_map);
12937 new_size += entry_size;
12938 break;
12939
12940 case VM_INHERIT_COPY:
12941
12942 /*
12943 * Inline the copy_quickly case;
12944 * upon failure, fall back on call
12945 * to vm_map_fork_copy.
12946 */
12947
12948 if (old_entry->is_sub_map) {
12949 break;
12950 }
12951 if ((old_entry->wired_count != 0) ||
12952 ((VME_OBJECT(old_entry) != NULL) &&
12953 (VME_OBJECT(old_entry)->true_share))) {
12954 goto slow_vm_map_fork_copy;
12955 }
12956
12957 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12958 vm_map_entry_copy(old_map, new_entry, old_entry);
12959 if (old_entry->permanent) {
12960 /* inherit "permanent" on fork() */
12961 new_entry->permanent = TRUE;
12962 }
12963
12964 if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
12965 new_map->jit_entry_exists = TRUE;
12966 }
12967
12968 if (new_entry->is_sub_map) {
12969 /* clear address space specifics */
12970 new_entry->use_pmap = FALSE;
12971 } else {
12972 /*
12973 * We're dealing with a copy-on-write operation,
12974 * so the resulting mapping should not inherit
12975 * the original mapping's accounting settings.
12976 * "iokit_acct" should have been cleared in
12977 * vm_map_entry_copy().
12978 * "use_pmap" should be reset to its default
12979 * (TRUE) so that the new mapping gets
12980 * accounted for in the task's memory footprint.
12981 */
12982 assert(!new_entry->iokit_acct);
12983 new_entry->use_pmap = TRUE;
12984 }
12985
12986 if (!vm_object_copy_quickly(
12987 VME_OBJECT_PTR(new_entry),
12988 VME_OFFSET(old_entry),
12989 (old_entry->vme_end -
12990 old_entry->vme_start),
12991 &src_needs_copy,
12992 &new_entry_needs_copy)) {
12993 vm_map_entry_dispose(new_map, new_entry);
12994 goto slow_vm_map_fork_copy;
12995 }
12996
12997 /*
12998 * Handle copy-on-write obligations
12999 */
13000
13001 if (src_needs_copy && !old_entry->needs_copy) {
13002 vm_prot_t prot;
13003
13004 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
13005
13006 prot = old_entry->protection & ~VM_PROT_WRITE;
13007
13008 if (override_nx(old_map, VME_ALIAS(old_entry))
13009 && prot) {
13010 prot |= VM_PROT_EXECUTE;
13011 }
13012
13013 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
13014
13015 vm_object_pmap_protect(
13016 VME_OBJECT(old_entry),
13017 VME_OFFSET(old_entry),
13018 (old_entry->vme_end -
13019 old_entry->vme_start),
13020 ((old_entry->is_shared
13021 || old_map->mapped_in_other_pmaps)
13022 ? PMAP_NULL :
13023 old_map->pmap),
13024 VM_MAP_PAGE_SIZE(old_map),
13025 old_entry->vme_start,
13026 prot);
13027
13028 assert(old_entry->wired_count == 0);
13029 old_entry->needs_copy = TRUE;
13030 }
13031 new_entry->needs_copy = new_entry_needs_copy;
13032
13033 /*
13034 * Insert the entry at the end
13035 * of the map.
13036 */
13037
13038 vm_map_store_entry_link(new_map,
13039 vm_map_last_entry(new_map),
13040 new_entry,
13041 VM_MAP_KERNEL_FLAGS_NONE);
13042 new_size += entry_size;
13043 break;
13044
13045 slow_vm_map_fork_copy:
13046 vm_map_copyin_flags = 0;
13047 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
13048 vm_map_copyin_flags |=
13049 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
13050 }
13051 if (vm_map_fork_copy(old_map,
13052 &old_entry,
13053 new_map,
13054 vm_map_copyin_flags)) {
13055 new_size += entry_size;
13056 }
13057 continue;
13058 }
13059 old_entry = old_entry->vme_next;
13060 }
13061
13062 #if defined(__arm64__)
13063 pmap_insert_sharedpage(new_map->pmap);
13064 #endif /* __arm64__ */
13065
13066 new_map->size = new_size;
13067
13068 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13069 vm_map_corpse_footprint_collect_done(new_map);
13070 }
13071
13072 /* Propagate JIT entitlement for the pmap layer. */
13073 if (pmap_get_jit_entitled(old_map->pmap)) {
13074 /* Tell the pmap that it supports JIT. */
13075 pmap_set_jit_entitled(new_map->pmap);
13076 }
13077
13078 vm_map_unlock(new_map);
13079 vm_map_unlock(old_map);
13080 vm_map_deallocate(old_map);
13081
13082 return new_map;
13083 }
13084
13085 /*
13086 * vm_map_exec:
13087 *
13088 * Setup the "new_map" with the proper execution environment according
13089 * to the type of executable (platform, 64bit, chroot environment).
13090 * Map the comm page and shared region, etc...
13091 */
13092 kern_return_t
vm_map_exec(vm_map_t new_map,task_t task,boolean_t is64bit,void * fsroot,cpu_type_t cpu,cpu_subtype_t cpu_subtype,boolean_t reslide,boolean_t is_driverkit)13093 vm_map_exec(
13094 vm_map_t new_map,
13095 task_t task,
13096 boolean_t is64bit,
13097 void *fsroot,
13098 cpu_type_t cpu,
13099 cpu_subtype_t cpu_subtype,
13100 boolean_t reslide,
13101 boolean_t is_driverkit)
13102 {
13103 SHARED_REGION_TRACE_DEBUG(
13104 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13105 (void *)VM_KERNEL_ADDRPERM(current_task()),
13106 (void *)VM_KERNEL_ADDRPERM(new_map),
13107 (void *)VM_KERNEL_ADDRPERM(task),
13108 (void *)VM_KERNEL_ADDRPERM(fsroot),
13109 cpu,
13110 cpu_subtype));
13111 (void) vm_commpage_enter(new_map, task, is64bit);
13112
13113 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide, is_driverkit);
13114
13115 SHARED_REGION_TRACE_DEBUG(
13116 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13117 (void *)VM_KERNEL_ADDRPERM(current_task()),
13118 (void *)VM_KERNEL_ADDRPERM(new_map),
13119 (void *)VM_KERNEL_ADDRPERM(task),
13120 (void *)VM_KERNEL_ADDRPERM(fsroot),
13121 cpu,
13122 cpu_subtype));
13123
13124 /*
13125 * Some devices have region(s) of memory that shouldn't get allocated by
13126 * user processes. The following code creates dummy vm_map_entry_t's for each
13127 * of the regions that needs to be reserved to prevent any allocations in
13128 * those regions.
13129 */
13130 kern_return_t kr = KERN_FAILURE;
13131 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13132 vmk_flags.vmkf_permanent = TRUE;
13133 vmk_flags.vmkf_beyond_max = TRUE;
13134
13135 struct vm_reserved_region *regions = NULL;
13136 size_t num_regions = ml_get_vm_reserved_regions(is64bit, ®ions);
13137 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13138
13139 for (size_t i = 0; i < num_regions; ++i) {
13140 kr = vm_map_enter(
13141 new_map,
13142 ®ions[i].vmrr_addr,
13143 regions[i].vmrr_size,
13144 (vm_map_offset_t)0,
13145 VM_FLAGS_FIXED,
13146 vmk_flags,
13147 VM_KERN_MEMORY_NONE,
13148 VM_OBJECT_NULL,
13149 (vm_object_offset_t)0,
13150 FALSE,
13151 VM_PROT_NONE,
13152 VM_PROT_NONE,
13153 VM_INHERIT_COPY);
13154
13155 if (kr != KERN_SUCCESS) {
13156 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13157 }
13158 }
13159
13160 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13161
13162 return KERN_SUCCESS;
13163 }
13164
/*
 * Telemetry counters for the copy-on-write paths taken by
 * vm_map_lookup_locked() when it bypasses a submap entry and copies
 * the underlying object:
 *  - "copy_slowly":        wired submap entries, copied via
 *                          vm_object_copy_slowly().
 *  - "copy_strategically": objects whose copy strategy is not
 *                          MEMORY_OBJECT_COPY_SYMMETRIC, copied via
 *                          vm_object_copy_strategically().
 *  - "copy_shadow":        symmetric-strategy objects, handled by
 *                          marking the object shadowed and deferring
 *                          the copy (needs_copy).
 * For each path: "count" is how many times it ran, "size" the total
 * bytes processed, "max" the largest single entry seen; "restart" and
 * "error" (where present) count lookup retries and copy failures.
 */
uint64_t vm_map_lookup_locked_copy_slowly_count = 0;
uint64_t vm_map_lookup_locked_copy_slowly_size = 0;
uint64_t vm_map_lookup_locked_copy_slowly_max = 0;
uint64_t vm_map_lookup_locked_copy_slowly_restart = 0;
uint64_t vm_map_lookup_locked_copy_slowly_error = 0;
uint64_t vm_map_lookup_locked_copy_strategically_count = 0;
uint64_t vm_map_lookup_locked_copy_strategically_size = 0;
uint64_t vm_map_lookup_locked_copy_strategically_max = 0;
uint64_t vm_map_lookup_locked_copy_strategically_restart = 0;
uint64_t vm_map_lookup_locked_copy_strategically_error = 0;
uint64_t vm_map_lookup_locked_copy_shadow_count = 0;
uint64_t vm_map_lookup_locked_copy_shadow_size = 0;
uint64_t vm_map_lookup_locked_copy_shadow_max = 0;
13178 /*
13179 * vm_map_lookup_locked:
13180 *
13181 * Finds the VM object, offset, and
13182 * protection for a given virtual address in the
13183 * specified map, assuming a page fault of the
13184 * type specified.
13185 *
13186 * Returns the (object, offset, protection) for
13187 * this address, whether it is wired down, and whether
13188 * this map has the only reference to the data in question.
13189 * In order to later verify this lookup, a "version"
13190 * is returned.
13191 * If contended != NULL, *contended will be set to
13192 * true iff the thread had to spin or block to acquire
13193 * an exclusive lock.
13194 *
13195 * The map MUST be locked by the caller and WILL be
13196 * locked on exit. In order to guarantee the
13197 * existence of the returned object, it is returned
13198 * locked.
13199 *
13200 * If a lookup is requested with "write protection"
13201 * specified, the map may be changed to perform virtual
13202 * copying operations, although the data referenced will
13203 * remain the same.
13204 */
13205 kern_return_t
vm_map_lookup_locked(vm_map_t * var_map,vm_map_offset_t vaddr,vm_prot_t fault_type,int object_lock_type,vm_map_version_t * out_version,vm_object_t * object,vm_object_offset_t * offset,vm_prot_t * out_prot,boolean_t * wired,vm_object_fault_info_t fault_info,vm_map_t * real_map,bool * contended)13206 vm_map_lookup_locked(
13207 vm_map_t *var_map, /* IN/OUT */
13208 vm_map_offset_t vaddr,
13209 vm_prot_t fault_type,
13210 int object_lock_type,
13211 vm_map_version_t *out_version, /* OUT */
13212 vm_object_t *object, /* OUT */
13213 vm_object_offset_t *offset, /* OUT */
13214 vm_prot_t *out_prot, /* OUT */
13215 boolean_t *wired, /* OUT */
13216 vm_object_fault_info_t fault_info, /* OUT */
13217 vm_map_t *real_map, /* OUT */
13218 bool *contended) /* OUT */
13219 {
13220 vm_map_entry_t entry;
13221 vm_map_t map = *var_map;
13222 vm_map_t old_map = *var_map;
13223 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13224 vm_map_offset_t cow_parent_vaddr = 0;
13225 vm_map_offset_t old_start = 0;
13226 vm_map_offset_t old_end = 0;
13227 vm_prot_t prot;
13228 boolean_t mask_protections;
13229 boolean_t force_copy;
13230 boolean_t no_force_copy_if_executable;
13231 boolean_t submap_needed_copy;
13232 vm_prot_t original_fault_type;
13233 vm_map_size_t fault_page_mask;
13234
13235 /*
13236 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13237 * as a mask against the mapping's actual protections, not as an
13238 * absolute value.
13239 */
13240 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13241 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13242 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13243 fault_type &= VM_PROT_ALL;
13244 original_fault_type = fault_type;
13245 if (contended) {
13246 *contended = false;
13247 }
13248
13249 *real_map = map;
13250
13251 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13252 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13253
13254 RetryLookup:
13255 fault_type = original_fault_type;
13256
13257 /*
13258 * If the map has an interesting hint, try it before calling
13259 * full blown lookup routine.
13260 */
13261 entry = map->hint;
13262
13263 if ((entry == vm_map_to_entry(map)) ||
13264 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13265 vm_map_entry_t tmp_entry;
13266
13267 /*
13268 * Entry was either not a valid hint, or the vaddr
13269 * was not contained in the entry, so do a full lookup.
13270 */
13271 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13272 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13273 vm_map_unlock(cow_sub_map_parent);
13274 }
13275 if ((*real_map != map)
13276 && (*real_map != cow_sub_map_parent)) {
13277 vm_map_unlock(*real_map);
13278 }
13279 return KERN_INVALID_ADDRESS;
13280 }
13281
13282 entry = tmp_entry;
13283 }
13284 if (map == old_map) {
13285 old_start = entry->vme_start;
13286 old_end = entry->vme_end;
13287 }
13288
13289 /*
13290 * Handle submaps. Drop lock on upper map, submap is
13291 * returned locked.
13292 */
13293
13294 submap_needed_copy = FALSE;
13295 submap_recurse:
13296 if (entry->is_sub_map) {
13297 vm_map_offset_t local_vaddr;
13298 vm_map_offset_t end_delta;
13299 vm_map_offset_t start_delta;
13300 vm_map_entry_t submap_entry, saved_submap_entry;
13301 vm_object_offset_t submap_entry_offset;
13302 vm_object_size_t submap_entry_size;
13303 vm_prot_t subentry_protection;
13304 vm_prot_t subentry_max_protection;
13305 boolean_t subentry_no_copy_on_read;
13306 boolean_t mapped_needs_copy = FALSE;
13307 vm_map_version_t version;
13308
13309 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13310 "map %p (%d) entry %p submap %p (%d)\n",
13311 map, VM_MAP_PAGE_SHIFT(map), entry,
13312 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13313
13314 local_vaddr = vaddr;
13315
13316 if ((entry->use_pmap &&
13317 !((fault_type & VM_PROT_WRITE) ||
13318 force_copy))) {
13319 /* if real_map equals map we unlock below */
13320 if ((*real_map != map) &&
13321 (*real_map != cow_sub_map_parent)) {
13322 vm_map_unlock(*real_map);
13323 }
13324 *real_map = VME_SUBMAP(entry);
13325 }
13326
13327 if (entry->needs_copy &&
13328 ((fault_type & VM_PROT_WRITE) ||
13329 force_copy)) {
13330 if (!mapped_needs_copy) {
13331 if (vm_map_lock_read_to_write(map)) {
13332 vm_map_lock_read(map);
13333 *real_map = map;
13334 goto RetryLookup;
13335 }
13336 vm_map_lock_read(VME_SUBMAP(entry));
13337 *var_map = VME_SUBMAP(entry);
13338 cow_sub_map_parent = map;
13339 /* reset base to map before cow object */
13340 /* this is the map which will accept */
13341 /* the new cow object */
13342 old_start = entry->vme_start;
13343 old_end = entry->vme_end;
13344 cow_parent_vaddr = vaddr;
13345 mapped_needs_copy = TRUE;
13346 } else {
13347 vm_map_lock_read(VME_SUBMAP(entry));
13348 *var_map = VME_SUBMAP(entry);
13349 if ((cow_sub_map_parent != map) &&
13350 (*real_map != map)) {
13351 vm_map_unlock(map);
13352 }
13353 }
13354 } else {
13355 if (entry->needs_copy) {
13356 submap_needed_copy = TRUE;
13357 }
13358 vm_map_lock_read(VME_SUBMAP(entry));
13359 *var_map = VME_SUBMAP(entry);
13360 /* leave map locked if it is a target */
13361 /* cow sub_map above otherwise, just */
13362 /* follow the maps down to the object */
13363 /* here we unlock knowing we are not */
13364 /* revisiting the map. */
13365 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13366 vm_map_unlock_read(map);
13367 }
13368 }
13369
13370 map = *var_map;
13371
13372 /* calculate the offset in the submap for vaddr */
13373 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13374 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13375 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13376 (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13377
13378 RetrySubMap:
13379 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13380 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13381 vm_map_unlock(cow_sub_map_parent);
13382 }
13383 if ((*real_map != map)
13384 && (*real_map != cow_sub_map_parent)) {
13385 vm_map_unlock(*real_map);
13386 }
13387 *real_map = map;
13388 return KERN_INVALID_ADDRESS;
13389 }
13390
13391 /* find the attenuated shadow of the underlying object */
13392 /* on our target map */
13393
13394 /* in english the submap object may extend beyond the */
13395 /* region mapped by the entry or, may only fill a portion */
13396 /* of it. For our purposes, we only care if the object */
13397 /* doesn't fill. In this case the area which will */
13398 /* ultimately be clipped in the top map will only need */
13399 /* to be as big as the portion of the underlying entry */
13400 /* which is mapped */
13401 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13402 submap_entry->vme_start - VME_OFFSET(entry) : 0;
13403
13404 end_delta =
13405 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13406 submap_entry->vme_end ?
13407 0 : (VME_OFFSET(entry) +
13408 (old_end - old_start))
13409 - submap_entry->vme_end;
13410
13411 old_start += start_delta;
13412 old_end -= end_delta;
13413
13414 if (submap_entry->is_sub_map) {
13415 entry = submap_entry;
13416 vaddr = local_vaddr;
13417 goto submap_recurse;
13418 }
13419
13420 if (((fault_type & VM_PROT_WRITE) ||
13421 force_copy)
13422 && cow_sub_map_parent) {
13423 vm_object_t sub_object, copy_object;
13424 vm_object_offset_t copy_offset;
13425 vm_map_offset_t local_start;
13426 vm_map_offset_t local_end;
13427 boolean_t object_copied = FALSE;
13428 vm_object_offset_t object_copied_offset = 0;
13429 boolean_t object_copied_needs_copy = FALSE;
13430 kern_return_t kr = KERN_SUCCESS;
13431
13432 if (vm_map_lock_read_to_write(map)) {
13433 vm_map_lock_read(map);
13434 old_start -= start_delta;
13435 old_end += end_delta;
13436 goto RetrySubMap;
13437 }
13438
13439
13440 sub_object = VME_OBJECT(submap_entry);
13441 if (sub_object == VM_OBJECT_NULL) {
13442 sub_object =
13443 vm_object_allocate(
13444 (vm_map_size_t)
13445 (submap_entry->vme_end -
13446 submap_entry->vme_start));
13447 VME_OBJECT_SET(submap_entry, sub_object);
13448 VME_OFFSET_SET(submap_entry, 0);
13449 assert(!submap_entry->is_sub_map);
13450 assert(submap_entry->use_pmap);
13451 }
13452 local_start = local_vaddr -
13453 (cow_parent_vaddr - old_start);
13454 local_end = local_vaddr +
13455 (old_end - cow_parent_vaddr);
13456 vm_map_clip_start(map, submap_entry, local_start);
13457 vm_map_clip_end(map, submap_entry, local_end);
13458 if (submap_entry->is_sub_map) {
13459 /* unnesting was done when clipping */
13460 assert(!submap_entry->use_pmap);
13461 }
13462
13463 /* This is the COW case, lets connect */
13464 /* an entry in our space to the underlying */
13465 /* object in the submap, bypassing the */
13466 /* submap. */
13467 submap_entry_offset = VME_OFFSET(submap_entry);
13468 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13469
13470 if ((submap_entry->wired_count != 0 ||
13471 sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
13472 (submap_entry->protection & VM_PROT_EXECUTE) &&
13473 no_force_copy_if_executable) {
13474 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13475 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13476 vm_map_unlock(cow_sub_map_parent);
13477 }
13478 if ((*real_map != map)
13479 && (*real_map != cow_sub_map_parent)) {
13480 vm_map_unlock(*real_map);
13481 }
13482 *real_map = map;
13483 kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_NO_COW_ON_EXECUTABLE), 0 /* arg */);
13484 vm_map_lock_write_to_read(map);
13485 kr = KERN_PROTECTION_FAILURE;
13486 DTRACE_VM4(submap_no_copy_executable,
13487 vm_map_t, map,
13488 vm_object_offset_t, submap_entry_offset,
13489 vm_object_size_t, submap_entry_size,
13490 int, kr);
13491 return kr;
13492 }
13493
13494 if (submap_entry->wired_count != 0) {
13495 vm_object_reference(sub_object);
13496
13497 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13498 "submap_entry %p offset 0x%llx\n",
13499 submap_entry, VME_OFFSET(submap_entry));
13500
13501 DTRACE_VM6(submap_copy_slowly,
13502 vm_map_t, cow_sub_map_parent,
13503 vm_map_offset_t, vaddr,
13504 vm_map_t, map,
13505 vm_object_size_t, submap_entry_size,
13506 int, submap_entry->wired_count,
13507 int, sub_object->copy_strategy);
13508
13509 saved_submap_entry = submap_entry;
13510 version.main_timestamp = map->timestamp;
13511 vm_map_unlock(map); /* Increments timestamp by 1 */
13512 submap_entry = VM_MAP_ENTRY_NULL;
13513
13514 vm_object_lock(sub_object);
13515 kr = vm_object_copy_slowly(sub_object,
13516 submap_entry_offset,
13517 submap_entry_size,
13518 FALSE,
13519 ©_object);
13520 object_copied = TRUE;
13521 object_copied_offset = 0;
13522 /* 4k: account for extra offset in physical page */
13523 object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13524 object_copied_needs_copy = FALSE;
13525 vm_object_deallocate(sub_object);
13526
13527 vm_map_lock(map);
13528
13529 if (kr != KERN_SUCCESS &&
13530 kr != KERN_MEMORY_RESTART_COPY) {
13531 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13532 vm_map_unlock(cow_sub_map_parent);
13533 }
13534 if ((*real_map != map)
13535 && (*real_map != cow_sub_map_parent)) {
13536 vm_map_unlock(*real_map);
13537 }
13538 *real_map = map;
13539 vm_object_deallocate(copy_object);
13540 copy_object = VM_OBJECT_NULL;
13541 kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_SLOWLY_FAILED), 0 /* arg */);
13542 vm_map_lock_write_to_read(map);
13543 DTRACE_VM4(submap_copy_error_slowly,
13544 vm_object_t, sub_object,
13545 vm_object_offset_t, submap_entry_offset,
13546 vm_object_size_t, submap_entry_size,
13547 int, kr);
13548 vm_map_lookup_locked_copy_slowly_error++;
13549 return kr;
13550 }
13551
13552 if ((kr == KERN_SUCCESS) &&
13553 (version.main_timestamp + 1) == map->timestamp) {
13554 submap_entry = saved_submap_entry;
13555 } else {
13556 saved_submap_entry = NULL;
13557 old_start -= start_delta;
13558 old_end += end_delta;
13559 vm_object_deallocate(copy_object);
13560 copy_object = VM_OBJECT_NULL;
13561 vm_map_lock_write_to_read(map);
13562 vm_map_lookup_locked_copy_slowly_restart++;
13563 goto RetrySubMap;
13564 }
13565 vm_map_lookup_locked_copy_slowly_count++;
13566 vm_map_lookup_locked_copy_slowly_size += submap_entry_size;
13567 if (submap_entry_size > vm_map_lookup_locked_copy_slowly_max) {
13568 vm_map_lookup_locked_copy_slowly_max = submap_entry_size;
13569 }
13570 } else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13571 submap_entry_offset = VME_OFFSET(submap_entry);
13572 copy_object = VM_OBJECT_NULL;
13573 object_copied_offset = submap_entry_offset;
13574 object_copied_needs_copy = FALSE;
13575 DTRACE_VM6(submap_copy_strategically,
13576 vm_map_t, cow_sub_map_parent,
13577 vm_map_offset_t, vaddr,
13578 vm_map_t, map,
13579 vm_object_size_t, submap_entry_size,
13580 int, submap_entry->wired_count,
13581 int, sub_object->copy_strategy);
13582 kr = vm_object_copy_strategically(
13583 sub_object,
13584 submap_entry_offset,
13585 submap_entry->vme_end - submap_entry->vme_start,
13586 ©_object,
13587 &object_copied_offset,
13588 &object_copied_needs_copy);
13589 if (kr == KERN_MEMORY_RESTART_COPY) {
13590 old_start -= start_delta;
13591 old_end += end_delta;
13592 vm_object_deallocate(copy_object);
13593 copy_object = VM_OBJECT_NULL;
13594 vm_map_lock_write_to_read(map);
13595 vm_map_lookup_locked_copy_strategically_restart++;
13596 goto RetrySubMap;
13597 }
13598 if (kr != KERN_SUCCESS) {
13599 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13600 vm_map_unlock(cow_sub_map_parent);
13601 }
13602 if ((*real_map != map)
13603 && (*real_map != cow_sub_map_parent)) {
13604 vm_map_unlock(*real_map);
13605 }
13606 *real_map = map;
13607 vm_object_deallocate(copy_object);
13608 copy_object = VM_OBJECT_NULL;
13609 kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_STRAT_FAILED), 0 /* arg */);
13610 vm_map_lock_write_to_read(map);
13611 DTRACE_VM4(submap_copy_error_strategically,
13612 vm_object_t, sub_object,
13613 vm_object_offset_t, submap_entry_offset,
13614 vm_object_size_t, submap_entry_size,
13615 int, kr);
13616 vm_map_lookup_locked_copy_strategically_error++;
13617 return kr;
13618 }
13619 assert(copy_object != VM_OBJECT_NULL);
13620 assert(copy_object != sub_object);
13621 object_copied = TRUE;
13622 vm_map_lookup_locked_copy_strategically_count++;
13623 vm_map_lookup_locked_copy_strategically_size += submap_entry_size;
13624 if (submap_entry_size > vm_map_lookup_locked_copy_strategically_max) {
13625 vm_map_lookup_locked_copy_strategically_max = submap_entry_size;
13626 }
13627 } else {
13628 /* set up shadow object */
13629 object_copied = FALSE;
13630 copy_object = sub_object;
13631 vm_object_lock(sub_object);
13632 vm_object_reference_locked(sub_object);
13633 sub_object->shadowed = TRUE;
13634 vm_object_unlock(sub_object);
13635
13636 assert(submap_entry->wired_count == 0);
13637 submap_entry->needs_copy = TRUE;
13638
13639 prot = submap_entry->protection;
13640 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13641 prot = prot & ~VM_PROT_WRITE;
13642 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13643
13644 if (override_nx(old_map,
13645 VME_ALIAS(submap_entry))
13646 && prot) {
13647 prot |= VM_PROT_EXECUTE;
13648 }
13649
13650 vm_object_pmap_protect(
13651 sub_object,
13652 VME_OFFSET(submap_entry),
13653 submap_entry->vme_end -
13654 submap_entry->vme_start,
13655 (submap_entry->is_shared
13656 || map->mapped_in_other_pmaps) ?
13657 PMAP_NULL : map->pmap,
13658 VM_MAP_PAGE_SIZE(map),
13659 submap_entry->vme_start,
13660 prot);
13661 vm_map_lookup_locked_copy_shadow_count++;
13662 vm_map_lookup_locked_copy_shadow_size += submap_entry_size;
13663 if (submap_entry_size > vm_map_lookup_locked_copy_shadow_max) {
13664 vm_map_lookup_locked_copy_shadow_max = submap_entry_size;
13665 }
13666 }
13667
13668 /*
13669 * Adjust the fault offset to the submap entry.
13670 */
13671 copy_offset = (local_vaddr -
13672 submap_entry->vme_start +
13673 VME_OFFSET(submap_entry));
13674
/* This works differently than the */
13676 /* normal submap case. We go back */
13677 /* to the parent of the cow map and*/
13678 /* clip out the target portion of */
13679 /* the sub_map, substituting the */
13680 /* new copy object, */
13681
13682 subentry_protection = submap_entry->protection;
13683 subentry_max_protection = submap_entry->max_protection;
13684 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13685 vm_map_unlock(map);
13686 submap_entry = NULL; /* not valid after map unlock */
13687
13688 local_start = old_start;
13689 local_end = old_end;
13690 map = cow_sub_map_parent;
13691 *var_map = cow_sub_map_parent;
13692 vaddr = cow_parent_vaddr;
13693 cow_sub_map_parent = NULL;
13694
13695 if (!vm_map_lookup_entry(map,
13696 vaddr, &entry)) {
13697 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13698 vm_map_unlock(cow_sub_map_parent);
13699 }
13700 if ((*real_map != map)
13701 && (*real_map != cow_sub_map_parent)) {
13702 vm_map_unlock(*real_map);
13703 }
13704 *real_map = map;
13705 vm_object_deallocate(
13706 copy_object);
13707 copy_object = VM_OBJECT_NULL;
13708 vm_map_lock_write_to_read(map);
13709 DTRACE_VM4(submap_lookup_post_unlock,
13710 uint64_t, (uint64_t)entry->vme_start,
13711 uint64_t, (uint64_t)entry->vme_end,
13712 vm_map_offset_t, vaddr,
13713 int, object_copied);
13714 return KERN_INVALID_ADDRESS;
13715 }
13716
13717 /* clip out the portion of space */
13718 /* mapped by the sub map which */
13719 /* corresponds to the underlying */
13720 /* object */
13721
13722 /*
13723 * Clip (and unnest) the smallest nested chunk
13724 * possible around the faulting address...
13725 */
13726 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
13727 local_end = local_start + pmap_shared_region_size_min(map->pmap);
13728 /*
13729 * ... but don't go beyond the "old_start" to "old_end"
13730 * range, to avoid spanning over another VM region
13731 * with a possibly different VM object and/or offset.
13732 */
13733 if (local_start < old_start) {
13734 local_start = old_start;
13735 }
13736 if (local_end > old_end) {
13737 local_end = old_end;
13738 }
13739 /*
13740 * Adjust copy_offset to the start of the range.
13741 */
13742 copy_offset -= (vaddr - local_start);
13743
13744 vm_map_clip_start(map, entry, local_start);
13745 vm_map_clip_end(map, entry, local_end);
13746 if (entry->is_sub_map) {
13747 /* unnesting was done when clipping */
13748 assert(!entry->use_pmap);
13749 }
13750
13751 /* substitute copy object for */
13752 /* shared map entry */
13753 vm_map_deallocate(VME_SUBMAP(entry));
13754 assert(!entry->iokit_acct);
13755 entry->is_sub_map = FALSE;
13756 entry->use_pmap = TRUE;
13757 VME_OBJECT_SET(entry, copy_object);
13758
13759 /* propagate the submap entry's protections */
13760 if (entry->protection != VM_PROT_READ) {
13761 /*
13762 * Someone has already altered the top entry's
13763 * protections via vm_protect(VM_PROT_COPY).
13764 * Respect these new values and ignore the
13765 * submap entry's protections.
13766 */
13767 } else {
13768 /*
13769 * Regular copy-on-write: propagate the submap
13770 * entry's protections to the top map entry.
13771 */
13772 entry->protection |= subentry_protection;
13773 }
13774 entry->max_protection |= subentry_max_protection;
13775 /* propagate no_copy_on_read */
13776 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13777
13778 if ((entry->protection & VM_PROT_WRITE) &&
13779 (entry->protection & VM_PROT_EXECUTE) &&
13780 #if XNU_TARGET_OS_OSX
13781 map->pmap != kernel_pmap &&
13782 (vm_map_cs_enforcement(map)
13783 #if __arm64__
13784 || !VM_MAP_IS_EXOTIC(map)
13785 #endif /* __arm64__ */
13786 ) &&
13787 #endif /* XNU_TARGET_OS_OSX */
13788 !(entry->used_for_jit) &&
13789 VM_MAP_POLICY_WX_STRIP_X(map)) {
13790 DTRACE_VM3(cs_wx,
13791 uint64_t, (uint64_t)entry->vme_start,
13792 uint64_t, (uint64_t)entry->vme_end,
13793 vm_prot_t, entry->protection);
13794 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13795 proc_selfpid(),
13796 (current_task()->bsd_info
13797 ? proc_name_address(current_task()->bsd_info)
13798 : "?"),
13799 __FUNCTION__);
13800 entry->protection &= ~VM_PROT_EXECUTE;
13801 }
13802
13803 if (object_copied) {
13804 VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
13805 entry->needs_copy = object_copied_needs_copy;
13806 entry->is_shared = FALSE;
13807 } else {
13808 assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
13809 assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
13810 assert(entry->wired_count == 0);
13811 VME_OFFSET_SET(entry, copy_offset);
13812 entry->needs_copy = TRUE;
13813 if (map != old_map) {
13814 entry->is_shared = TRUE;
13815 }
13816 }
13817 if (entry->inheritance == VM_INHERIT_SHARE) {
13818 entry->inheritance = VM_INHERIT_COPY;
13819 }
13820
13821 vm_map_lock_write_to_read(map);
13822 } else {
13823 if ((cow_sub_map_parent)
13824 && (cow_sub_map_parent != *real_map)
13825 && (cow_sub_map_parent != map)) {
13826 vm_map_unlock(cow_sub_map_parent);
13827 }
13828 entry = submap_entry;
13829 vaddr = local_vaddr;
13830 }
13831 }
13832
13833 /*
13834 * Check whether this task is allowed to have
13835 * this page.
13836 */
13837
13838 prot = entry->protection;
13839
13840 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13841 /*
13842 * HACK -- if not a stack, then allow execution
13843 */
13844 prot |= VM_PROT_EXECUTE;
13845 }
13846
13847 if (mask_protections) {
13848 fault_type &= prot;
13849 if (fault_type == VM_PROT_NONE) {
13850 goto protection_failure;
13851 }
13852 }
13853 if (((fault_type & prot) != fault_type)
13854 #if __arm64__
13855 /* prefetch abort in execute-only page */
13856 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13857 #elif defined(__x86_64__)
13858 /* Consider the UEXEC bit when handling an EXECUTE fault */
13859 && !((fault_type & VM_PROT_EXECUTE) && !(prot & VM_PROT_EXECUTE) && (prot & VM_PROT_UEXEC))
13860 #endif
13861 ) {
13862 protection_failure:
13863 if (*real_map != map) {
13864 vm_map_unlock(*real_map);
13865 }
13866 *real_map = map;
13867
13868 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13869 log_stack_execution_failure((addr64_t)vaddr, prot);
13870 }
13871
13872 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13873 DTRACE_VM3(prot_fault_detailed, vm_prot_t, fault_type, vm_prot_t, prot, void *, vaddr);
13874 /*
13875 * Noisy (esp. internally) and can be inferred from CrashReports. So OFF for now.
13876 *
13877 * kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_PROTECTION_FAILURE), 0);
13878 */
13879 return KERN_PROTECTION_FAILURE;
13880 }
13881
13882 /*
13883 * If this page is not pageable, we have to get
13884 * it for all possible accesses.
13885 */
13886
13887 *wired = (entry->wired_count != 0);
13888 if (*wired) {
13889 fault_type = prot;
13890 }
13891
13892 /*
13893 * If the entry was copy-on-write, we either ...
13894 */
13895
13896 if (entry->needs_copy) {
13897 /*
13898 * If we want to write the page, we may as well
13899 * handle that now since we've got the map locked.
13900 *
13901 * If we don't need to write the page, we just
13902 * demote the permissions allowed.
13903 */
13904
13905 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13906 /*
13907 * Make a new object, and place it in the
13908 * object chain. Note that no new references
13909 * have appeared -- one just moved from the
13910 * map to the new object.
13911 */
13912
13913 if (vm_map_lock_read_to_write(map)) {
13914 vm_map_lock_read(map);
13915 goto RetryLookup;
13916 }
13917
13918 if (VME_OBJECT(entry)->shadowed == FALSE) {
13919 vm_object_lock(VME_OBJECT(entry));
13920 VME_OBJECT(entry)->shadowed = TRUE;
13921 vm_object_unlock(VME_OBJECT(entry));
13922 }
13923 VME_OBJECT_SHADOW(entry,
13924 (vm_map_size_t) (entry->vme_end -
13925 entry->vme_start));
13926 entry->needs_copy = FALSE;
13927
13928 vm_map_lock_write_to_read(map);
13929 }
13930 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13931 /*
13932 * We're attempting to read a copy-on-write
13933 * page -- don't allow writes.
13934 */
13935
13936 prot &= (~VM_PROT_WRITE);
13937 }
13938 }
13939
13940 if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
13941 /*
13942 * We went through a "needs_copy" submap without triggering
13943 * a copy, so granting write access to the page would bypass
13944 * that submap's "needs_copy".
13945 */
13946 assert(!(fault_type & VM_PROT_WRITE));
13947 assert(!*wired);
13948 assert(!force_copy);
13949 // printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
13950 prot &= ~VM_PROT_WRITE;
13951 }
13952
13953 /*
13954 * Create an object if necessary.
13955 */
13956 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13957 if (vm_map_lock_read_to_write(map)) {
13958 vm_map_lock_read(map);
13959 goto RetryLookup;
13960 }
13961
13962 VME_OBJECT_SET(entry,
13963 vm_object_allocate(
13964 (vm_map_size_t)(entry->vme_end -
13965 entry->vme_start)));
13966 VME_OFFSET_SET(entry, 0);
13967 assert(entry->use_pmap);
13968 vm_map_lock_write_to_read(map);
13969 }
13970
13971 /*
13972 * Return the object/offset from this entry. If the entry
13973 * was copy-on-write or empty, it has been fixed up. Also
13974 * return the protection.
13975 */
13976
13977 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13978 *object = VME_OBJECT(entry);
13979 *out_prot = prot;
13980 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
13981
13982 if (fault_info) {
13983 fault_info->interruptible = THREAD_UNINT; /* for now... */
13984 /* ... the caller will change "interruptible" if needed */
13985 fault_info->cluster_size = 0;
13986 fault_info->user_tag = VME_ALIAS(entry);
13987 fault_info->pmap_options = 0;
13988 if (entry->iokit_acct ||
13989 (!entry->is_sub_map && !entry->use_pmap)) {
13990 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13991 }
13992 fault_info->behavior = entry->behavior;
13993 fault_info->lo_offset = VME_OFFSET(entry);
13994 fault_info->hi_offset =
13995 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13996 fault_info->no_cache = entry->no_cache;
13997 fault_info->stealth = FALSE;
13998 fault_info->io_sync = FALSE;
13999 if (entry->used_for_jit ||
14000 entry->vme_resilient_codesign) {
14001 fault_info->cs_bypass = TRUE;
14002 } else {
14003 fault_info->cs_bypass = FALSE;
14004 }
14005 fault_info->pmap_cs_associated = FALSE;
14006 #if CONFIG_PMAP_CS
14007 if (entry->pmap_cs_associated) {
14008 /*
14009 * The pmap layer will validate this page
14010 * before allowing it to be executed from.
14011 */
14012 fault_info->pmap_cs_associated = TRUE;
14013 }
14014 #endif /* CONFIG_PMAP_CS */
14015 fault_info->mark_zf_absent = FALSE;
14016 fault_info->batch_pmap_op = FALSE;
14017 fault_info->resilient_media = entry->vme_resilient_media;
14018 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
14019 if (entry->translated_allow_execute) {
14020 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14021 }
14022 }
14023
14024 /*
14025 * Lock the object to prevent it from disappearing
14026 */
14027 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14028 if (contended == NULL) {
14029 vm_object_lock(*object);
14030 } else {
14031 *contended = vm_object_lock_check_contended(*object);
14032 }
14033 } else {
14034 vm_object_lock_shared(*object);
14035 }
14036
14037 /*
14038 * Save the version number
14039 */
14040
14041 out_version->main_timestamp = map->timestamp;
14042
14043 return KERN_SUCCESS;
14044 }
14045
14046
14047 /*
14048 * vm_map_verify:
14049 *
14050 * Verifies that the map in question has not changed
14051 * since the given version. The map has to be locked
14052 * ("shared" mode is fine) before calling this function
14053 * and it will be returned locked too.
14054 */
14055 boolean_t
vm_map_verify(vm_map_t map,vm_map_version_t * version)14056 vm_map_verify(
14057 vm_map_t map,
14058 vm_map_version_t *version) /* REF */
14059 {
14060 boolean_t result;
14061
14062 vm_map_lock_assert_held(map);
14063 result = (map->timestamp == version->main_timestamp);
14064
14065 return result;
14066 }
14067
14068 /*
14069 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14070 * Goes away after regular vm_region_recurse function migrates to
14071 * 64 bits
14072 * vm_region_recurse: A form of vm_region which follows the
14073 * submaps in a target map
14074 *
14075 */
14076
/*
 * vm_map_region_recurse_64:
 *
 * Finds the VM region containing (or, if unmapped, following) *address
 * in "map", descending through nested submaps up to *nesting_depth
 * levels.  Fills in either a full vm_region_submap_info_64 or, when the
 * caller's buffer is too small for that, a vm_region_submap_short_info_64.
 * On success, *address/*size describe the region found and
 * *nesting_depth reports the submap depth actually reached.
 */
kern_return_t
vm_map_region_recurse_64(
	vm_map_t                 map,
	vm_map_offset_t         *address,               /* IN/OUT */
	vm_map_size_t           *size,                  /* OUT */
	natural_t               *nesting_depth,         /* IN/OUT */
	vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
	mach_msg_type_number_t  *count)                 /* IN/OUT */
{
	mach_msg_type_number_t  original_count;
	vm_region_extended_info_data_t  extended;
	vm_map_entry_t          tmp_entry;
	vm_map_offset_t         user_address;
	unsigned int            user_max_depth;

	/*
	 * "curr_entry" is the VM map entry preceding or including the
	 * address we're looking for.
	 * "curr_map" is the map or sub-map containing "curr_entry".
	 * "curr_address" is the equivalent of the top map's "user_address"
	 * in the current map.
	 * "curr_offset" is the cumulated offset of "curr_map" in the
	 * target task's address space.
	 * "curr_depth" is the depth of "curr_map" in the chain of
	 * sub-maps.
	 *
	 * "curr_max_below" and "curr_max_above" limit the range (around
	 * "curr_address") we should take into account in the current (sub)map.
	 * They limit the range to what's visible through the map entries
	 * we've traversed from the top map to the current map.
	 *
	 */
	vm_map_entry_t          curr_entry;
	vm_map_address_t        curr_address;
	vm_map_offset_t         curr_offset;
	vm_map_t                curr_map;
	unsigned int            curr_depth;
	vm_map_offset_t         curr_max_below, curr_max_above;
	vm_map_offset_t         curr_skip;

	/*
	 * "next_" is the same as "curr_" but for the VM region immediately
	 * after the address we're looking for. We need to keep track of this
	 * too because we want to return info about that region if the
	 * address we're looking for is not mapped.
	 */
	vm_map_entry_t          next_entry;
	vm_map_offset_t         next_offset;
	vm_map_offset_t         next_address;
	vm_map_t                next_map;
	unsigned int            next_depth;
	vm_map_offset_t         next_max_below, next_max_above;
	vm_map_offset_t         next_skip;

	boolean_t               look_for_pages;
	vm_region_submap_short_info_64_t short_info;
	boolean_t               do_region_footprint;
	int                     effective_page_size, effective_page_shift;
	boolean_t               submap_needed_copy;

	if (map == VM_MAP_NULL) {
		/* no address space to work on */
		return KERN_INVALID_ARGUMENT;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);

	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
		/*
		 * "info" structure is not big enough and
		 * would overflow
		 */
		return KERN_INVALID_ARGUMENT;
	}

	do_region_footprint = task_self_region_footprint();
	original_count = *count;

	/*
	 * Choose between the short and full info flavors based on how much
	 * room the caller provided, and cap *count to the newest info
	 * version (V0/V1/V2) that fits.
	 */
	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
		look_for_pages = FALSE;
		short_info = (vm_region_submap_short_info_64_t) submap_info;
		submap_info = NULL;
	} else {
		look_for_pages = TRUE;
		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
		short_info = NULL;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
		}
		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
		}
	}

	user_address = *address;
	user_max_depth = *nesting_depth;
	submap_needed_copy = FALSE;

	/* "not_in_kdp" guards all locking: the kernel debugger path must not block */
	if (not_in_kdp) {
		vm_map_lock_read(map);
	}

recurse_again:
	/* (re)start a descent from the top map at "user_address" */
	curr_entry = NULL;
	curr_map = map;
	curr_address = user_address;
	curr_offset = 0;
	curr_skip = 0;
	curr_depth = 0;
	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
	curr_max_below = curr_address;

	next_entry = NULL;
	next_map = NULL;
	next_address = 0;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_above = (vm_map_offset_t) -1;
	next_max_below = (vm_map_offset_t) -1;

	for (;;) {
		if (vm_map_lookup_entry(curr_map,
		    curr_address,
		    &tmp_entry)) {
			/* tmp_entry contains the address we're looking for */
			curr_entry = tmp_entry;
		} else {
			vm_map_offset_t skip;
			/*
			 * The address is not mapped.  "tmp_entry" is the
			 * map entry preceding the address. We want the next
			 * one, if it exists.
			 */
			curr_entry = tmp_entry->vme_next;

			if (curr_entry == vm_map_to_entry(curr_map) ||
			    (curr_entry->vme_start >=
			    curr_address + curr_max_above)) {
				/* no next entry at this level: stop looking */
				if (not_in_kdp) {
					vm_map_unlock_read(curr_map);
				}
				curr_entry = NULL;
				curr_map = NULL;
				curr_skip = 0;
				curr_offset = 0;
				curr_depth = 0;
				curr_max_above = 0;
				curr_max_below = 0;
				break;
			}

			/* adjust current address and offset */
			skip = curr_entry->vme_start - curr_address;
			curr_address = curr_entry->vme_start;
			curr_skip += skip;
			curr_offset += skip;
			curr_max_above -= skip;
			curr_max_below = 0;
		}

		/*
		 * Is the next entry at this level closer to the address (or
		 * deeper in the submap chain) than the one we had
		 * so far ?
		 */
		tmp_entry = curr_entry->vme_next;
		if (tmp_entry == vm_map_to_entry(curr_map)) {
			/* no next entry at this level */
		} else if (tmp_entry->vme_start >=
		    curr_address + curr_max_above) {
			/*
			 * tmp_entry is beyond the scope of what we mapped of
			 * this submap in the upper level: ignore it.
			 */
		} else if ((next_entry == NULL) ||
		    (tmp_entry->vme_start + curr_offset <=
		    next_entry->vme_start + next_offset)) {
			/*
			 * We didn't have a "next_entry" or this one is
			 * closer to the address we're looking for:
			 * use this "tmp_entry" as the new "next_entry".
			 */
			if (next_entry != NULL) {
				/* unlock the last "next_map" */
				if (next_map != curr_map && not_in_kdp) {
					vm_map_unlock_read(next_map);
				}
			}
			next_entry = tmp_entry;
			next_map = curr_map;
			next_depth = curr_depth;
			next_address = next_entry->vme_start;
			next_skip = curr_skip;
			next_skip += (next_address - curr_address);
			next_offset = curr_offset;
			next_offset += (next_address - curr_address);
			next_max_above = MIN(next_max_above, curr_max_above);
			next_max_above = MIN(next_max_above,
			    next_entry->vme_end - next_address);
			next_max_below = MIN(next_max_below, curr_max_below);
			next_max_below = MIN(next_max_below,
			    next_address - next_entry->vme_start);
		}

		/*
		 * "curr_max_{above,below}" allow us to keep track of the
		 * portion of the submap that is actually mapped at this level:
		 * the rest of that submap is irrelevant to us, since it's not
		 * mapped here.
		 * The relevant portion of the map starts at
		 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
		 */
		curr_max_above = MIN(curr_max_above,
		    curr_entry->vme_end - curr_address);
		curr_max_below = MIN(curr_max_below,
		    curr_address - curr_entry->vme_start);

		if (!curr_entry->is_sub_map ||
		    curr_depth >= user_max_depth) {
			/*
			 * We hit a leaf map or we reached the maximum depth
			 * we could, so stop looking.  Keep the current map
			 * locked.
			 */
			break;
		}

		/*
		 * Get down to the next submap level.
		 */

		if (curr_entry->needs_copy) {
			/* everything below this is effectively copy-on-write */
			submap_needed_copy = TRUE;
		}

		/*
		 * Lock the next level and unlock the current level,
		 * unless we need to keep it locked to access the "next_entry"
		 * later.
		 */
		if (not_in_kdp) {
			vm_map_lock_read(VME_SUBMAP(curr_entry));
		}
		if (curr_map == next_map) {
			/* keep "next_map" locked in case we need it */
		} else {
			/* release this map */
			if (not_in_kdp) {
				vm_map_unlock_read(curr_map);
			}
		}

		/*
		 * Adjust the offset.  "curr_entry" maps the submap
		 * at relative address "curr_entry->vme_start" in the
		 * curr_map but skips the first "VME_OFFSET(curr_entry)"
		 * bytes of the submap.
		 * "curr_offset" always represents the offset of a virtual
		 * address in the curr_map relative to the absolute address
		 * space (i.e. the top-level VM map).
		 */
		curr_offset +=
		    (VME_OFFSET(curr_entry) - curr_entry->vme_start);
		curr_address = user_address + curr_offset;
		/* switch to the submap */
		curr_map = VME_SUBMAP(curr_entry);
		curr_depth++;
		curr_entry = NULL;
	}

	// LP64todo: all the current tools are 32bit, obviously never worked for 64b
	// so probably should be a real 32b ID vs. ptr.
	// Current users just check for equality

	if (curr_entry == NULL) {
		/* no VM region contains the address... */

		if (do_region_footprint &&      /* we want footprint numbers */
		    next_entry == NULL &&       /* & there are no more regions */
		    /* & we haven't already provided our fake region: */
		    user_address <= vm_map_last_entry(map)->vme_end) {
			ledger_amount_t ledger_resident, ledger_compressed;

			/*
			 * Add a fake memory region to account for
			 * purgeable and/or ledger-tagged memory that
			 * counts towards this task's memory footprint,
			 * i.e. the resident/compressed pages of non-volatile
			 * objects owned by that task.
			 */
			task_ledgers_footprint(map->pmap->ledger,
			    &ledger_resident,
			    &ledger_compressed);
			if (ledger_resident + ledger_compressed == 0) {
				/* no purgeable memory usage to report */
				return KERN_INVALID_ADDRESS;
			}
			/* fake region to show nonvolatile footprint */
			if (look_for_pages) {
				submap_info->protection = VM_PROT_DEFAULT;
				submap_info->max_protection = VM_PROT_DEFAULT;
				submap_info->inheritance = VM_INHERIT_DEFAULT;
				submap_info->offset = 0;
				submap_info->user_tag = -1;
				submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
				submap_info->pages_shared_now_private = 0;
				submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
				submap_info->pages_dirtied = submap_info->pages_resident;
				submap_info->ref_count = 1;
				submap_info->shadow_depth = 0;
				submap_info->external_pager = 0;
				submap_info->share_mode = SM_PRIVATE;
				if (submap_needed_copy) {
					submap_info->share_mode = SM_COW;
				}
				submap_info->is_submap = 0;
				submap_info->behavior = VM_BEHAVIOR_DEFAULT;
				submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				submap_info->user_wired_count = 0;
				submap_info->pages_reusable = 0;
			} else {
				short_info->user_tag = -1;
				short_info->offset = 0;
				short_info->protection = VM_PROT_DEFAULT;
				short_info->inheritance = VM_INHERIT_DEFAULT;
				short_info->max_protection = VM_PROT_DEFAULT;
				short_info->behavior = VM_BEHAVIOR_DEFAULT;
				short_info->user_wired_count = 0;
				short_info->is_submap = 0;
				short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				short_info->external_pager = 0;
				short_info->shadow_depth = 0;
				short_info->share_mode = SM_PRIVATE;
				if (submap_needed_copy) {
					short_info->share_mode = SM_COW;
				}
				short_info->ref_count = 1;
			}
			*nesting_depth = 0;
			*size = (vm_map_size_t) (ledger_resident + ledger_compressed);
//			*address = user_address;
			*address = vm_map_last_entry(map)->vme_end;
			return KERN_SUCCESS;
		}

		if (next_entry == NULL) {
			/* ... and no VM region follows it either */
			return KERN_INVALID_ADDRESS;
		}
		/* ... gather info about the next VM region */
		curr_entry = next_entry;
		curr_map = next_map;    /* still locked ... */
		curr_address = next_address;
		curr_skip = next_skip;
		curr_offset = next_offset;
		curr_depth = next_depth;
		curr_max_above = next_max_above;
		curr_max_below = next_max_below;
	} else {
		/* we won't need "next_entry" after all */
		if (next_entry != NULL) {
			/* release "next_map" */
			if (next_map != curr_map && not_in_kdp) {
				vm_map_unlock_read(next_map);
			}
		}
	}
	next_entry = NULL;
	next_map = NULL;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_below = -1;
	next_max_above = -1;

	if (curr_entry->is_sub_map &&
	    curr_depth < user_max_depth) {
		/*
		 * We're not as deep as we could be: we must have
		 * gone back up after not finding anything mapped
		 * below the original top-level map entry's.
		 * Let's move "curr_address" forward and recurse again.
		 */
		user_address = curr_address;
		goto recurse_again;
	}

	*nesting_depth = curr_depth;
	*size = curr_max_above + curr_max_below;
	*address = user_address + curr_skip - curr_max_below;

	/* copy the per-entry attributes into whichever flavor the caller got */
	if (look_for_pages) {
		submap_info->user_tag = VME_ALIAS(curr_entry);
		submap_info->offset = VME_OFFSET(curr_entry);
		submap_info->protection = curr_entry->protection;
		submap_info->inheritance = curr_entry->inheritance;
		submap_info->max_protection = curr_entry->max_protection;
		submap_info->behavior = curr_entry->behavior;
		submap_info->user_wired_count = curr_entry->user_wired_count;
		submap_info->is_submap = curr_entry->is_sub_map;
		submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
	} else {
		short_info->user_tag = VME_ALIAS(curr_entry);
		short_info->offset = VME_OFFSET(curr_entry);
		short_info->protection = curr_entry->protection;
		short_info->inheritance = curr_entry->inheritance;
		short_info->max_protection = curr_entry->max_protection;
		short_info->behavior = curr_entry->behavior;
		short_info->user_wired_count = curr_entry->user_wired_count;
		short_info->is_submap = curr_entry->is_sub_map;
		short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
	}

	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.pages_reusable = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;
	extended.share_mode = SM_EMPTY;
	extended.ref_count = 0;

	if (not_in_kdp) {
		if (!curr_entry->is_sub_map) {
			vm_map_offset_t range_start, range_end;
			/* clamp the walk to the visible portion of the entry */
			range_start = MAX((curr_address - curr_max_below),
			    curr_entry->vme_start);
			range_end = MIN((curr_address + curr_max_above),
			    curr_entry->vme_end);
			vm_map_region_walk(curr_map,
			    range_start,
			    curr_entry,
			    (VME_OFFSET(curr_entry) +
			    (range_start -
			    curr_entry->vme_start)),
			    range_end - range_start,
			    &extended,
			    look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
			if (extended.external_pager &&
			    extended.ref_count == 2 &&
			    extended.share_mode == SM_SHARED) {
				extended.share_mode = SM_PRIVATE;
			}
			if (submap_needed_copy) {
				extended.share_mode = SM_COW;
			}
		} else {
			if (curr_entry->use_pmap) {
				extended.share_mode = SM_TRUESHARED;
			} else {
				extended.share_mode = SM_PRIVATE;
			}
			extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
		}
	}

	if (look_for_pages) {
		submap_info->pages_resident = extended.pages_resident;
		submap_info->pages_swapped_out = extended.pages_swapped_out;
		submap_info->pages_shared_now_private =
		    extended.pages_shared_now_private;
		submap_info->pages_dirtied = extended.pages_dirtied;
		submap_info->external_pager = extended.external_pager;
		submap_info->shadow_depth = extended.shadow_depth;
		submap_info->share_mode = extended.share_mode;
		submap_info->ref_count = extended.ref_count;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			submap_info->pages_reusable = extended.pages_reusable;
		}
		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
			submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
		}
	} else {
		short_info->external_pager = extended.external_pager;
		short_info->shadow_depth = extended.shadow_depth;
		short_info->share_mode = extended.share_mode;
		short_info->ref_count = extended.ref_count;
	}

	if (not_in_kdp) {
		vm_map_unlock_read(curr_map);
	}

	return KERN_SUCCESS;
}
14570
14571 /*
14572 * vm_region:
14573 *
14574 * User call to obtain information about a region in
14575 * a task's address map. Currently, only one flavor is
14576 * supported.
14577 *
14578 * XXX The reserved and behavior fields cannot be filled
14579 * in until the vm merge from the IK is completed, and
14580 * vm_reserve is implemented.
14581 */
14582
/*
 * vm_map_region:
 *
 * Looks up the VM region containing (or, if unmapped, following)
 * *address in "map" and fills in the info structure for the requested
 * "flavor" (BASIC_INFO, BASIC_INFO_64, EXTENDED_INFO[/legacy], or
 * TOP_INFO).  On success, *address/*size are updated to describe the
 * region found.  The map read lock is taken and released internally.
 */
kern_return_t
vm_map_region(
	vm_map_t                map,
	vm_map_offset_t         *address,       /* IN/OUT */
	vm_map_size_t           *size,          /* OUT */
	vm_region_flavor_t      flavor,         /* IN */
	vm_region_info_t        info,           /* OUT */
	mach_msg_type_number_t  *count, /* IN/OUT */
	mach_port_t             *object_name)   /* OUT */
{
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	vm_map_offset_t         start;

	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (flavor) {
	case VM_REGION_BASIC_INFO:
		/* legacy for old 32-bit objects info */
	{
		vm_region_basic_info_t  basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			/* address unmapped: report the next entry, if any */
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		/* NOTE: 32-bit flavor truncates the 64-bit object offset */
		basic->offset = (uint32_t)VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}

	case VM_REGION_BASIC_INFO_64:
	{
		vm_region_basic_info_64_t       basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_64_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT_64;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			/* address unmapped: report the next entry, if any */
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	case VM_REGION_EXTENDED_INFO:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}
		/* fall through to share the legacy count check and body */
		OS_FALLTHROUGH;
	case VM_REGION_EXTENDED_INFO__legacy:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
			return KERN_INVALID_ARGUMENT;
		}

		{
			vm_region_extended_info_t       extended;
			mach_msg_type_number_t original_count;
			int effective_page_size, effective_page_shift;

			extended = (vm_region_extended_info_t) info;

			effective_page_shift = vm_self_region_page_shift(map);
			effective_page_size = (1 << effective_page_shift);

			vm_map_lock_read(map);

			start = *address;
			if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
				/* address unmapped: report the next entry, if any */
				if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
					vm_map_unlock_read(map);
					return KERN_INVALID_ADDRESS;
				}
			} else {
				entry = tmp_entry;
			}
			start = entry->vme_start;

			extended->protection = entry->protection;
			extended->user_tag = VME_ALIAS(entry);
			extended->pages_resident = 0;
			extended->pages_swapped_out = 0;
			extended->pages_shared_now_private = 0;
			extended->pages_dirtied = 0;
			extended->external_pager = 0;
			extended->shadow_depth = 0;

			original_count = *count;
			if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
				*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
			} else {
				/* pages_reusable only exists in the non-legacy layout */
				extended->pages_reusable = 0;
				*count = VM_REGION_EXTENDED_INFO_COUNT;
			}

			vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);

			if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
				extended->share_mode = SM_PRIVATE;
			}

			if (object_name) {
				*object_name = IP_NULL;
			}
			*address = start;
			*size = (entry->vme_end - start);

			vm_map_unlock_read(map);
			return KERN_SUCCESS;
		}
	case VM_REGION_TOP_INFO:
	{
		vm_region_top_info_t    top;

		if (*count < VM_REGION_TOP_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		top = (vm_region_top_info_t) info;
		*count = VM_REGION_TOP_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			/* address unmapped: report the next entry, if any */
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}
		start = entry->vme_start;

		top->private_pages_resident = 0;
		top->shared_pages_resident = 0;

		vm_map_region_top_walk(entry, top);

		if (object_name) {
			*object_name = IP_NULL;
		}
		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
14805
/*
 * OBJ_RESIDENT_COUNT:
 * Number of resident pages of "obj" to report for a map entry spanning
 * "entry_size" pages, clamped to the entry's size.  Reusable pages are
 * subtracted out; if the whole object is marked all_reusable, only its
 * wired pages are counted as resident.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
	MIN((entry_size),                                               \
	((obj)->all_reusable ?                                          \
	(obj)->wired_page_count :                                       \
	(obj)->resident_page_count - (obj)->reusable_page_count))
14811
/*
 * vm_map_region_top_walk:
 *
 * Fills in a vm_region_top_info for "entry": share mode, reference
 * count, resident page counts (split private vs shared), and an
 * obfuscated object id.  Walks the entry's object shadow chain,
 * accumulating shared resident pages along the way.
 */
void
vm_map_region_top_walk(
	vm_map_entry_t          entry,
	vm_region_top_info_t    top)
{
	/* submaps and entries with no object have nothing to report */
	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
		top->share_mode = SM_EMPTY;
		top->ref_count = 0;
		top->obj_id = 0;
		return;
	}

	{
		struct  vm_object *obj, *tmp_obj;
		int             ref_count;
		uint32_t        entry_size;

		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

		obj = VME_OBJECT(entry);

		vm_object_lock(obj);

		/* don't count the paging reference while paging is in progress */
		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}

		assert(obj->reusable_page_count <= obj->resident_page_count);
		if (obj->shadow) {
			/* shadowed object: this entry is copy-on-write */
			if (ref_count == 1) {
				top->private_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			}
			top->ref_count  = ref_count;
			top->share_mode = SM_COW;

			/*
			 * Walk down the shadow chain with hand-over-hand
			 * locking, accumulating shared resident pages.
			 */
			while ((tmp_obj = obj->shadow)) {
				vm_object_lock(tmp_obj);
				vm_object_unlock(obj);
				obj = tmp_obj;

				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
					ref_count--;
				}

				assert(obj->reusable_page_count <= obj->resident_page_count);
				top->shared_pages_resident +=
				    OBJ_RESIDENT_COUNT(obj, entry_size);
				top->ref_count += ref_count - 1;
			}
		} else {
			if (entry->superpage_size) {
				top->share_mode = SM_LARGE_PAGE;
				top->shared_pages_resident = 0;
				top->private_pages_resident = entry_size;
			} else if (entry->needs_copy) {
				top->share_mode = SM_COW;
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				/* ref_count == 2 with a named object still counts as private */
				if (ref_count == 1 ||
				    (ref_count == 2 && obj->named)) {
					top->share_mode = SM_PRIVATE;
					top->private_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				} else {
					top->share_mode = SM_SHARED;
					top->shared_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				}
			}
			top->ref_count = ref_count;
		}
		/* XXX K64: obj_id will be truncated */
		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

		vm_object_unlock(obj);
	}
}
14896
/*
 * vm_map_region_walk:
 *	Fill in the vm_region_extended_info_t for "range" bytes of
 *	"entry", starting at map address "va" / object offset "offset".
 *	When "look_for_pages" is set, each page in the range is examined
 *	(via vm_map_region_look_for_page, or via the footprint/corpse
 *	queries when the task asked for region footprint accounting);
 *	otherwise only object-level data (shadow depth, pager type) is
 *	collected.  "count" selects how much of the extended info
 *	structure the caller provided.
 */
void
vm_map_region_walk(
	vm_map_t map,
	vm_map_offset_t va,
	vm_map_entry_t entry,
	vm_object_offset_t offset,
	vm_object_size_t range,
	vm_region_extended_info_t extended,
	boolean_t look_for_pages,
	mach_msg_type_number_t count)
{
	struct vm_object *obj, *tmp_obj;
	vm_map_offset_t last_offset;
	int i;
	int ref_count;
	struct vm_object *shadow_object;
	unsigned short shadow_depth;
	boolean_t do_region_footprint;
	int effective_page_size, effective_page_shift;
	vm_map_offset_t effective_page_mask;

	do_region_footprint = task_self_region_footprint();

	/*
	 * No object, a submap, or physically-contiguous memory (other
	 * than a superpage mapping): nothing to walk.
	 */
	if ((VME_OBJECT(entry) == 0) ||
	    (entry->is_sub_map) ||
	    (VME_OBJECT(entry)->phys_contiguous &&
	    !entry->superpage_size)) {
		extended->share_mode = SM_EMPTY;
		extended->ref_count = 0;
		return;
	}

	if (entry->superpage_size) {
		/* superpages: all pages resident, no shadow chain, no pager */
		extended->shadow_depth = 0;
		extended->share_mode = SM_LARGE_PAGE;
		extended->ref_count = 1;
		extended->external_pager = 0;

		/* TODO4K: Superpage in 4k mode? */
		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
		extended->shadow_depth = 0;
		return;
	}

	/* page size as seen by the caller's address space */
	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);
	effective_page_mask = effective_page_size - 1;

	offset = vm_map_trunc_page(offset, effective_page_mask);

	obj = VME_OBJECT(entry);

	vm_object_lock(obj);

	/* don't count a pager's paging reference against the object */
	if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
		ref_count--;
	}

	if (look_for_pages) {
		/* examine every page in [offset, offset + range) */
		for (last_offset = offset + range;
		    offset < last_offset;
		    offset += effective_page_size, va += effective_page_size) {
			if (do_region_footprint) {
				int disp;

				disp = 0;
				if (map->has_corpse_footprint) {
					/*
					 * Query the page info data we saved
					 * while forking the corpse.
					 */
					vm_map_corpse_footprint_query_page_info(
						map,
						va,
						&disp);
				} else {
					/*
					 * Query the pmap.
					 */
					vm_map_footprint_query_page_info(
						map,
						entry,
						va,
						&disp);
				}
				if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
					extended->pages_resident++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
					extended->pages_reusable++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
					extended->pages_dirtied++;
				}
				if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
					extended->pages_swapped_out++;
				}
				continue;
			}

			vm_map_region_look_for_page(map, va, obj,
			    vm_object_trunc_page(offset), ref_count,
			    0, extended, count);
		}

		/*
		 * The footprint path above doesn't compute shadow depth /
		 * pager info, so fall into the object-level collection.
		 */
		if (do_region_footprint) {
			goto collect_object_info;
		}
	} else {
collect_object_info:
		/* collect object-level info: shadow depth and pager type */
		shadow_object = obj->shadow;
		shadow_depth = 0;

		if (!(obj->internal)) {
			extended->external_pager = 1;
		}

		if (shadow_object != VM_OBJECT_NULL) {
			/* walk the shadow chain with hand-over-hand locking */
			vm_object_lock(shadow_object);
			for (;
			    shadow_object != VM_OBJECT_NULL;
			    shadow_depth++) {
				vm_object_t next_shadow;

				if (!(shadow_object->internal)) {
					extended->external_pager = 1;
				}

				next_shadow = shadow_object->shadow;
				if (next_shadow) {
					vm_object_lock(next_shadow);
				}
				vm_object_unlock(shadow_object);
				shadow_object = next_shadow;
			}
		}
		extended->shadow_depth = shadow_depth;
	}

	/* derive the share mode from the shadow depth and ref counts */
	if (extended->shadow_depth || entry->needs_copy) {
		extended->share_mode = SM_COW;
	} else {
		if (ref_count == 1) {
			extended->share_mode = SM_PRIVATE;
		} else {
			if (obj->true_share) {
				extended->share_mode = SM_TRUESHARED;
			} else {
				extended->share_mode = SM_SHARED;
			}
		}
	}
	extended->ref_count = ref_count - extended->shadow_depth;

	/* accumulate the ref counts of the shadow chain objects */
	for (i = 0; i < extended->shadow_depth; i++) {
		if ((tmp_obj = obj->shadow) == 0) {
			break;
		}
		vm_object_lock(tmp_obj);
		vm_object_unlock(obj);

		if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
			ref_count--;
		}

		extended->ref_count += ref_count;
		obj = tmp_obj;
	}
	vm_object_unlock(obj);

	if (extended->share_mode == SM_SHARED) {
		vm_map_entry_t cur;
		vm_map_entry_t last;
		int my_refs;

		/*
		 * Count how many of the object's references come from
		 * this map; if they all do, the sharing is an alias
		 * within a single address space.
		 */
		obj = VME_OBJECT(entry);
		last = vm_map_to_entry(map);
		my_refs = 0;

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}
		for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
			my_refs += vm_map_region_count_obj_refs(cur, obj);
		}

		if (my_refs == ref_count) {
			extended->share_mode = SM_PRIVATE_ALIASED;
		} else if (my_refs > 1) {
			extended->share_mode = SM_SHARED_ALIASED;
		}
	}
}
15090
15091
/* the caller's "object" is locked on entry and remains locked on return */
15093
15094
/*
 * vm_map_region_look_for_page:
 *	Look for the page at "offset", starting in "object" and
 *	descending its shadow chain, and update the extended-info
 *	counters accordingly (resident, dirtied, reusable, swapped out,
 *	shared-now-private, shadow depth).  "object" is locked by the
 *	caller and remains locked on return; intermediate shadow
 *	objects are locked hand-over-hand and unlocked here.
 */
static void
vm_map_region_look_for_page(
	__unused vm_map_t map,
	__unused vm_map_offset_t va,
	vm_object_t object,
	vm_object_offset_t offset,
	int max_refcnt,
	unsigned short depth,
	vm_region_extended_info_t extended,
	mach_msg_type_number_t count)
{
	vm_page_t p;
	vm_object_t shadow;
	int ref_count;
	vm_object_t caller_object;

	shadow = object->shadow;
	caller_object = object;


	while (TRUE) {
		if (!(object->internal)) {
			extended->external_pager = 1;
		}

		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			/*
			 * Found a resident page.  A page in a shadowed
			 * object with only one reference would become
			 * private on the next write fault.
			 */
			if (shadow && (max_refcnt == 1)) {
				extended->pages_shared_now_private++;
			}

			if (!p->vmp_fictitious &&
			    (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
				extended->pages_dirtied++;
			} else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
				/* only newer callers get the reusable count */
				if (p->vmp_reusable || object->all_reusable) {
					extended->pages_reusable++;
				}
			}

			extended->pages_resident++;

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			return;
		}
		if (object->internal &&
		    object->alive &&
		    !object->terminating &&
		    object->pager_ready) {
			if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
			    == VM_EXTERNAL_STATE_EXISTS) {
				/* the pager has that page */
				extended->pages_swapped_out++;
				if (object != caller_object) {
					vm_object_unlock(object);
				}
				return;
			}
		}

		if (shadow) {
			/* not found here: descend into the shadow object */
			vm_object_lock(shadow);

			/* discount the shadow's paging reference */
			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
				ref_count--;
			}

			/* track the deepest level reached across all pages */
			if (++depth > extended->shadow_depth) {
				extended->shadow_depth = depth;
			}

			if (ref_count > max_refcnt) {
				max_refcnt = ref_count;
			}

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			/* translate the offset into the shadow's space */
			offset = offset + object->vo_shadow_offset;
			object = shadow;
			shadow = object->shadow;
			continue;
		}
		if (object != caller_object) {
			vm_object_unlock(object);
		}
		break;
	}
}
15187
15188 static int
vm_map_region_count_obj_refs(vm_map_entry_t entry,vm_object_t object)15189 vm_map_region_count_obj_refs(
15190 vm_map_entry_t entry,
15191 vm_object_t object)
15192 {
15193 int ref_count;
15194 vm_object_t chk_obj;
15195 vm_object_t tmp_obj;
15196
15197 if (VME_OBJECT(entry) == 0) {
15198 return 0;
15199 }
15200
15201 if (entry->is_sub_map) {
15202 return 0;
15203 } else {
15204 ref_count = 0;
15205
15206 chk_obj = VME_OBJECT(entry);
15207 vm_object_lock(chk_obj);
15208
15209 while (chk_obj) {
15210 if (chk_obj == object) {
15211 ref_count++;
15212 }
15213 tmp_obj = chk_obj->shadow;
15214 if (tmp_obj) {
15215 vm_object_lock(tmp_obj);
15216 }
15217 vm_object_unlock(chk_obj);
15218
15219 chk_obj = tmp_obj;
15220 }
15221 }
15222 return ref_count;
15223 }
15224
15225
15226 /*
15227 * Routine: vm_map_simplify
15228 *
15229 * Description:
15230 * Attempt to simplify the map representation in
15231 * the vicinity of the given starting address.
15232 * Note:
15233 * This routine is intended primarily to keep the
15234 * kernel maps more compact -- they generally don't
15235 * benefit from the "expand a map entry" technology
15236 * at allocation time because the adjacent entry
15237 * is often wired down.
15238 */
/*
 * vm_map_simplify_entry:
 *	Coalesce "this_entry" with its immediately preceding entry when
 *	the two are adjacent and identical in every attribute that
 *	matters (object, offset continuity, protections, inheritance,
 *	wiring, flags, ...).  On success the previous entry is unlinked
 *	and disposed and "this_entry" is extended backwards to cover it.
 *	The caller holds the map lock for write.
 */
void
vm_map_simplify_entry(
	vm_map_t map,
	vm_map_entry_t this_entry)
{
	vm_map_entry_t prev_entry;

	prev_entry = this_entry->vme_prev;

	if ((this_entry != vm_map_to_entry(map)) &&
	    (prev_entry != vm_map_to_entry(map)) &&

	    /* the two entries must be contiguous in the address space */
	    (prev_entry->vme_end == this_entry->vme_start) &&

	    /* same object, and the offsets must line up contiguously */
	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
	    (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
	    prev_entry->vme_start))
	    == VME_OFFSET(this_entry)) &&

	    /* all mapping attributes must match exactly */
	    (prev_entry->behavior == this_entry->behavior) &&
	    (prev_entry->needs_copy == this_entry->needs_copy) &&
	    (prev_entry->protection == this_entry->protection) &&
	    (prev_entry->max_protection == this_entry->max_protection) &&
	    (prev_entry->inheritance == this_entry->inheritance) &&
	    (prev_entry->use_pmap == this_entry->use_pmap) &&
	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
	    (prev_entry->no_cache == this_entry->no_cache) &&
	    (prev_entry->permanent == this_entry->permanent) &&
	    (prev_entry->map_aligned == this_entry->map_aligned) &&
	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
	    (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
	    (prev_entry->vme_resilient_codesign ==
	    this_entry->vme_resilient_codesign) &&
	    (prev_entry->vme_resilient_media ==
	    this_entry->vme_resilient_media) &&
	    (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&

	    (prev_entry->wired_count == this_entry->wired_count) &&
	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&

	    /* never coalesce atomic, in-transition or waited-on entries */
	    ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
	    (prev_entry->in_transition == FALSE) &&
	    (this_entry->in_transition == FALSE) &&
	    (prev_entry->needs_wakeup == FALSE) &&
	    (this_entry->needs_wakeup == FALSE) &&
	    (prev_entry->is_shared == this_entry->is_shared) &&
	    (prev_entry->superpage_size == FALSE) &&
	    (this_entry->superpage_size == FALSE)
	    ) {
		vm_map_store_entry_unlink(map, prev_entry);
		assert(prev_entry->vme_start < this_entry->vme_end);
		if (prev_entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
			    VM_MAP_PAGE_MASK(map)));
		}
		/* grow this_entry backwards over the previous entry's range */
		this_entry->vme_start = prev_entry->vme_start;
		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, this_entry, TRUE);
		}

		/* drop the reference the disposed entry held */
		if (prev_entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(prev_entry));
		} else {
			vm_object_deallocate(VME_OBJECT(prev_entry));
		}
		vm_map_entry_dispose(map, prev_entry);
		SAVE_HINT_MAP_WRITE(map, this_entry);
	}
}
15313
15314 void
vm_map_simplify(vm_map_t map,vm_map_offset_t start)15315 vm_map_simplify(
15316 vm_map_t map,
15317 vm_map_offset_t start)
15318 {
15319 vm_map_entry_t this_entry;
15320
15321 vm_map_lock(map);
15322 if (vm_map_lookup_entry(map, start, &this_entry)) {
15323 vm_map_simplify_entry(map, this_entry);
15324 vm_map_simplify_entry(map, this_entry->vme_next);
15325 }
15326 vm_map_unlock(map);
15327 }
15328
15329 static void
vm_map_simplify_range(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)15330 vm_map_simplify_range(
15331 vm_map_t map,
15332 vm_map_offset_t start,
15333 vm_map_offset_t end)
15334 {
15335 vm_map_entry_t entry;
15336
15337 /*
15338 * The map should be locked (for "write") by the caller.
15339 */
15340
15341 if (start >= end) {
15342 /* invalid address range */
15343 return;
15344 }
15345
15346 start = vm_map_trunc_page(start,
15347 VM_MAP_PAGE_MASK(map));
15348 end = vm_map_round_page(end,
15349 VM_MAP_PAGE_MASK(map));
15350
15351 if (!vm_map_lookup_entry(map, start, &entry)) {
15352 /* "start" is not mapped and "entry" ends before "start" */
15353 if (entry == vm_map_to_entry(map)) {
15354 /* start with first entry in the map */
15355 entry = vm_map_first_entry(map);
15356 } else {
15357 /* start with next entry */
15358 entry = entry->vme_next;
15359 }
15360 }
15361
15362 while (entry != vm_map_to_entry(map) &&
15363 entry->vme_start <= end) {
15364 /* try and coalesce "entry" with its previous entry */
15365 vm_map_simplify_entry(map, entry);
15366 entry = entry->vme_next;
15367 }
15368 }
15369
15370
15371 /*
15372 * Routine: vm_map_machine_attribute
15373 * Purpose:
15374 * Provide machine-specific attributes to mappings,
15375 * such as cachability etc. for machines that provide
15376 * them. NUMA architectures and machines with big/strange
15377 * caches will use this.
15378 * Note:
15379 * Responsibilities for locking and checking are handled here,
15380 * everything else in the pmap module. If any non-volatile
15381 * information must be kept, the pmap module should handle
15382 * it itself. [This assumes that attributes do not
15383 * need to be inherited, which seems ok to me]
15384 */
/*
 * vm_map_machine_attribute:
 *	Apply a machine-specific attribute to [start, end) of "map".
 *	Anything other than MATTR_CACHE is handed straight to
 *	pmap_attribute(); MATTR_CACHE requires resolving each mapped
 *	page (walking shadow chains as needed) so the cache sync can be
 *	done on physical pages via pmap_attribute_cache_sync().
 *	Returns KERN_FAILURE if the range contains an unmapped hole.
 */
kern_return_t
vm_map_machine_attribute(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_machine_attribute_t attribute,
	vm_machine_attribute_val_t* value)              /* IN/OUT */
{
	kern_return_t ret;
	vm_map_size_t sync_size;
	vm_map_entry_t entry;

	if (start < vm_map_min(map) || end > vm_map_max(map)) {
		return KERN_INVALID_ADDRESS;
	}

	/* Figure how much memory we need to flush (in page increments) */
	sync_size = end - start;

	vm_map_lock(map);

	if (attribute != MATTR_CACHE) {
		/* If we don't have to find physical addresses, we */
		/* don't have to do an explicit traversal here. */
		ret = pmap_attribute(map->pmap, start, end - start,
		    attribute, value);
		vm_map_unlock(map);
		return ret;
	}

	ret = KERN_SUCCESS;                                             /* Assume it all worked */

	while (sync_size) {
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_map_size_t   sub_size;
			/* clip this iteration to the current entry's end */
			if ((entry->vme_end - start) > sync_size) {
				sub_size = sync_size;
				sync_size = 0;
			} else {
				sub_size = entry->vme_end - start;
				sync_size -= sub_size;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;

				/* recurse into the submap's address space */
				sub_start = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				sub_end = sub_start + sub_size;
				vm_map_machine_attribute(
					VME_SUBMAP(entry),
					sub_start,
					sub_end,
					attribute, value);
			} else {
				if (VME_OBJECT(entry)) {
					vm_page_t m;
					vm_object_t object;
					vm_object_t base_object;
					vm_object_t last_object;
					vm_object_offset_t offset;
					vm_object_offset_t base_offset;
					vm_map_size_t range;
					range = sub_size;
					offset = (start - entry->vme_start)
					    + VME_OFFSET(entry);
					offset = vm_object_trunc_page(offset);
					base_offset = offset;
					object = VME_OBJECT(entry);
					base_object = object;
					last_object = NULL;

					vm_object_lock(object);

					while (range) {
						m = vm_page_lookup(
							object, offset);

						if (m && !m->vmp_fictitious) {
							/* sync the caches for this physical page */
							ret =
							    pmap_attribute_cache_sync(
								VM_PAGE_GET_PHYS_PAGE(m),
								PAGE_SIZE,
								attribute, value);
						} else if (object->shadow) {
							/*
							 * Page not here: descend into the
							 * shadow, hand-over-hand locking,
							 * and retry the same page.
							 */
							offset = offset + object->vo_shadow_offset;
							last_object = object;
							object = object->shadow;
							vm_object_lock(last_object->shadow);
							vm_object_unlock(last_object);
							continue;
						}
						if (range < PAGE_SIZE) {
							range = 0;
						} else {
							range -= PAGE_SIZE;
						}

						/* pop back up to the top object for the next page */
						if (base_object != object) {
							vm_object_unlock(object);
							vm_object_lock(base_object);
							object = base_object;
						}
						/* Bump to the next page */
						base_offset += PAGE_SIZE;
						offset = base_offset;
					}
					vm_object_unlock(object);
				}
			}
			start += sub_size;
		} else {
			/* hole in the address range */
			vm_map_unlock(map);
			return KERN_FAILURE;
		}
	}

	vm_map_unlock(map);

	return ret;
}
15506
15507 /*
15508 * vm_map_behavior_set:
15509 *
15510 * Sets the paging reference behavior of the specified address
15511 * range in the target map. Paging reference behavior affects
15512 * how pagein operations resulting from faults on the map will be
15513 * clustered.
15514 */
/*
 * vm_map_behavior_set (see block comment above):
 *	Persistent behaviors (DEFAULT/RANDOM/SEQUENTIAL/RSEQNTL/
 *	ZERO_WIRED_PAGES) are recorded on the map entries in the range;
 *	the remaining behaviors trigger an immediate action and are
 *	dispatched to the corresponding helper.
 */
kern_return_t
vm_map_behavior_set(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_behavior_t new_behavior)
{
	vm_map_entry_t entry;
	vm_map_entry_t temp_entry;

	if (start > end ||
	    start < vm_map_min(map) ||
	    end > vm_map_max(map)) {
		/* NOTE(review): historical return code; not KERN_INVALID_ADDRESS */
		return KERN_NO_SPACE;
	}

	switch (new_behavior) {
	/*
	 * This first block of behaviors all set a persistent state on the specified
	 * memory range.  All we have to do here is to record the desired behavior
	 * in the vm_map_entry_t's.
	 */

	case VM_BEHAVIOR_DEFAULT:
	case VM_BEHAVIOR_RANDOM:
	case VM_BEHAVIOR_SEQUENTIAL:
	case VM_BEHAVIOR_RSEQNTL:
	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
		vm_map_lock(map);

		/*
		 * The entire address range must be valid for the map.
		 * Note that vm_map_range_check() does a
		 * vm_map_lookup_entry() internally and returns the
		 * entry containing the start of the address range if
		 * the entire range is valid.
		 */
		if (vm_map_range_check(map, start, end, &temp_entry)) {
			entry = temp_entry;
			vm_map_clip_start(map, entry, start);
		} else {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
			vm_map_clip_end(map, entry, end);
			if (entry->is_sub_map) {
				assert(!entry->use_pmap);
			}

			if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
				entry->zero_wired_pages = TRUE;
			} else {
				entry->behavior = new_behavior;
			}
			entry = entry->vme_next;
		}

		vm_map_unlock(map);
		break;

	/*
	 * The rest of these are different from the above in that they cause
	 * an immediate action to take place as opposed to setting a behavior that
	 * affects future actions.
	 */

	case VM_BEHAVIOR_WILLNEED:
		return vm_map_willneed(map, start, end);

	case VM_BEHAVIOR_DONTNEED:
		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_FREE:
		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_REUSABLE:
		return vm_map_reusable_pages(map, start, end);

	case VM_BEHAVIOR_REUSE:
		return vm_map_reuse_pages(map, start, end);

	case VM_BEHAVIOR_CAN_REUSE:
		return vm_map_can_reuse(map, start, end);

#if MACH_ASSERT
	case VM_BEHAVIOR_PAGEOUT:
		return vm_map_pageout(map, start, end);
#endif /* MACH_ASSERT */

	default:
		return KERN_INVALID_ARGUMENT;
	}

	return KERN_SUCCESS;
}
15612
15613
15614 /*
15615 * Internals for madvise(MADV_WILLNEED) system call.
15616 *
15617 * The implementation is to do:-
15618 * a) read-ahead if the mapping corresponds to a mapped regular file
15619 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
15620 */
15621
15622
/*
 * vm_map_willneed (see block comment above):
 *	For each entry in [start, end): pre-fault anonymous memory, or
 *	issue an asynchronous read-ahead request to the backing file's
 *	pager.  Drops and reacquires the map's read lock around the
 *	slow operations, revalidating the range each time.  Returns
 *	KERN_INVALID_ADDRESS if a hole is (or appears) in the range.
 */
static kern_return_t
vm_map_willneed(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end
	)
{
	vm_map_entry_t entry;
	vm_object_t object;
	memory_object_t pager;
	struct vm_object_fault_info fault_info = {};
	kern_return_t kr;
	vm_object_size_t len;
	vm_object_offset_t offset;

	fault_info.interruptible = THREAD_UNINT;                /* ignored value */
	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.stealth = TRUE;

	/*
	 * The MADV_WILLNEED operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && start < end;) {
		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.  After that, the offset will always be zero to
		 * correspond to the beginning of the current vm_map_entry.
		 */
		offset = (start - entry->vme_start) + VME_OFFSET(entry);

		/*
		 * Set the length so we don't go beyond the end of the
		 * map_entry or beyond the end of the range we were given.
		 * This range could span also multiple map entries all of which
		 * map different files, so make sure we only do the right amount
		 * of I/O for each object.  Note that it's possible for there
		 * to be multiple map entries all referring to the same object
		 * but with different page permissions, but it's not worth
		 * trying to optimize that case.
		 */
		len = MIN(entry->vme_end - start, end - start);

		if ((vm_size_t) len != len) {
			/* 32-bit overflow */
			len = (vm_size_t) (0 - PAGE_SIZE);
		}
		fault_info.cluster_size = (vm_size_t) len;
		fault_info.lo_offset = offset;
		fault_info.hi_offset = offset + len;
		fault_info.user_tag = VME_ALIAS(entry);
		fault_info.pmap_options = 0;
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}

		/*
		 * If the entry is a submap OR there's no read permission
		 * to this mapping, then just skip it.
		 */
		if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
			entry = entry->vme_next;
			start = entry->vme_start;
			continue;
		}

		object = VME_OBJECT(entry);

		if (object == NULL ||
		    (object && object->internal)) {
			/*
			 * Memory range backed by anonymous memory.
			 * Fault the pages in synchronously, page by page.
			 */
			vm_size_t region_size = 0, effective_page_size = 0;
			vm_map_offset_t addr = 0, effective_page_mask = 0;

			region_size = len;
			addr = start;

			effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
			effective_page_size = effective_page_mask + 1;

			vm_map_unlock_read(map);

			while (region_size) {
				vm_pre_fault(
					vm_map_trunc_page(addr, effective_page_mask),
					VM_PROT_READ | VM_PROT_WRITE);

				region_size -= effective_page_size;
				addr += effective_page_size;
			}
		} else {
			/*
			 * Find the file object backing this map entry.  If there is
			 * none, then we simply ignore the "will need" advice for this
			 * entry and go on to the next one.
			 */
			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
				entry = entry->vme_next;
				start = entry->vme_start;
				continue;
			}

			/*
			 * NOTE(review): this paging_begin/unlock sequence implies
			 * find_vnode_object() returns the object locked — confirm.
			 */
			vm_object_paging_begin(object);
			pager = object->pager;
			vm_object_unlock(object);

			/*
			 * The data_request() could take a long time, so let's
			 * release the map lock to avoid blocking other threads.
			 */
			vm_map_unlock_read(map);

			/*
			 * Get the data from the object asynchronously.
			 *
			 * Note that memory_object_data_request() places limits on the
			 * amount of I/O it will do.  Regardless of the len we
			 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
			 * silently truncates the len to that size.  This isn't
			 * necessarily bad since madvise shouldn't really be used to
			 * page in unlimited amounts of data.  Other Unix variants
			 * limit the willneed case as well.  If this turns out to be an
			 * issue for developers, then we can always adjust the policy
			 * here and still be backwards compatible since this is all
			 * just "advice".
			 */
			kr = memory_object_data_request(
				pager,
				vm_object_trunc_page(offset) + object->paging_offset,
				0,      /* ignored */
				VM_PROT_READ,
				(memory_object_fault_info_t)&fault_info);

			vm_object_lock(object);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			/*
			 * If we couldn't do the I/O for some reason, just give up on
			 * the madvise.  We still return success to the user since
			 * madvise isn't supposed to fail when the advice can't be
			 * taken.
			 */

			if (kr != KERN_SUCCESS) {
				return KERN_SUCCESS;
			}
		}

		start += len;
		if (start >= end) {
			/* done */
			return KERN_SUCCESS;
		}

		/* look up next entry */
		vm_map_lock_read(map);
		if (!vm_map_lookup_entry(map, start, &entry)) {
			/*
			 * There's a new hole in the address range.
			 */
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
15813
15814 static boolean_t
vm_map_entry_is_reusable(vm_map_entry_t entry)15815 vm_map_entry_is_reusable(
15816 vm_map_entry_t entry)
15817 {
15818 /* Only user map entries */
15819
15820 vm_object_t object;
15821
15822 if (entry->is_sub_map) {
15823 return FALSE;
15824 }
15825
15826 switch (VME_ALIAS(entry)) {
15827 case VM_MEMORY_MALLOC:
15828 case VM_MEMORY_MALLOC_SMALL:
15829 case VM_MEMORY_MALLOC_LARGE:
15830 case VM_MEMORY_REALLOC:
15831 case VM_MEMORY_MALLOC_TINY:
15832 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15833 case VM_MEMORY_MALLOC_LARGE_REUSED:
15834 /*
15835 * This is a malloc() memory region: check if it's still
15836 * in its original state and can be re-used for more
15837 * malloc() allocations.
15838 */
15839 break;
15840 default:
15841 /*
15842 * Not a malloc() memory region: let the caller decide if
15843 * it's re-usable.
15844 */
15845 return TRUE;
15846 }
15847
15848 if (/*entry->is_shared ||*/
15849 entry->is_sub_map ||
15850 entry->in_transition ||
15851 entry->protection != VM_PROT_DEFAULT ||
15852 entry->max_protection != VM_PROT_ALL ||
15853 entry->inheritance != VM_INHERIT_DEFAULT ||
15854 entry->no_cache ||
15855 entry->permanent ||
15856 entry->superpage_size != FALSE ||
15857 entry->zero_wired_pages ||
15858 entry->wired_count != 0 ||
15859 entry->user_wired_count != 0) {
15860 return FALSE;
15861 }
15862
15863 object = VME_OBJECT(entry);
15864 if (object == VM_OBJECT_NULL) {
15865 return TRUE;
15866 }
15867 if (
15868 #if 0
15869 /*
15870 * Let's proceed even if the VM object is potentially
15871 * shared.
15872 * We check for this later when processing the actual
15873 * VM pages, so the contents will be safe if shared.
15874 *
15875 * But we can still mark this memory region as "reusable" to
15876 * acknowledge that the caller did let us know that the memory
15877 * could be re-used and should not be penalized for holding
15878 * on to it. This allows its "resident size" to not include
15879 * the reusable range.
15880 */
15881 object->ref_count == 1 &&
15882 #endif
15883 object->wired_page_count == 0 &&
15884 object->copy == VM_OBJECT_NULL &&
15885 object->shadow == VM_OBJECT_NULL &&
15886 object->internal &&
15887 object->purgable == VM_PURGABLE_DENY &&
15888 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15889 !object->code_signed) {
15890 return TRUE;
15891 }
15892 return FALSE;
15893 }
15894
/*
 * vm_map_reuse_pages:
 *	Implements madvise(MADV_REUSE)-style semantics: mark the pages
 *	backing [start, end) as being in use again (undoing a prior
 *	"reusable" designation) and restore the MALLOC_LARGE_REUSED
 *	alias on entries previously tagged MALLOC_LARGE_REUSABLE.
 *	Returns KERN_INVALID_ADDRESS if the range has holes or contains
 *	a non-reusable entry.
 */
static kern_return_t
vm_map_reuse_pages(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_object_offset_t start_offset, end_offset;

	/*
	 * The MADV_REUSE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a
		 * portion of a native page.
		 */
		return KERN_SUCCESS;
	}

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reuse_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reuse_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
		} else {
			start_offset = 0;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		/* translate map offsets into object offsets */
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
			vm_object_reuse_pages(object, start_offset, end_offset,
			    TRUE);
			vm_object_unlock(object);
		}

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reuse_pages_success++;
	return KERN_SUCCESS;
}
15988
15989
/*
 * Routine:	vm_map_reusable_pages
 *
 * Description:	Implements the MADV_FREE_REUSABLE advice: mark the pages
 *	backing [start, end) as "reusable", allowing their contents to be
 *	discarded and their cost removed from the task's resident footprint.
 *	The whole range must be allocated (no holes) and each entry must be
 *	writable (or used for JIT), since reusable pages may lose their
 *	contents.
 *
 * Returns:	KERN_SUCCESS on success (also for sub-PAGE_SIZE maps, where
 *		this is currently a no-op — see TODO4K below);
 *		KERN_INVALID_ADDRESS if the range has holes or an entry fails
 *		the "reusable" sanity checks;
 *		KERN_PROTECTION_FAILURE if an entry is not writable.
 */
static kern_return_t
vm_map_reusable_pages(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_object_offset_t start_offset, end_offset;
	/* map address corresponding to start_offset, passed to the pmap layer */
	vm_map_offset_t pmap_offset;

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a portion
		 * of a native page.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap); /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reusable_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/* 1 => deactivate and allow discard; -1 => leave pages alone */
		int kill_pages = 0;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
			/* not writable: can't discard contents */
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_nonwritable++;
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
			pmap_offset = start;
		} else {
			start_offset = 0;
			pmap_offset = entry->vme_start;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			/* nothing mapped yet: nothing to make reusable */
			continue;
		}


		vm_object_lock(object);
		/*
		 * Only discard page contents if the object is not truly
		 * shared: either we hold the only reference, or its copy
		 * strategy is asymmetric with no pending copy object, and
		 * it has no shadow.
		 */
		if (((object->ref_count == 1) ||
		    (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
		    object->copy == VM_OBJECT_NULL)) &&
		    object->shadow == VM_OBJECT_NULL &&
		    /*
		     * "iokit_acct" entries are billed for their virtual size
		     * (rather than for their resident pages only), so they
		     * wouldn't benefit from making pages reusable, and it
		     * would be hard to keep track of pages that are both
		     * "iokit_acct" and "reusable" in the pmap stats and
		     * ledgers.
		     */
		    !(entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap))) {
			if (object->ref_count != 1) {
				vm_page_stats_reusable.reusable_shared++;
			}
			kill_pages = 1;
		} else {
			kill_pages = -1;
		}
		if (kill_pages != -1) {
			vm_object_deactivate_pages(object,
			    start_offset,
			    end_offset - start_offset,
			    kill_pages,
			    TRUE /*reusable_pages*/,
			    map->pmap,
			    pmap_offset);
		} else {
			/* shared object: count it and leave the pages intact */
			vm_page_stats_reusable.reusable_pages_shared++;
			DTRACE_VM4(vm_map_reusable_pages_shared,
			    unsigned int, VME_ALIAS(entry),
			    vm_map_t, map,
			    vm_map_entry_t, entry,
			    vm_object_t, object);
		}
		vm_object_unlock(object);

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
		    VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reusable_pages_success++;
	return KERN_SUCCESS;
}
16135
16136
16137 static kern_return_t
vm_map_can_reuse(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)16138 vm_map_can_reuse(
16139 vm_map_t map,
16140 vm_map_offset_t start,
16141 vm_map_offset_t end)
16142 {
16143 vm_map_entry_t entry;
16144
16145 /*
16146 * The MADV_REUSABLE operation doesn't require any changes to the
16147 * vm_map_entry_t's, so the read lock is sufficient.
16148 */
16149
16150 vm_map_lock_read(map);
16151 assert(map->pmap != kernel_pmap); /* protect alias access */
16152
16153 /*
16154 * The madvise semantics require that the address range be fully
16155 * allocated with no holes. Otherwise, we're required to return
16156 * an error.
16157 */
16158
16159 if (!vm_map_range_check(map, start, end, &entry)) {
16160 vm_map_unlock_read(map);
16161 vm_page_stats_reusable.can_reuse_failure++;
16162 return KERN_INVALID_ADDRESS;
16163 }
16164
16165 /*
16166 * Examine each vm_map_entry_t in the range.
16167 */
16168 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16169 entry = entry->vme_next) {
16170 /*
16171 * Sanity check on the VM map entry.
16172 */
16173 if (!vm_map_entry_is_reusable(entry)) {
16174 vm_map_unlock_read(map);
16175 vm_page_stats_reusable.can_reuse_failure++;
16176 return KERN_INVALID_ADDRESS;
16177 }
16178 }
16179
16180 vm_map_unlock_read(map);
16181 vm_page_stats_reusable.can_reuse_success++;
16182 return KERN_SUCCESS;
16183 }
16184
16185
#if MACH_ASSERT
/*
 * Routine:	vm_map_pageout
 *
 * Description:	Push the internal pages backing [start, end) out via
 *	vm_object_pageout().  Submap entries are handled by descending one
 *	level into the submap.  Only internal (anonymous) VM objects are
 *	paged out; external or absent objects are skipped.
 *	Debug/assert builds only (MACH_ASSERT).
 *
 * Returns:	KERN_SUCCESS, or KERN_INVALID_ADDRESS if the range (or the
 *		corresponding submap range) is not fully allocated.
 */
static kern_return_t
vm_map_pageout(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t entry;

	/*
	 * The MADV_PAGEOUT operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		vm_object_t object;

		if (entry->is_sub_map) {
			/*
			 * Descend one level: find the submap range matching
			 * this entry and page out its internal objects.
			 */
			vm_map_t submap;
			vm_map_offset_t submap_start;
			vm_map_offset_t submap_end;
			vm_map_entry_t submap_entry;

			submap = VME_SUBMAP(entry);
			submap_start = VME_OFFSET(entry);
			submap_end = submap_start + (entry->vme_end -
			    entry->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
			    submap_start,
			    submap_end,
			    &submap_entry)) {
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			object = VME_OBJECT(submap_entry);
			if (submap_entry->is_sub_map ||
			    object == VM_OBJECT_NULL ||
			    !object->internal) {
				/* nested submap or non-internal object: skip */
				vm_map_unlock_read(submap);
				continue;
			}

			vm_object_pageout(object);

			vm_map_unlock_read(submap);
			submap = VM_MAP_NULL;
			submap_entry = VM_MAP_ENTRY_NULL;
			continue;
		}

		/*
		 * Top-level entry.  "entry->is_sub_map" is necessarily
		 * FALSE here (the submap branch above always continues
		 * or returns), so only the object checks remain.
		 */
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL ||
		    !object->internal) {
			continue;
		}

		vm_object_pageout(object);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
16275
16276
/*
 * Routine:	vm_map_entry_insert
 *
 * Description: This routine inserts a new vm_entry in a locked map.
 *	A fresh vm_map_entry_t is created, initialized from the supplied
 *	attributes, linked into "map" after "insp_entry", and the map's
 *	size and hints are updated.
 *
 *	The caller must hold "map" locked exclusively (asserted below).
 *
 *	Parameters (selected):
 *	  insp_entry	entry after which the new entry is linked (required).
 *	  start/end	address range of the new entry; must be aligned to
 *			the map's page size (or the native page size when
 *			"clear_map_aligned" strips map alignment).
 *	  object/offset	backing VM object (or submap when "is_submap") and
 *			starting offset within it.
 *	  used_for_jit	only honored for the first JIT entry unless the map
 *			policy allows multiple JIT regions.
 *
 * Returns:	the newly created and linked vm_map_entry_t.
 */
vm_map_entry_t
vm_map_entry_insert(
	vm_map_t map,
	vm_map_entry_t insp_entry,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_object_t object,
	vm_object_offset_t offset,
	vm_map_kernel_flags_t vmk_flags,
	boolean_t needs_copy,
	boolean_t is_shared,
	boolean_t in_transition,
	vm_prot_t cur_protection,
	vm_prot_t max_protection,
	vm_behavior_t behavior,
	vm_inherit_t inheritance,
	unsigned short wired_count,
	boolean_t no_cache,
	boolean_t permanent,
	boolean_t no_copy_on_read,
	unsigned int superpage_size,
	boolean_t clear_map_aligned,
	boolean_t is_submap,
	boolean_t used_for_jit,
	int alias,
	boolean_t translated_allow_execute)
{
	vm_map_entry_t new_entry;

	assert(insp_entry != (vm_map_entry_t)0);
	vm_map_lock_assert_exclusive(map);

#if DEVELOPMENT || DEBUG
	/* catch callers whose (size + offset) would wrap around */
	vm_object_offset_t end_offset = 0;
	assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
#endif /* DEVELOPMENT || DEBUG */

	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);

	/*
	 * An entry is "map_aligned" when the map uses a page size different
	 * from the native one; "clear_map_aligned" lets callers opt out
	 * when start/end don't meet the map's alignment.
	 */
	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}
	if (clear_map_aligned &&
	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
		new_entry->map_aligned = FALSE;
	}

	new_entry->vme_start = start;
	new_entry->vme_end = end;
	if (new_entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
		    VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
		    VM_MAP_PAGE_MASK(map)));
	} else {
		assert(page_aligned(new_entry->vme_start));
		assert(page_aligned(new_entry->vme_end));
	}
	assert(new_entry->vme_start < new_entry->vme_end);

	VME_OBJECT_SET(new_entry, object);
	VME_OFFSET_SET(new_entry, offset);
	new_entry->is_shared = is_shared;
	new_entry->is_sub_map = is_submap;
	new_entry->needs_copy = needs_copy;
	new_entry->in_transition = in_transition;
	new_entry->needs_wakeup = FALSE;
	new_entry->inheritance = inheritance;
	new_entry->protection = cur_protection;
	new_entry->max_protection = max_protection;
	new_entry->behavior = behavior;
	new_entry->wired_count = wired_count;
	new_entry->user_wired_count = 0;
	if (is_submap) {
		/*
		 * submap: "use_pmap" means "nested".
		 * default: false.
		 */
		new_entry->use_pmap = FALSE;
	} else {
		/*
		 * object: "use_pmap" means "use pmap accounting" for footprint.
		 * default: true.
		 */
		new_entry->use_pmap = TRUE;
	}
	VME_ALIAS_SET(new_entry, alias);
	new_entry->zero_wired_pages = FALSE;
	new_entry->no_cache = no_cache;
	new_entry->permanent = permanent;
	if (superpage_size) {
		new_entry->superpage_size = TRUE;
	} else {
		new_entry->superpage_size = FALSE;
	}
	if (used_for_jit) {
		/* only mark as JIT if the map allows it (first entry or policy) */
		if (!(map->jit_entry_exists) ||
		    VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
			new_entry->used_for_jit = TRUE;
			map->jit_entry_exists = TRUE;
		}
	} else {
		new_entry->used_for_jit = FALSE;
	}
	if (translated_allow_execute) {
		new_entry->translated_allow_execute = TRUE;
	} else {
		new_entry->translated_allow_execute = FALSE;
	}
	new_entry->pmap_cs_associated = FALSE;
	new_entry->iokit_acct = FALSE;
	new_entry->vme_resilient_codesign = FALSE;
	new_entry->vme_resilient_media = FALSE;
	new_entry->vme_atomic = FALSE;
	new_entry->vme_no_copy_on_read = no_copy_on_read;

	/*
	 * Insert the new entry into the list.
	 */

	vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
	map->size += end - start;

	/*
	 * Update the free space hint and the lookup hint.
	 */

	SAVE_HINT_MAP_WRITE(map, new_entry);
	return new_entry;
}
16415
16416 /*
16417 * Routine: vm_map_remap_extract
16418 *
16419 * Description: This routine returns a vm_entry list from a map.
16420 */
16421 static kern_return_t
vm_map_remap_extract(vm_map_t map,vm_map_offset_t addr,vm_map_size_t size,boolean_t copy,struct vm_map_header * map_header,vm_prot_t * cur_protection,vm_prot_t * max_protection,vm_inherit_t inheritance,vm_map_kernel_flags_t vmk_flags)16422 vm_map_remap_extract(
16423 vm_map_t map,
16424 vm_map_offset_t addr,
16425 vm_map_size_t size,
16426 boolean_t copy,
16427 struct vm_map_header *map_header,
16428 vm_prot_t *cur_protection, /* IN/OUT */
16429 vm_prot_t *max_protection, /* IN/OUT */
16430 /* What, no behavior? */
16431 vm_inherit_t inheritance,
16432 vm_map_kernel_flags_t vmk_flags)
16433 {
16434 kern_return_t result;
16435 vm_map_size_t mapped_size;
16436 vm_map_size_t tmp_size;
16437 vm_map_entry_t src_entry; /* result of last map lookup */
16438 vm_map_entry_t new_entry;
16439 vm_object_offset_t offset;
16440 vm_map_offset_t map_address;
16441 vm_map_offset_t src_start; /* start of entry to map */
16442 vm_map_offset_t src_end; /* end of region to be mapped */
16443 vm_object_t object;
16444 vm_map_version_t version;
16445 boolean_t src_needs_copy;
16446 boolean_t new_entry_needs_copy;
16447 vm_map_entry_t saved_src_entry;
16448 boolean_t src_entry_was_wired;
16449 vm_prot_t max_prot_for_prot_copy;
16450 vm_map_offset_t effective_page_mask;
16451 boolean_t pageable, same_map;
16452 boolean_t vm_remap_legacy;
16453 vm_prot_t required_cur_prot, required_max_prot;
16454
16455 pageable = vmk_flags.vmkf_copy_pageable;
16456 same_map = vmk_flags.vmkf_copy_same_map;
16457
16458 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16459
16460 assert(map != VM_MAP_NULL);
16461 assert(size != 0);
16462 assert(size == vm_map_round_page(size, effective_page_mask));
16463 assert(inheritance == VM_INHERIT_NONE ||
16464 inheritance == VM_INHERIT_COPY ||
16465 inheritance == VM_INHERIT_SHARE);
16466 assert(!(*cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16467 assert(!(*max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16468 assert((*cur_protection & *max_protection) == *cur_protection);
16469
16470 /*
16471 * Compute start and end of region.
16472 */
16473 src_start = vm_map_trunc_page(addr, effective_page_mask);
16474 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16475
16476 /*
16477 * Initialize map_header.
16478 */
16479 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16480 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16481 map_header->nentries = 0;
16482 map_header->entries_pageable = pageable;
16483 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16484 map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
16485 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16486
16487 vm_map_store_init( map_header );
16488
16489 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16490 /*
16491 * Special case for vm_map_protect(VM_PROT_COPY):
16492 * we want to set the new mappings' max protection to the
16493 * specified *max_protection...
16494 */
16495 max_prot_for_prot_copy = *max_protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
16496 /* ... but we want to use the vm_remap() legacy mode */
16497 *max_protection = VM_PROT_NONE;
16498 *cur_protection = VM_PROT_NONE;
16499 } else {
16500 max_prot_for_prot_copy = VM_PROT_NONE;
16501 }
16502
16503 if (*cur_protection == VM_PROT_NONE &&
16504 *max_protection == VM_PROT_NONE) {
16505 /*
16506 * vm_remap() legacy mode:
16507 * Extract all memory regions in the specified range and
16508 * collect the strictest set of protections allowed on the
16509 * entire range, so the caller knows what they can do with
16510 * the remapped range.
16511 * We start with VM_PROT_ALL and we'll remove the protections
16512 * missing from each memory region.
16513 */
16514 vm_remap_legacy = TRUE;
16515 *cur_protection = VM_PROT_ALL;
16516 *max_protection = VM_PROT_ALL;
16517 required_cur_prot = VM_PROT_NONE;
16518 required_max_prot = VM_PROT_NONE;
16519 } else {
16520 /*
16521 * vm_remap_new() mode:
16522 * Extract all memory regions in the specified range and
16523 * ensure that they have at least the protections specified
16524 * by the caller via *cur_protection and *max_protection.
16525 * The resulting mapping should have these protections.
16526 */
16527 vm_remap_legacy = FALSE;
16528 if (copy) {
16529 required_cur_prot = VM_PROT_NONE;
16530 required_max_prot = VM_PROT_READ;
16531 } else {
16532 required_cur_prot = *cur_protection;
16533 required_max_prot = *max_protection;
16534 }
16535 }
16536
16537 map_address = 0;
16538 mapped_size = 0;
16539 result = KERN_SUCCESS;
16540
16541 /*
16542 * The specified source virtual space might correspond to
16543 * multiple map entries, need to loop on them.
16544 */
16545 vm_map_lock(map);
16546 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16547 /*
16548 * This address space uses sub-pages so the range might
16549 * not be re-mappable in an address space with larger
16550 * pages. Re-assemble any broken-up VM map entries to
16551 * improve our chances of making it work.
16552 */
16553 vm_map_simplify_range(map, src_start, src_end);
16554 }
16555 while (mapped_size != size) {
16556 vm_map_size_t entry_size;
16557
16558 /*
16559 * Find the beginning of the region.
16560 */
16561 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16562 result = KERN_INVALID_ADDRESS;
16563 break;
16564 }
16565
16566 if (src_start < src_entry->vme_start ||
16567 (mapped_size && src_start != src_entry->vme_start)) {
16568 result = KERN_INVALID_ADDRESS;
16569 break;
16570 }
16571
16572 tmp_size = size - mapped_size;
16573 if (src_end > src_entry->vme_end) {
16574 tmp_size -= (src_end - src_entry->vme_end);
16575 }
16576
16577 entry_size = (vm_map_size_t)(src_entry->vme_end -
16578 src_entry->vme_start);
16579
16580 if (src_entry->is_sub_map &&
16581 vmk_flags.vmkf_copy_single_object) {
16582 vm_map_t submap;
16583 vm_map_offset_t submap_start;
16584 vm_map_size_t submap_size;
16585 boolean_t submap_needs_copy;
16586
16587 /*
16588 * No check for "required protection" on "src_entry"
16589 * because the protections that matter are the ones
16590 * on the submap's VM map entry, which will be checked
16591 * during the call to vm_map_remap_extract() below.
16592 */
16593 submap_size = src_entry->vme_end - src_start;
16594 if (submap_size > size) {
16595 submap_size = size;
16596 }
16597 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16598 submap = VME_SUBMAP(src_entry);
16599 if (copy) {
16600 /*
16601 * The caller wants a copy-on-write re-mapping,
16602 * so let's extract from the submap accordingly.
16603 */
16604 submap_needs_copy = TRUE;
16605 } else if (src_entry->needs_copy) {
16606 /*
16607 * The caller wants a shared re-mapping but the
16608 * submap is mapped with "needs_copy", so its
16609 * contents can't be shared as is. Extract the
16610 * contents of the submap as "copy-on-write".
16611 * The re-mapping won't be shared with the
16612 * original mapping but this is equivalent to
16613 * what happened with the original "remap from
16614 * submap" code.
16615 * The shared region is mapped "needs_copy", for
16616 * example.
16617 */
16618 submap_needs_copy = TRUE;
16619 } else {
16620 /*
16621 * The caller wants a shared re-mapping and
16622 * this mapping can be shared (no "needs_copy"),
16623 * so let's extract from the submap accordingly.
16624 * Kernel submaps are mapped without
16625 * "needs_copy", for example.
16626 */
16627 submap_needs_copy = FALSE;
16628 }
16629 vm_map_reference(submap);
16630 vm_map_unlock(map);
16631 src_entry = NULL;
16632 if (vm_remap_legacy) {
16633 *cur_protection = VM_PROT_NONE;
16634 *max_protection = VM_PROT_NONE;
16635 }
16636
16637 DTRACE_VM7(remap_submap_recurse,
16638 vm_map_t, map,
16639 vm_map_offset_t, addr,
16640 vm_map_size_t, size,
16641 boolean_t, copy,
16642 vm_map_offset_t, submap_start,
16643 vm_map_size_t, submap_size,
16644 boolean_t, submap_needs_copy);
16645
16646 result = vm_map_remap_extract(submap,
16647 submap_start,
16648 submap_size,
16649 submap_needs_copy,
16650 map_header,
16651 cur_protection,
16652 max_protection,
16653 inheritance,
16654 vmk_flags);
16655 vm_map_deallocate(submap);
16656 return result;
16657 }
16658
16659 if (src_entry->is_sub_map) {
16660 /* protections for submap mapping are irrelevant here */
16661 } else if (((src_entry->protection & required_cur_prot) !=
16662 required_cur_prot) ||
16663 ((src_entry->max_protection & required_max_prot) !=
16664 required_max_prot)) {
16665 if (vmk_flags.vmkf_copy_single_object &&
16666 mapped_size != 0) {
16667 /*
16668 * Single object extraction.
16669 * We can't extract more with the required
16670 * protection but we've extracted some, so
16671 * stop there and declare success.
16672 * The caller should check the size of
16673 * the copy entry we've extracted.
16674 */
16675 result = KERN_SUCCESS;
16676 } else {
16677 /*
16678 * VM range extraction.
16679 * Required proctection is not available
16680 * for this part of the range: fail.
16681 */
16682 result = KERN_PROTECTION_FAILURE;
16683 }
16684 break;
16685 }
16686
16687 if (src_entry->is_sub_map) {
16688 vm_map_t submap;
16689 vm_map_offset_t submap_start;
16690 vm_map_size_t submap_size;
16691 vm_map_copy_t submap_copy;
16692 vm_prot_t submap_curprot, submap_maxprot;
16693 boolean_t submap_needs_copy;
16694
16695 /*
16696 * No check for "required protection" on "src_entry"
16697 * because the protections that matter are the ones
16698 * on the submap's VM map entry, which will be checked
16699 * during the call to vm_map_copy_extract() below.
16700 */
16701 object = VM_OBJECT_NULL;
16702 submap_copy = VM_MAP_COPY_NULL;
16703
16704 /* find equivalent range in the submap */
16705 submap = VME_SUBMAP(src_entry);
16706 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16707 submap_size = tmp_size;
16708 if (copy) {
16709 /*
16710 * The caller wants a copy-on-write re-mapping,
16711 * so let's extract from the submap accordingly.
16712 */
16713 submap_needs_copy = TRUE;
16714 } else if (src_entry->needs_copy) {
16715 /*
16716 * The caller wants a shared re-mapping but the
16717 * submap is mapped with "needs_copy", so its
16718 * contents can't be shared as is. Extract the
16719 * contents of the submap as "copy-on-write".
16720 * The re-mapping won't be shared with the
16721 * original mapping but this is equivalent to
16722 * what happened with the original "remap from
16723 * submap" code.
16724 * The shared region is mapped "needs_copy", for
16725 * example.
16726 */
16727 submap_needs_copy = TRUE;
16728 } else {
16729 /*
16730 * The caller wants a shared re-mapping and
16731 * this mapping can be shared (no "needs_copy"),
16732 * so let's extract from the submap accordingly.
16733 * Kernel submaps are mapped without
16734 * "needs_copy", for example.
16735 */
16736 submap_needs_copy = FALSE;
16737 }
16738 /* extra ref to keep submap alive */
16739 vm_map_reference(submap);
16740
16741 DTRACE_VM7(remap_submap_recurse,
16742 vm_map_t, map,
16743 vm_map_offset_t, addr,
16744 vm_map_size_t, size,
16745 boolean_t, copy,
16746 vm_map_offset_t, submap_start,
16747 vm_map_size_t, submap_size,
16748 boolean_t, submap_needs_copy);
16749
16750 /*
16751 * The map can be safely unlocked since we
16752 * already hold a reference on the submap.
16753 *
16754 * No timestamp since we don't care if the map
16755 * gets modified while we're down in the submap.
16756 * We'll resume the extraction at src_start + tmp_size
16757 * anyway.
16758 */
16759 vm_map_unlock(map);
16760 src_entry = NULL; /* not valid once map is unlocked */
16761
16762 if (vm_remap_legacy) {
16763 submap_curprot = VM_PROT_NONE;
16764 submap_maxprot = VM_PROT_NONE;
16765 if (max_prot_for_prot_copy) {
16766 submap_maxprot = max_prot_for_prot_copy;
16767 }
16768 } else {
16769 assert(!max_prot_for_prot_copy);
16770 submap_curprot = *cur_protection;
16771 submap_maxprot = *max_protection;
16772 }
16773 result = vm_map_copy_extract(submap,
16774 submap_start,
16775 submap_size,
16776 submap_needs_copy,
16777 &submap_copy,
16778 &submap_curprot,
16779 &submap_maxprot,
16780 inheritance,
16781 vmk_flags);
16782
16783 /* release extra ref on submap */
16784 vm_map_deallocate(submap);
16785 submap = VM_MAP_NULL;
16786
16787 if (result != KERN_SUCCESS) {
16788 vm_map_lock(map);
16789 break;
16790 }
16791
16792 /* transfer submap_copy entries to map_header */
16793 while (vm_map_copy_first_entry(submap_copy) !=
16794 vm_map_copy_to_entry(submap_copy)) {
16795 vm_map_entry_t copy_entry;
16796 vm_map_size_t copy_entry_size;
16797
16798 copy_entry = vm_map_copy_first_entry(submap_copy);
16799 assert(!copy_entry->is_sub_map);
16800 object = VME_OBJECT(copy_entry);
16801
16802 /*
16803 * Prevent kernel_object from being exposed to
16804 * user space.
16805 */
16806 if (__improbable(object == kernel_object)) {
16807 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16808 proc_selfpid(),
16809 (current_task()->bsd_info
16810 ? proc_name_address(current_task()->bsd_info)
16811 : "?"));
16812 DTRACE_VM(extract_kernel_only);
16813 result = KERN_INVALID_RIGHT;
16814 vm_map_copy_discard(submap_copy);
16815 submap_copy = VM_MAP_COPY_NULL;
16816 vm_map_lock(map);
16817 break;
16818 }
16819
16820 vm_map_copy_entry_unlink(submap_copy, copy_entry);
16821 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16822 copy_entry->vme_start = map_address;
16823 copy_entry->vme_end = map_address + copy_entry_size;
16824 map_address += copy_entry_size;
16825 mapped_size += copy_entry_size;
16826 src_start += copy_entry_size;
16827 assert(src_start <= src_end);
16828 _vm_map_store_entry_link(map_header,
16829 map_header->links.prev,
16830 copy_entry);
16831 }
16832 /* done with submap_copy */
16833 vm_map_copy_discard(submap_copy);
16834
16835 if (vm_remap_legacy) {
16836 *cur_protection &= submap_curprot;
16837 *max_protection &= submap_maxprot;
16838 }
16839
16840 /* re-acquire the map lock and continue to next entry */
16841 vm_map_lock(map);
16842 continue;
16843 } else {
16844 object = VME_OBJECT(src_entry);
16845
16846 /*
16847 * Prevent kernel_object from being exposed to
16848 * user space.
16849 */
16850 if (__improbable(object == kernel_object)) {
16851 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16852 proc_selfpid(),
16853 (current_task()->bsd_info
16854 ? proc_name_address(current_task()->bsd_info)
16855 : "?"));
16856 DTRACE_VM(extract_kernel_only);
16857 result = KERN_INVALID_RIGHT;
16858 break;
16859 }
16860
16861 if (src_entry->iokit_acct) {
16862 /*
16863 * This entry uses "IOKit accounting".
16864 */
16865 } else if (object != VM_OBJECT_NULL &&
16866 (object->purgable != VM_PURGABLE_DENY ||
16867 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
16868 /*
16869 * Purgeable objects have their own accounting:
16870 * no pmap accounting for them.
16871 */
16872 assertf(!src_entry->use_pmap,
16873 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16874 map,
16875 src_entry,
16876 (uint64_t)src_entry->vme_start,
16877 (uint64_t)src_entry->vme_end,
16878 src_entry->protection,
16879 src_entry->max_protection,
16880 VME_ALIAS(src_entry));
16881 } else {
16882 /*
16883 * Not IOKit or purgeable:
16884 * must be accounted by pmap stats.
16885 */
16886 assertf(src_entry->use_pmap,
16887 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16888 map,
16889 src_entry,
16890 (uint64_t)src_entry->vme_start,
16891 (uint64_t)src_entry->vme_end,
16892 src_entry->protection,
16893 src_entry->max_protection,
16894 VME_ALIAS(src_entry));
16895 }
16896
16897 if (object == VM_OBJECT_NULL) {
16898 assert(!src_entry->needs_copy);
16899 object = vm_object_allocate(entry_size);
16900 VME_OFFSET_SET(src_entry, 0);
16901 VME_OBJECT_SET(src_entry, object);
16902 assert(src_entry->use_pmap);
16903 assert(!map->mapped_in_other_pmaps);
16904 } else if (src_entry->wired_count ||
16905 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16906 /*
16907 * A wired memory region should not have
16908 * any pending copy-on-write and needs to
16909 * keep pointing at the VM object that
16910 * contains the wired pages.
16911 * If we're sharing this memory (copy=false),
16912 * we'll share this VM object.
16913 * If we're copying this memory (copy=true),
16914 * we'll call vm_object_copy_slowly() below
16915 * and use the new VM object for the remapping.
16916 *
16917 * Or, we are already using an asymmetric
16918 * copy, and therefore we already have
16919 * the right object.
16920 */
16921 assert(!src_entry->needs_copy);
16922 } else if (src_entry->needs_copy || object->shadowed ||
16923 (object->internal && !object->true_share &&
16924 !src_entry->is_shared &&
16925 object->vo_size > entry_size)) {
16926 VME_OBJECT_SHADOW(src_entry, entry_size);
16927 assert(src_entry->use_pmap);
16928
16929 if (!src_entry->needs_copy &&
16930 (src_entry->protection & VM_PROT_WRITE)) {
16931 vm_prot_t prot;
16932
16933 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16934
16935 prot = src_entry->protection & ~VM_PROT_WRITE;
16936
16937 if (override_nx(map,
16938 VME_ALIAS(src_entry))
16939 && prot) {
16940 prot |= VM_PROT_EXECUTE;
16941 }
16942
16943 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16944
16945 if (map->mapped_in_other_pmaps) {
16946 vm_object_pmap_protect(
16947 VME_OBJECT(src_entry),
16948 VME_OFFSET(src_entry),
16949 entry_size,
16950 PMAP_NULL,
16951 PAGE_SIZE,
16952 src_entry->vme_start,
16953 prot);
16954 #if MACH_ASSERT
16955 } else if (__improbable(map->pmap == PMAP_NULL)) {
16956 extern boolean_t vm_tests_in_progress;
16957 assert(vm_tests_in_progress);
16958 /*
16959 * Some VM tests (in vm_tests.c)
16960 * sometimes want to use a VM
16961 * map without a pmap.
16962 * Otherwise, this should never
16963 * happen.
16964 */
16965 #endif /* MACH_ASSERT */
16966 } else {
16967 pmap_protect(vm_map_pmap(map),
16968 src_entry->vme_start,
16969 src_entry->vme_end,
16970 prot);
16971 }
16972 }
16973
16974 object = VME_OBJECT(src_entry);
16975 src_entry->needs_copy = FALSE;
16976 }
16977
16978
16979 vm_object_lock(object);
16980 vm_object_reference_locked(object); /* object ref. for new entry */
16981 assert(!src_entry->needs_copy);
16982 if (object->copy_strategy ==
16983 MEMORY_OBJECT_COPY_SYMMETRIC) {
16984 /*
16985 * If we want to share this object (copy==0),
16986 * it needs to be COPY_DELAY.
16987 * If we want to copy this object (copy==1),
16988 * we can't just set "needs_copy" on our side
16989 * and expect the other side to do the same
16990 * (symmetrically), so we can't let the object
16991 * stay COPY_SYMMETRIC.
16992 * So we always switch from COPY_SYMMETRIC to
16993 * COPY_DELAY.
16994 */
16995 object->copy_strategy =
16996 MEMORY_OBJECT_COPY_DELAY;
16997 object->true_share = TRUE;
16998 }
16999 vm_object_unlock(object);
17000 }
17001
17002 offset = (VME_OFFSET(src_entry) +
17003 (src_start - src_entry->vme_start));
17004
17005 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
17006 vm_map_entry_copy(map, new_entry, src_entry);
17007 if (new_entry->is_sub_map) {
17008 /* clr address space specifics */
17009 new_entry->use_pmap = FALSE;
17010 } else if (copy) {
17011 /*
17012 * We're dealing with a copy-on-write operation,
17013 * so the resulting mapping should not inherit the
17014 * original mapping's accounting settings.
17015 * "use_pmap" should be reset to its default (TRUE)
17016 * so that the new mapping gets accounted for in
17017 * the task's memory footprint.
17018 */
17019 new_entry->use_pmap = TRUE;
17020 }
17021 /* "iokit_acct" was cleared in vm_map_entry_copy() */
17022 assert(!new_entry->iokit_acct);
17023
17024 new_entry->map_aligned = FALSE;
17025
17026 new_entry->vme_start = map_address;
17027 new_entry->vme_end = map_address + tmp_size;
17028 assert(new_entry->vme_start < new_entry->vme_end);
17029 if (copy && vmk_flags.vmkf_remap_prot_copy) {
17030 /*
17031 * Remapping for vm_map_protect(VM_PROT_COPY)
17032 * to convert a read-only mapping into a
17033 * copy-on-write version of itself but
17034 * with write access:
17035 * keep the original inheritance and add
17036 * VM_PROT_WRITE to the max protection.
17037 */
17038 new_entry->inheritance = src_entry->inheritance;
17039 new_entry->protection &= max_prot_for_prot_copy;
17040 new_entry->max_protection |= VM_PROT_WRITE;
17041 } else {
17042 new_entry->inheritance = inheritance;
17043 if (!vm_remap_legacy) {
17044 new_entry->protection = *cur_protection;
17045 new_entry->max_protection = *max_protection;
17046 }
17047 }
17048 VME_OFFSET_SET(new_entry, offset);
17049
17050 /*
17051 * The new region has to be copied now if required.
17052 */
17053 RestartCopy:
17054 if (!copy) {
17055 if (src_entry->used_for_jit == TRUE) {
17056 if (same_map) {
17057 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
17058 /*
17059 * Cannot allow an entry describing a JIT
17060 * region to be shared across address spaces.
17061 */
17062 result = KERN_INVALID_ARGUMENT;
17063 break;
17064 }
17065 }
17066
17067 src_entry->is_shared = TRUE;
17068 new_entry->is_shared = TRUE;
17069 if (!(new_entry->is_sub_map)) {
17070 new_entry->needs_copy = FALSE;
17071 }
17072 } else if (src_entry->is_sub_map) {
17073 /* make this a COW sub_map if not already */
17074 assert(new_entry->wired_count == 0);
17075 new_entry->needs_copy = TRUE;
17076 object = VM_OBJECT_NULL;
17077 } else if (src_entry->wired_count == 0 &&
17078 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
17079 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
17080 VME_OFFSET(new_entry),
17081 (new_entry->vme_end -
17082 new_entry->vme_start),
17083 &src_needs_copy,
17084 &new_entry_needs_copy)) {
17085 new_entry->needs_copy = new_entry_needs_copy;
17086 new_entry->is_shared = FALSE;
17087 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17088
17089 /*
17090 * Handle copy_on_write semantics.
17091 */
17092 if (src_needs_copy && !src_entry->needs_copy) {
17093 vm_prot_t prot;
17094
17095 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17096
17097 prot = src_entry->protection & ~VM_PROT_WRITE;
17098
17099 if (override_nx(map,
17100 VME_ALIAS(src_entry))
17101 && prot) {
17102 prot |= VM_PROT_EXECUTE;
17103 }
17104
17105 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17106
17107 vm_object_pmap_protect(object,
17108 offset,
17109 entry_size,
17110 ((src_entry->is_shared
17111 || map->mapped_in_other_pmaps) ?
17112 PMAP_NULL : map->pmap),
17113 VM_MAP_PAGE_SIZE(map),
17114 src_entry->vme_start,
17115 prot);
17116
17117 assert(src_entry->wired_count == 0);
17118 src_entry->needs_copy = TRUE;
17119 }
17120 /*
17121 * Throw away the old object reference of the new entry.
17122 */
17123 vm_object_deallocate(object);
17124 } else {
17125 new_entry->is_shared = FALSE;
17126 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17127
17128 src_entry_was_wired = (src_entry->wired_count > 0);
17129 saved_src_entry = src_entry;
17130 src_entry = VM_MAP_ENTRY_NULL;
17131
17132 /*
17133 * The map can be safely unlocked since we
17134 * already hold a reference on the object.
17135 *
17136 * Record the timestamp of the map for later
17137 * verification, and unlock the map.
17138 */
17139 version.main_timestamp = map->timestamp;
17140 vm_map_unlock(map); /* Increments timestamp once! */
17141
17142 /*
17143 * Perform the copy.
17144 */
17145 if (src_entry_was_wired > 0 ||
17146 (debug4k_no_cow_copyin &&
17147 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
17148 vm_object_lock(object);
17149 result = vm_object_copy_slowly(
17150 object,
17151 offset,
17152 (new_entry->vme_end -
17153 new_entry->vme_start),
17154 THREAD_UNINT,
17155 VME_OBJECT_PTR(new_entry));
17156
17157 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
17158 new_entry->needs_copy = FALSE;
17159 } else {
17160 vm_object_offset_t new_offset;
17161
17162 new_offset = VME_OFFSET(new_entry);
17163 result = vm_object_copy_strategically(
17164 object,
17165 offset,
17166 (new_entry->vme_end -
17167 new_entry->vme_start),
17168 VME_OBJECT_PTR(new_entry),
17169 &new_offset,
17170 &new_entry_needs_copy);
17171 if (new_offset != VME_OFFSET(new_entry)) {
17172 VME_OFFSET_SET(new_entry, new_offset);
17173 }
17174
17175 new_entry->needs_copy = new_entry_needs_copy;
17176 }
17177
17178 /*
17179 * Throw away the old object reference of the new entry.
17180 */
17181 vm_object_deallocate(object);
17182
17183 if (result != KERN_SUCCESS &&
17184 result != KERN_MEMORY_RESTART_COPY) {
17185 _vm_map_entry_dispose(map_header, new_entry);
17186 vm_map_lock(map);
17187 break;
17188 }
17189
17190 /*
17191 * Verify that the map has not substantially
17192 * changed while the copy was being made.
17193 */
17194
17195 vm_map_lock(map);
17196 if (version.main_timestamp + 1 != map->timestamp) {
17197 /*
17198 * Simple version comparison failed.
17199 *
17200 * Retry the lookup and verify that the
17201 * same object/offset are still present.
17202 */
17203 saved_src_entry = VM_MAP_ENTRY_NULL;
17204 vm_object_deallocate(VME_OBJECT(new_entry));
17205 _vm_map_entry_dispose(map_header, new_entry);
17206 if (result == KERN_MEMORY_RESTART_COPY) {
17207 result = KERN_SUCCESS;
17208 }
17209 continue;
17210 }
17211 /* map hasn't changed: src_entry is still valid */
17212 src_entry = saved_src_entry;
17213 saved_src_entry = VM_MAP_ENTRY_NULL;
17214
17215 if (result == KERN_MEMORY_RESTART_COPY) {
17216 vm_object_reference(object);
17217 goto RestartCopy;
17218 }
17219 }
17220
17221 _vm_map_store_entry_link(map_header,
17222 map_header->links.prev, new_entry);
17223
17224 /* protections for submap mapping are irrelevant here */
17225 if (vm_remap_legacy && !src_entry->is_sub_map) {
17226 *cur_protection &= src_entry->protection;
17227 *max_protection &= src_entry->max_protection;
17228 }
17229
17230 map_address += tmp_size;
17231 mapped_size += tmp_size;
17232 src_start += tmp_size;
17233
17234 if (vmk_flags.vmkf_copy_single_object) {
17235 if (mapped_size != size) {
17236 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17237 if (src_entry->vme_next != vm_map_to_entry(map) &&
17238 VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
17239 /* XXX TODO4K */
17240 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17241 }
17242 }
17243 break;
17244 }
17245 } /* end while */
17246
17247 vm_map_unlock(map);
17248 if (result != KERN_SUCCESS) {
17249 /*
17250 * Free all allocated elements.
17251 */
17252 for (src_entry = map_header->links.next;
17253 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17254 src_entry = new_entry) {
17255 new_entry = src_entry->vme_next;
17256 _vm_map_store_entry_unlink(map_header, src_entry);
17257 if (src_entry->is_sub_map) {
17258 vm_map_deallocate(VME_SUBMAP(src_entry));
17259 } else {
17260 vm_object_deallocate(VME_OBJECT(src_entry));
17261 }
17262 _vm_map_entry_dispose(map_header, src_entry);
17263 }
17264 }
17265 return result;
17266 }
17267
/*
 * vm_map_is_exotic:
 * Return whether "map" is considered "exotic", as determined by the
 * VM_MAP_IS_EXOTIC() macro (the exact criteria live with that macro's
 * definition, not visible here).
 */
bool
vm_map_is_exotic(
	vm_map_t map)
{
	return VM_MAP_IS_EXOTIC(map);
}
17274
/*
 * vm_map_is_alien:
 * Return whether "map" is considered "alien", as determined by the
 * VM_MAP_IS_ALIEN() macro; presumably based on the map's "is_alien"
 * bit (set by vm_map_mark_alien()) -- macro defined elsewhere.
 */
bool
vm_map_is_alien(
	vm_map_t map)
{
	return VM_MAP_IS_ALIEN(map);
}
17281
17282 #if XNU_TARGET_OS_OSX
/*
 * vm_map_mark_alien:
 * Mark "map" as "alien" by setting its "is_alien" bit under the
 * exclusive map lock.  Only compiled on macOS (XNU_TARGET_OS_OSX).
 */
void
vm_map_mark_alien(
	vm_map_t map)
{
	vm_map_lock(map);
	map->is_alien = true;
	vm_map_unlock(map);
}
17291
/*
 * vm_map_single_jit:
 * Set the "single_jit" bit on "map" under the exclusive map lock.
 * Only compiled on macOS (XNU_TARGET_OS_OSX).
 */
void
vm_map_single_jit(
	vm_map_t map)
{
	vm_map_lock(map);
	map->single_jit = true;
	vm_map_unlock(map);
}
17300 #endif /* XNU_TARGET_OS_OSX */
17301
/*
 * vm_map_copy_to_physcopy:
 *
 * Replace the (possibly misaligned) mappings in "copy_map" with a single
 * freshly-allocated VM object holding a physical copy of their contents,
 * page-aligned for "target_map".  Equivalent to a vm_allocate() + memcpy():
 * a temporary pageable map (with its own pmap) is created, both the source
 * mappings and the new object are mapped into it, the data is copied page
 * by page through a kernel buffer, and then "copy_map" is rebuilt to
 * contain only one entry covering the new object.
 *
 * On success, "copy_map"'s page size is switched to match "target_map",
 * its offset is reset to 0, and its size is rounded up to "size".
 * Returns KERN_RESOURCE_SHORTAGE if a new pmap cannot be created,
 * KERN_SUCCESS otherwise.  Note that per-page copyinmap()/copyoutmap()
 * failures are only logged (DEBUG4K_COPY), not propagated.
 */
static kern_return_t
vm_map_copy_to_physcopy(
	vm_map_copy_t   copy_map,
	vm_map_t        target_map)
{
	vm_map_size_t           size;
	vm_map_entry_t          entry;
	vm_map_entry_t          new_entry;
	vm_object_t             new_object;
	unsigned int            pmap_flags;
	pmap_t                  new_pmap;
	vm_map_t                new_map;
	vm_map_address_t        src_start, src_end, src_cur;
	vm_map_address_t        dst_start, dst_end, dst_cur;
	kern_return_t           kr;
	void                    *kbuf;

	/*
	 * Perform the equivalent of vm_allocate() and memcpy().
	 * Replace the mappings in "copy_map" with the newly allocated mapping.
	 */
	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);

	/*
	 * NOTE(review): this compares a page *shift* against a page *mask*;
	 * it only ever fails if the shift happens to equal the mask value,
	 * which looks like it was meant to be VM_MAP_PAGE_SHIFT() -- confirm.
	 */
	assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));

	/* create a new pmap to map "copy_map" */
	pmap_flags = 0;
	/* only 4K copy maps are expected here (TODO4K path) */
	assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
#if PMAP_CREATE_FORCE_4K_PAGES
	pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
	pmap_flags |= PMAP_CREATE_64BIT;
	new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
	if (new_pmap == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/* allocate new VM object, sized to the rounded-up copy size */
	size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
	new_object = vm_object_allocate(size);
	assert(new_object);

	/* allocate new VM map entry */
	new_entry = vm_map_copy_entry_create(copy_map, FALSE);
	assert(new_entry);

	/* finish initializing new VM map entry */
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_DEFAULT;
	new_entry->use_pmap = TRUE;

	/* make new VM map entry point to new VM object */
	new_entry->vme_start = 0;
	new_entry->vme_end = size;
	VME_OBJECT_SET(new_entry, new_object);
	VME_OFFSET_SET(new_entry, 0);

	/* create a new pageable VM map to map "copy_map" */
	new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
	assert(new_map);
	vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);

	/*
	 * Map "copy_map" in the new VM map, without consuming it, so that
	 * its entries remain intact for the disposal loop below.
	 */
	src_start = 0;
	kr = vm_map_copyout_internal(
		new_map,
		&src_start,
		copy_map,
		copy_map->size,
		FALSE, /* consume_on_success */
		VM_PROT_DEFAULT,
		VM_PROT_DEFAULT,
		VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	src_end = src_start + copy_map->size;

	/* map "new_object" in the new VM map */
	vm_object_reference(new_object); /* extra ref consumed by vm_map_enter */
	dst_start = 0;
	kr = vm_map_enter(new_map,
	    &dst_start,
	    size,
	    0, /* mask */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_OSFMK,
	    new_object,
	    0, /* offset */
	    FALSE, /* needs copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_DEFAULT,
	    VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	dst_end = dst_start + size;

	/* get a kernel buffer (Z_NOFAIL: allocation cannot fail) */
	kbuf = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_NOFAIL);

	/* physically copy "copy_map" mappings to new VM object */
	for (src_cur = src_start, dst_cur = dst_start;
	    src_cur < src_end;
	    src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
		vm_size_t bytes;

		bytes = PAGE_SIZE;
		if (src_cur + PAGE_SIZE > src_end) {
			/* partial copy for last page */
			bytes = src_end - src_cur;
			assert(bytes > 0 && bytes < PAGE_SIZE);
			/* rest of dst page should be zero-filled */
		}
		/* get bytes from src mapping */
		kr = copyinmap(new_map, src_cur, kbuf, bytes);
		if (kr != KERN_SUCCESS) {
			/* failure is logged but the copy continues (best effort) */
			DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
		}
		/* put bytes in dst mapping */
		assert(dst_cur < dst_end);
		assert(dst_cur + bytes <= dst_end);
		kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
		}
	}

	/* free kernel buffer */
	kfree_data(kbuf, PAGE_SIZE);

	/* destroy new map (drops its mappings; new_object keeps our ref) */
	vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
	new_map = VM_MAP_NULL;

	/* dispose of the old map entries in "copy_map" */
	while (vm_map_copy_first_entry(copy_map) !=
	    vm_map_copy_to_entry(copy_map)) {
		entry = vm_map_copy_first_entry(copy_map);
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(copy_map, entry);
	}

	/* change "copy_map"'s page_size to match "target_map" */
	copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
	copy_map->offset = 0; /* offset used to be in 1st entry */
	copy_map->size = size;

	/* insert new map entry in "copy_map" */
	assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
	vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);

	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
	return KERN_SUCCESS;
}
17459
17460 void
17461 vm_map_copy_adjust_get_target_copy_map(
17462 vm_map_copy_t copy_map,
17463 vm_map_copy_t *target_copy_map_p);
17464 void
vm_map_copy_adjust_get_target_copy_map(vm_map_copy_t copy_map,vm_map_copy_t * target_copy_map_p)17465 vm_map_copy_adjust_get_target_copy_map(
17466 vm_map_copy_t copy_map,
17467 vm_map_copy_t *target_copy_map_p)
17468 {
17469 vm_map_copy_t target_copy_map;
17470 vm_map_entry_t entry, target_entry;
17471
17472 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17473 /* the caller already has a "target_copy_map": use it */
17474 return;
17475 }
17476
17477 /* the caller wants us to create a new copy of "copy_map" */
17478 target_copy_map = vm_map_copy_allocate();
17479 target_copy_map->type = copy_map->type;
17480 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17481 target_copy_map->offset = copy_map->offset;
17482 target_copy_map->size = copy_map->size;
17483 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17484 vm_map_store_init(&target_copy_map->cpy_hdr);
17485 for (entry = vm_map_copy_first_entry(copy_map);
17486 entry != vm_map_copy_to_entry(copy_map);
17487 entry = entry->vme_next) {
17488 target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
17489 vm_map_entry_copy_full(target_entry, entry);
17490 if (target_entry->is_sub_map) {
17491 vm_map_reference(VME_SUBMAP(target_entry));
17492 } else {
17493 vm_object_reference(VME_OBJECT(target_entry));
17494 }
17495 vm_map_copy_entry_link(
17496 target_copy_map,
17497 vm_map_copy_last_entry(target_copy_map),
17498 target_entry);
17499 }
17500 entry = VM_MAP_ENTRY_NULL;
17501 *target_copy_map_p = target_copy_map;
17502 }
17503
void
vm_map_copy_trim(
	vm_map_copy_t   copy_map,
	int             new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end);
/*
 * vm_map_copy_trim:
 * Remove the range [trim_start, trim_end) from "copy_map".
 * The trim offsets are relative to the start of the first entry and are
 * rebased to absolute entry addresses below.  Clipping is performed with
 * "new_page_shift" temporarily installed as the copy's page shift (so
 * the clip macros align to the target page size); the original shift is
 * restored before returning.  "copy_map"'s size shrinks by the amount
 * actually removed.
 */
void
vm_map_copy_trim(
	vm_map_copy_t   copy_map,
	int             new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end)
{
	int             copy_page_shift;
	vm_map_entry_t  entry, next_entry;

	assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
	assert(copy_map->cpy_hdr.nentries > 0);

	/* rebase trim offsets onto the first entry's start address */
	trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
	trim_end += vm_map_copy_first_entry(copy_map)->vme_start;

	/* use the new page_shift to do the clipping */
	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_map->cpy_hdr.page_shift = new_page_shift;

	/*
	 * Note: next_entry is captured before clipping; any remainder that
	 * vm_map_copy_clip_end() splits off starts at trim_end and is
	 * correctly kept (it lies outside the trim range).
	 */
	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = next_entry) {
		next_entry = entry->vme_next;
		if (entry->vme_end <= trim_start) {
			/* entry fully before trim range: skip */
			continue;
		}
		if (entry->vme_start >= trim_end) {
			/* entry fully after trim range: done */
			break;
		}
		/* clip entry if needed */
		vm_map_copy_clip_start(copy_map, entry, trim_start);
		vm_map_copy_clip_end(copy_map, entry, trim_end);
		/* dispose of entry (now fully inside the trim range) */
		copy_map->size -= entry->vme_end - entry->vme_start;
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(copy_map, entry);
		entry = VM_MAP_ENTRY_NULL;
	}

	/* restore copy_map's original page_shift */
	copy_map->cpy_hdr.page_shift = copy_page_shift;
}
17560
17561 /*
17562 * Make any necessary adjustments to "copy_map" to allow it to be
17563 * mapped into "target_map".
17564 * If no changes were necessary, "target_copy_map" points to the
17565 * untouched "copy_map".
17566 * If changes are necessary, changes will be made to "target_copy_map".
17567 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17568 * copy the original "copy_map" to it before applying the changes.
17569 * The caller should discard "target_copy_map" if it's not the same as
17570 * the original "copy_map".
17571 */
17572 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
17573 kern_return_t
vm_map_copy_adjust_to_target(vm_map_copy_t src_copy_map,vm_map_offset_t offset,vm_map_size_t size,vm_map_t target_map,boolean_t copy,vm_map_copy_t * target_copy_map_p,vm_map_offset_t * overmap_start_p,vm_map_offset_t * overmap_end_p,vm_map_offset_t * trimmed_start_p)17574 vm_map_copy_adjust_to_target(
17575 vm_map_copy_t src_copy_map,
17576 vm_map_offset_t offset,
17577 vm_map_size_t size,
17578 vm_map_t target_map,
17579 boolean_t copy,
17580 vm_map_copy_t *target_copy_map_p,
17581 vm_map_offset_t *overmap_start_p,
17582 vm_map_offset_t *overmap_end_p,
17583 vm_map_offset_t *trimmed_start_p)
17584 {
17585 vm_map_copy_t copy_map, target_copy_map;
17586 vm_map_size_t target_size;
17587 vm_map_size_t src_copy_map_size;
17588 vm_map_size_t overmap_start, overmap_end;
17589 int misalignments;
17590 vm_map_entry_t entry, target_entry;
17591 vm_map_offset_t addr_adjustment;
17592 vm_map_offset_t new_start, new_end;
17593 int copy_page_mask, target_page_mask;
17594 int copy_page_shift, target_page_shift;
17595 vm_map_offset_t trimmed_end;
17596
17597 /*
17598 * Assert that the vm_map_copy is coming from the right
17599 * zone and hasn't been forged
17600 */
17601 vm_map_copy_require(src_copy_map);
17602 assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17603
17604 /*
17605 * Start working with "src_copy_map" but we'll switch
17606 * to "target_copy_map" as soon as we start making adjustments.
17607 */
17608 copy_map = src_copy_map;
17609 src_copy_map_size = src_copy_map->size;
17610
17611 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17612 copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
17613 target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
17614 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17615
17616 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
17617
17618 target_copy_map = *target_copy_map_p;
17619 if (target_copy_map != VM_MAP_COPY_NULL) {
17620 vm_map_copy_require(target_copy_map);
17621 }
17622
17623 if (offset + size > copy_map->size) {
17624 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
17625 return KERN_INVALID_ARGUMENT;
17626 }
17627
17628 /* trim the end */
17629 trimmed_end = 0;
17630 new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
17631 if (new_end < copy_map->size) {
17632 trimmed_end = src_copy_map_size - new_end;
17633 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
17634 /* get "target_copy_map" if needed and adjust it */
17635 vm_map_copy_adjust_get_target_copy_map(copy_map,
17636 &target_copy_map);
17637 copy_map = target_copy_map;
17638 vm_map_copy_trim(target_copy_map, target_page_shift,
17639 new_end, copy_map->size);
17640 }
17641
17642 /* trim the start */
17643 new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
17644 if (new_start != 0) {
17645 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
17646 /* get "target_copy_map" if needed and adjust it */
17647 vm_map_copy_adjust_get_target_copy_map(copy_map,
17648 &target_copy_map);
17649 copy_map = target_copy_map;
17650 vm_map_copy_trim(target_copy_map, target_page_shift,
17651 0, new_start);
17652 }
17653 *trimmed_start_p = new_start;
17654
17655 /* target_size starts with what's left after trimming */
17656 target_size = copy_map->size;
17657 assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
17658 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17659 (uint64_t)target_size, (uint64_t)src_copy_map_size,
17660 (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
17661
17662 /* check for misalignments but don't adjust yet */
17663 misalignments = 0;
17664 overmap_start = 0;
17665 overmap_end = 0;
17666 if (copy_page_shift < target_page_shift) {
17667 /*
17668 * Remapping from 4K to 16K: check the VM object alignments
17669 * throughout the range.
17670 * If the start and end of the range are mis-aligned, we can
17671 * over-map to re-align, and adjust the "overmap" start/end
17672 * and "target_size" of the range accordingly.
17673 * If there is any mis-alignment within the range:
17674 * if "copy":
17675 * we can do immediate-copy instead of copy-on-write,
17676 * else:
17677 * no way to remap and share; fail.
17678 */
17679 for (entry = vm_map_copy_first_entry(copy_map);
17680 entry != vm_map_copy_to_entry(copy_map);
17681 entry = entry->vme_next) {
17682 vm_object_offset_t object_offset_start, object_offset_end;
17683
17684 object_offset_start = VME_OFFSET(entry);
17685 object_offset_end = object_offset_start;
17686 object_offset_end += entry->vme_end - entry->vme_start;
17687 if (object_offset_start & target_page_mask) {
17688 if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
17689 overmap_start++;
17690 } else {
17691 misalignments++;
17692 }
17693 }
17694 if (object_offset_end & target_page_mask) {
17695 if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
17696 overmap_end++;
17697 } else {
17698 misalignments++;
17699 }
17700 }
17701 }
17702 }
17703 entry = VM_MAP_ENTRY_NULL;
17704
17705 /* decide how to deal with misalignments */
17706 assert(overmap_start <= 1);
17707 assert(overmap_end <= 1);
17708 if (!overmap_start && !overmap_end && !misalignments) {
17709 /* copy_map is properly aligned for target_map ... */
17710 if (*trimmed_start_p) {
17711 /* ... but we trimmed it, so still need to adjust */
17712 } else {
17713 /* ... and we didn't trim anything: we're done */
17714 if (target_copy_map == VM_MAP_COPY_NULL) {
17715 target_copy_map = copy_map;
17716 }
17717 *target_copy_map_p = target_copy_map;
17718 *overmap_start_p = 0;
17719 *overmap_end_p = 0;
17720 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17721 return KERN_SUCCESS;
17722 }
17723 } else if (misalignments && !copy) {
17724 /* can't "share" if misaligned */
17725 DEBUG4K_ADJUST("unsupported sharing\n");
17726 #if MACH_ASSERT
17727 if (debug4k_panic_on_misaligned_sharing) {
17728 panic("DEBUG4k %s:%d unsupported sharing", __FUNCTION__, __LINE__);
17729 }
17730 #endif /* MACH_ASSERT */
17731 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
17732 return KERN_NOT_SUPPORTED;
17733 } else {
17734 /* can't virtual-copy if misaligned (but can physical-copy) */
17735 DEBUG4K_ADJUST("mis-aligned copying\n");
17736 }
17737
17738 /* get a "target_copy_map" if needed and switch to it */
17739 vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
17740 copy_map = target_copy_map;
17741
17742 if (misalignments && copy) {
17743 vm_map_size_t target_copy_map_size;
17744
17745 /*
17746 * Can't do copy-on-write with misaligned mappings.
17747 * Replace the mappings with a physical copy of the original
17748 * mappings' contents.
17749 */
17750 target_copy_map_size = target_copy_map->size;
17751 kern_return_t kr = vm_map_copy_to_physcopy(target_copy_map, target_map);
17752 if (kr != KERN_SUCCESS) {
17753 return kr;
17754 }
17755 *target_copy_map_p = target_copy_map;
17756 *overmap_start_p = 0;
17757 *overmap_end_p = target_copy_map->size - target_copy_map_size;
17758 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17759 return KERN_SUCCESS;
17760 }
17761
17762 /* apply the adjustments */
17763 misalignments = 0;
17764 overmap_start = 0;
17765 overmap_end = 0;
17766 /* remove copy_map->offset, so that everything starts at offset 0 */
17767 addr_adjustment = copy_map->offset;
17768 /* also remove whatever we trimmed from the start */
17769 addr_adjustment += *trimmed_start_p;
17770 for (target_entry = vm_map_copy_first_entry(target_copy_map);
17771 target_entry != vm_map_copy_to_entry(target_copy_map);
17772 target_entry = target_entry->vme_next) {
17773 vm_object_offset_t object_offset_start, object_offset_end;
17774
17775 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17776 object_offset_start = VME_OFFSET(target_entry);
17777 if (object_offset_start & target_page_mask) {
17778 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17779 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17780 /*
17781 * start of 1st entry is mis-aligned:
17782 * re-adjust by over-mapping.
17783 */
17784 overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
17785 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
17786 VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
17787 } else {
17788 misalignments++;
17789 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17790 assert(copy);
17791 }
17792 }
17793
17794 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17795 target_size += overmap_start;
17796 } else {
17797 target_entry->vme_start += overmap_start;
17798 }
17799 target_entry->vme_end += overmap_start;
17800
17801 object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
17802 if (object_offset_end & target_page_mask) {
17803 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17804 if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
17805 /*
17806 * end of last entry is mis-aligned: re-adjust by over-mapping.
17807 */
17808 overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
17809 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
17810 target_entry->vme_end += overmap_end;
17811 target_size += overmap_end;
17812 } else {
17813 misalignments++;
17814 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17815 assert(copy);
17816 }
17817 }
17818 target_entry->vme_start -= addr_adjustment;
17819 target_entry->vme_end -= addr_adjustment;
17820 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17821 }
17822
17823 target_copy_map->size = target_size;
17824 target_copy_map->offset += overmap_start;
17825 target_copy_map->offset -= addr_adjustment;
17826 target_copy_map->cpy_hdr.page_shift = target_page_shift;
17827
17828 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17829 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17830 assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
17831 assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
17832
17833 *target_copy_map_p = target_copy_map;
17834 *overmap_start_p = overmap_start;
17835 *overmap_end_p = overmap_end;
17836
17837 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17838 return KERN_SUCCESS;
17839 }
17840
17841 kern_return_t
vm_map_range_physical_size(vm_map_t map,vm_map_address_t start,mach_vm_size_t size,mach_vm_size_t * phys_size)17842 vm_map_range_physical_size(
17843 vm_map_t map,
17844 vm_map_address_t start,
17845 mach_vm_size_t size,
17846 mach_vm_size_t * phys_size)
17847 {
17848 kern_return_t kr;
17849 vm_map_copy_t copy_map, target_copy_map;
17850 vm_map_offset_t adjusted_start, adjusted_end;
17851 vm_map_size_t adjusted_size;
17852 vm_prot_t cur_prot, max_prot;
17853 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17854 vm_map_kernel_flags_t vmk_flags;
17855
17856 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
17857 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
17858 adjusted_size = adjusted_end - adjusted_start;
17859 *phys_size = adjusted_size;
17860 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
17861 return KERN_SUCCESS;
17862 }
17863 if (start == 0) {
17864 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
17865 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
17866 adjusted_size = adjusted_end - adjusted_start;
17867 *phys_size = adjusted_size;
17868 return KERN_SUCCESS;
17869 }
17870 if (adjusted_size == 0) {
17871 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
17872 *phys_size = 0;
17873 return KERN_SUCCESS;
17874 }
17875
17876 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
17877 vmk_flags.vmkf_copy_pageable = TRUE;
17878 vmk_flags.vmkf_copy_same_map = TRUE;
17879 assert(adjusted_size != 0);
17880 cur_prot = VM_PROT_NONE; /* legacy mode */
17881 max_prot = VM_PROT_NONE; /* legacy mode */
17882 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
17883 FALSE /* copy */,
17884 ©_map,
17885 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
17886 vmk_flags);
17887 if (kr != KERN_SUCCESS) {
17888 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17889 //assert(0);
17890 *phys_size = 0;
17891 return kr;
17892 }
17893 assert(copy_map != VM_MAP_COPY_NULL);
17894 target_copy_map = copy_map;
17895 DEBUG4K_ADJUST("adjusting...\n");
17896 kr = vm_map_copy_adjust_to_target(
17897 copy_map,
17898 start - adjusted_start, /* offset */
17899 size, /* size */
17900 kernel_map,
17901 FALSE, /* copy */
17902 &target_copy_map,
17903 &overmap_start,
17904 &overmap_end,
17905 &trimmed_start);
17906 if (kr == KERN_SUCCESS) {
17907 if (target_copy_map->size != *phys_size) {
17908 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
17909 }
17910 *phys_size = target_copy_map->size;
17911 } else {
17912 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17913 //assert(0);
17914 *phys_size = 0;
17915 }
17916 vm_map_copy_discard(copy_map);
17917 copy_map = VM_MAP_COPY_NULL;
17918
17919 return kr;
17920 }
17921
17922
/*
 * Routine:	memory_entry_check_for_adjustment
 *
 * Given a memory-entry port, determine whether mapping its backing
 * copy map into "src_map" would require over-mapping at either end
 * (which happens when "src_map" uses a page size smaller than the
 * kernel's, e.g. a 4K process on a 16K kernel).
 *
 * Out:
 *	*overmap_start / *overmap_end: the over-mapped amounts.
 *	NOTE(review): these are only written on the adjustment path
 *	(src_map page shift < PAGE_SHIFT) — presumably callers
 *	pre-initialize them; verify against call sites.
 */
kern_return_t
memory_entry_check_for_adjustment(
	vm_map_t src_map,
	ipc_port_t port,
	vm_map_offset_t *overmap_start,
	vm_map_offset_t *overmap_end)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;

	/* the port must be a named-entry port */
	assert(port);
	assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));

	vm_named_entry_t named_entry;

	named_entry = mach_memory_entry_from_port(port);
	/* hold the named-entry lock while looking at its backing copy map */
	named_entry_lock(named_entry);
	copy_map = named_entry->backing.copy;
	target_copy_map = copy_map;

	if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
		/* target map has a sub-kernel page size: adjustment may be needed */
		vm_map_offset_t trimmed_start;

		trimmed_start = 0;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy_map,
			0, /* offset */
			copy_map->size, /* size */
			src_map,
			FALSE, /* copy */
			&target_copy_map,
			overmap_start,
			overmap_end,
			&trimmed_start);
		/* whole entry was examined from offset 0: nothing can be trimmed */
		assert(trimmed_start == 0);
	}
	named_entry_unlock(named_entry);

	return kr;
}
17964
17965
17966 /*
17967 * Routine: vm_remap
17968 *
17969 * Map portion of a task's address space.
17970 * Mapped region must not overlap more than
17971 * one vm memory object. Protections and
17972 * inheritance attributes remain the same
17973 * as in the original task and are out parameters.
17974 * Source and Target task can be identical
17975 * Other attributes are identical as for vm_map()
17976 */
17977 kern_return_t
vm_map_remap(vm_map_t target_map,vm_map_address_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,vm_map_t src_map,vm_map_offset_t memory_address,boolean_t copy,vm_prot_t * cur_protection,vm_prot_t * max_protection,vm_inherit_t inheritance)17978 vm_map_remap(
17979 vm_map_t target_map,
17980 vm_map_address_t *address,
17981 vm_map_size_t size,
17982 vm_map_offset_t mask,
17983 int flags,
17984 vm_map_kernel_flags_t vmk_flags,
17985 vm_tag_t tag,
17986 vm_map_t src_map,
17987 vm_map_offset_t memory_address,
17988 boolean_t copy,
17989 vm_prot_t *cur_protection, /* IN/OUT */
17990 vm_prot_t *max_protection, /* IN/OUT */
17991 vm_inherit_t inheritance)
17992 {
17993 kern_return_t result;
17994 vm_map_entry_t entry;
17995 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
17996 vm_map_entry_t new_entry;
17997 vm_map_copy_t copy_map;
17998 vm_map_offset_t offset_in_mapping;
17999 vm_map_size_t target_size = 0;
18000 vm_map_size_t src_page_mask, target_page_mask;
18001 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
18002 vm_map_offset_t initial_memory_address;
18003 vm_map_size_t initial_size;
18004
18005 if (target_map == VM_MAP_NULL) {
18006 return KERN_INVALID_ARGUMENT;
18007 }
18008
18009 initial_memory_address = memory_address;
18010 initial_size = size;
18011 src_page_mask = VM_MAP_PAGE_MASK(src_map);
18012 target_page_mask = VM_MAP_PAGE_MASK(target_map);
18013
18014 switch (inheritance) {
18015 case VM_INHERIT_NONE:
18016 case VM_INHERIT_COPY:
18017 case VM_INHERIT_SHARE:
18018 if (size != 0 && src_map != VM_MAP_NULL) {
18019 break;
18020 }
18021 OS_FALLTHROUGH;
18022 default:
18023 return KERN_INVALID_ARGUMENT;
18024 }
18025
18026 if (src_page_mask != target_page_mask) {
18027 if (copy) {
18028 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18029 } else {
18030 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18031 }
18032 }
18033
18034 /*
18035 * If the user is requesting that we return the address of the
18036 * first byte of the data (rather than the base of the page),
18037 * then we use different rounding semantics: specifically,
18038 * we assume that (memory_address, size) describes a region
18039 * all of whose pages we must cover, rather than a base to be truncated
18040 * down and a size to be added to that base. So we figure out
18041 * the highest page that the requested region includes and make
18042 * sure that the size will cover it.
18043 *
18044 * The key example we're worried about it is of the form:
18045 *
18046 * memory_address = 0x1ff0, size = 0x20
18047 *
18048 * With the old semantics, we round down the memory_address to 0x1000
18049 * and round up the size to 0x1000, resulting in our covering *only*
18050 * page 0x1000. With the new semantics, we'd realize that the region covers
18051 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
18052 * 0x1000 and page 0x2000 in the region we remap.
18053 */
18054 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18055 vm_map_offset_t range_start, range_end;
18056
18057 range_start = vm_map_trunc_page(memory_address, src_page_mask);
18058 range_end = vm_map_round_page(memory_address + size, src_page_mask);
18059 memory_address = range_start;
18060 size = range_end - range_start;
18061 offset_in_mapping = initial_memory_address - memory_address;
18062 } else {
18063 /*
18064 * IMPORTANT:
18065 * This legacy code path is broken: for the range mentioned
18066 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
18067 * two 4k pages, it yields [ memory_address = 0x1000,
18068 * size = 0x1000 ], which covers only the first 4k page.
18069 * BUT some code unfortunately depends on this bug, so we
18070 * can't fix it without breaking something.
18071 * New code should get automatically opted in the new
18072 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
18073 */
18074 offset_in_mapping = 0;
18075 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
18076 size = vm_map_round_page(size, src_page_mask);
18077 initial_memory_address = memory_address;
18078 initial_size = size;
18079 }
18080
18081
18082 if (size == 0) {
18083 return KERN_INVALID_ARGUMENT;
18084 }
18085
18086 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
18087 /* must be copy-on-write to be "media resilient" */
18088 if (!copy) {
18089 return KERN_INVALID_ARGUMENT;
18090 }
18091 }
18092
18093 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
18094 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
18095
18096 assert(size != 0);
18097 result = vm_map_copy_extract(src_map,
18098 memory_address,
18099 size,
18100 copy, ©_map,
18101 cur_protection, /* IN/OUT */
18102 max_protection, /* IN/OUT */
18103 inheritance,
18104 vmk_flags);
18105 if (result != KERN_SUCCESS) {
18106 return result;
18107 }
18108 assert(copy_map != VM_MAP_COPY_NULL);
18109
18110 overmap_start = 0;
18111 overmap_end = 0;
18112 trimmed_start = 0;
18113 target_size = size;
18114 if (src_page_mask != target_page_mask) {
18115 vm_map_copy_t target_copy_map;
18116
18117 target_copy_map = copy_map; /* can modify "copy_map" itself */
18118 DEBUG4K_ADJUST("adjusting...\n");
18119 result = vm_map_copy_adjust_to_target(
18120 copy_map,
18121 offset_in_mapping, /* offset */
18122 initial_size,
18123 target_map,
18124 copy,
18125 &target_copy_map,
18126 &overmap_start,
18127 &overmap_end,
18128 &trimmed_start);
18129 if (result != KERN_SUCCESS) {
18130 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
18131 vm_map_copy_discard(copy_map);
18132 return result;
18133 }
18134 if (trimmed_start == 0) {
18135 /* nothing trimmed: no adjustment needed */
18136 } else if (trimmed_start >= offset_in_mapping) {
18137 /* trimmed more than offset_in_mapping: nothing left */
18138 assert(overmap_start == 0);
18139 assert(overmap_end == 0);
18140 offset_in_mapping = 0;
18141 } else {
18142 /* trimmed some of offset_in_mapping: adjust */
18143 assert(overmap_start == 0);
18144 assert(overmap_end == 0);
18145 offset_in_mapping -= trimmed_start;
18146 }
18147 offset_in_mapping += overmap_start;
18148 target_size = target_copy_map->size;
18149 }
18150
18151 /*
18152 * Allocate/check a range of free virtual address
18153 * space for the target
18154 */
18155 *address = vm_map_trunc_page(*address, target_page_mask);
18156 vm_map_lock(target_map);
18157 target_size = vm_map_round_page(target_size, target_page_mask);
18158 result = vm_map_remap_range_allocate(target_map, address,
18159 target_size,
18160 mask, flags, vmk_flags, tag,
18161 &insp_entry);
18162
18163 for (entry = vm_map_copy_first_entry(copy_map);
18164 entry != vm_map_copy_to_entry(copy_map);
18165 entry = new_entry) {
18166 new_entry = entry->vme_next;
18167 vm_map_copy_entry_unlink(copy_map, entry);
18168 if (result == KERN_SUCCESS) {
18169 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18170 /* no codesigning -> read-only access */
18171 entry->max_protection = VM_PROT_READ;
18172 entry->protection = VM_PROT_READ;
18173 entry->vme_resilient_codesign = TRUE;
18174 }
18175 entry->vme_start += *address;
18176 entry->vme_end += *address;
18177 assert(!entry->map_aligned);
18178 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
18179 !entry->is_sub_map &&
18180 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
18181 VME_OBJECT(entry)->internal)) {
18182 entry->vme_resilient_media = TRUE;
18183 }
18184 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
18185 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
18186 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
18187 vm_map_store_entry_link(target_map, insp_entry, entry,
18188 vmk_flags);
18189 insp_entry = entry;
18190 } else {
18191 if (!entry->is_sub_map) {
18192 vm_object_deallocate(VME_OBJECT(entry));
18193 } else {
18194 vm_map_deallocate(VME_SUBMAP(entry));
18195 }
18196 vm_map_copy_entry_dispose(copy_map, entry);
18197 }
18198 }
18199
18200 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18201 *cur_protection = VM_PROT_READ;
18202 *max_protection = VM_PROT_READ;
18203 }
18204
18205 if (target_map->disable_vmentry_reuse == TRUE) {
18206 assert(!target_map->is_nested_map);
18207 if (target_map->highest_entry_end < insp_entry->vme_end) {
18208 target_map->highest_entry_end = insp_entry->vme_end;
18209 }
18210 }
18211
18212 if (result == KERN_SUCCESS) {
18213 target_map->size += target_size;
18214 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18215
18216 }
18217 vm_map_unlock(target_map);
18218
18219 if (result == KERN_SUCCESS && target_map->wiring_required) {
18220 result = vm_map_wire_kernel(target_map, *address,
18221 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18222 TRUE);
18223 }
18224
18225 /*
18226 * If requested, return the address of the data pointed to by the
18227 * request, rather than the base of the resulting page.
18228 */
18229 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18230 *address += offset_in_mapping;
18231 }
18232
18233 if (src_page_mask != target_page_mask) {
18234 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18235 }
18236 vm_map_copy_discard(copy_map);
18237 copy_map = VM_MAP_COPY_NULL;
18238
18239 return result;
18240 }
18241
18242 /*
18243 * Routine: vm_map_remap_range_allocate
18244 *
18245 * Description:
18246 * Allocate a range in the specified virtual address map.
18247 * returns the address and the map entry just before the allocated
18248 * range
18249 *
18250 * Map must be locked.
18251 */
18252
static kern_return_t
vm_map_remap_range_allocate(
	vm_map_t map,
	vm_map_address_t *address, /* IN/OUT */
	vm_map_size_t size,
	vm_map_offset_t mask,
	int flags,
	vm_map_kernel_flags_t vmk_flags,
	__unused vm_tag_t tag,
	vm_map_entry_t *map_entry) /* OUT */
{
	vm_map_entry_t entry;
	vm_map_offset_t start;
	vm_map_offset_t end;
	vm_map_offset_t desired_empty_end;
	kern_return_t kr;
	vm_map_entry_t hole_entry;

StartAgain:;

	start = *address;

	if (flags & VM_FLAGS_ANYWHERE) {
		if (flags & VM_FLAGS_RANDOM_ADDR) {
			/*
			 * Get a random start address.
			 */
			kr = vm_map_random_address_for_size(map, address, size);
			if (kr != KERN_SUCCESS) {
				return kr;
			}
			start = *address;
		}

		/*
		 * Calculate the first possible address.
		 */

		if (start < map->min_offset) {
			start = map->min_offset;
		}
		if (start > map->max_offset) {
			return KERN_NO_SPACE;
		}

		/*
		 * Look for the first possible address;
		 * if there's already something at this
		 * address, we have to start after it.
		 */

		if (map->disable_vmentry_reuse == TRUE) {
			VM_MAP_HIGHEST_ENTRY(map, entry, start);
		} else {
			if (map->holelistenabled) {
				/* walk the circular hole list for the first usable hole */
				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

				if (hole_entry == NULL) {
					/*
					 * No more space in the map?
					 */
					return KERN_NO_SPACE;
				} else {
					boolean_t found_hole = FALSE;

					do {
						if (hole_entry->vme_start >= start) {
							/* hole begins at/after start: use its base */
							start = hole_entry->vme_start;
							found_hole = TRUE;
							break;
						}

						if (hole_entry->vme_end > start) {
							/* start falls inside this hole */
							found_hole = TRUE;
							break;
						}
						hole_entry = hole_entry->vme_next;
					} while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));

					if (found_hole == FALSE) {
						return KERN_NO_SPACE;
					}

					entry = hole_entry;
				}
			} else {
				/* no hole list: use the first-free hint / entry lookup */
				assert(first_free_is_valid(map));
				if (start == map->min_offset) {
					if ((entry = map->first_free) != vm_map_to_entry(map)) {
						start = entry->vme_end;
					}
				} else {
					vm_map_entry_t tmp_entry;
					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
						start = tmp_entry->vme_end;
					}
					entry = tmp_entry;
				}
			}
			start = vm_map_round_page(start,
			    VM_MAP_PAGE_MASK(map));
		}

		/*
		 * In any case, the "entry" always precedes
		 * the proposed new region throughout the
		 * loop:
		 */

		while (TRUE) {
			vm_map_entry_t next;

			/*
			 * Find the end of the proposed new region.
			 * Be sure we didn't go beyond the end, or
			 * wrap around the address.
			 */

			/* apply the alignment mask, then page-round the start */
			end = ((start + mask) & ~mask);
			end = vm_map_round_page(end,
			    VM_MAP_PAGE_MASK(map));
			if (end < start) {
				return KERN_NO_SPACE;
			}
			start = end;
			end += size;

			/* We want an entire page of empty space, but don't increase the allocation size. */
			desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));

			if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
				if (map->wait_for_space) {
					if (size <= (map->max_offset -
					    map->min_offset)) {
						/* block until space is freed, then retry from scratch */
						assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
						vm_map_unlock(map);
						thread_block(THREAD_CONTINUE_NULL);
						vm_map_lock(map);
						goto StartAgain;
					}
				}

				return KERN_NO_SPACE;
			}

			next = entry->vme_next;

			if (map->holelistenabled) {
				/* "entry" is a hole: it must contain the whole region */
				if (entry->vme_end >= desired_empty_end) {
					break;
				}
			} else {
				/*
				 * If there are no more entries, we must win.
				 *
				 * OR
				 *
				 * If there is another entry, it must be
				 * after the end of the potential new region.
				 */

				if (next == vm_map_to_entry(map)) {
					break;
				}

				if (next->vme_start >= desired_empty_end) {
					break;
				}
			}

			/*
			 * Didn't fit -- move to the next entry.
			 */

			entry = next;

			if (map->holelistenabled) {
				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
					/*
					 * Wrapped around
					 */
					return KERN_NO_SPACE;
				}
				start = entry->vme_start;
			} else {
				start = entry->vme_end;
			}
		}

		if (map->holelistenabled) {
			/* sanity: the chosen spot must still be a hole, not a mapping */
			if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
				panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
			}
		}

		*address = start;
	} else {
		vm_map_entry_t temp_entry;

		/*
		 * Verify that:
		 * the address doesn't itself violate
		 * the mask requirement.
		 */

		if ((start & mask) != 0) {
			return KERN_NO_SPACE;
		}


		/*
		 * ...	the address is within bounds
		 */

		end = start + size;

		if ((start < map->min_offset) ||
		    (end > map->max_offset) ||
		    (start >= end)) {
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * If we're asked to overwrite whatever was mapped in that
		 * range, first deallocate that range.
		 */
		if (flags & VM_FLAGS_OVERWRITE) {
			vm_map_t zap_map;
			int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;

			/*
			 * We use a "zap_map" to avoid having to unlock
			 * the "map" in vm_map_delete(), which would compromise
			 * the atomicity of the "deallocate" and then "remap"
			 * combination.
			 */
			zap_map = vm_map_create(PMAP_NULL,
			    start,
			    end,
			    map->hdr.entries_pageable);
			if (zap_map == VM_MAP_NULL) {
				return KERN_RESOURCE_SHORTAGE;
			}
			vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
			vm_map_disable_hole_optimization(zap_map);

			if (vmk_flags.vmkf_overwrite_immutable) {
				remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
			}
			kr = vm_map_delete(map, start, end,
			    remove_flags,
			    zap_map);
			if (kr == KERN_SUCCESS) {
				vm_map_destroy(zap_map,
				    VM_MAP_REMOVE_NO_PMAP_CLEANUP);
				zap_map = VM_MAP_NULL;
			}
			/*
			 * NOTE(review): if vm_map_delete() fails, "zap_map" is
			 * not destroyed here and appears to leak — confirm
			 * whether vm_map_delete() can fail on this path.
			 */
		}

		/*
		 * ...	the starting address isn't allocated
		 */

		if (vm_map_lookup_entry(map, start, &temp_entry)) {
			return KERN_NO_SPACE;
		}

		entry = temp_entry;

		/*
		 * ...	the next region doesn't overlap the
		 * end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end)) {
			return KERN_NO_SPACE;
		}
	}
	*map_entry = entry;
	return KERN_SUCCESS;
}
18535
18536 /*
18537 * vm_map_switch:
18538 *
18539 * Set the address map for the current thread to the specified map
18540 */
18541
18542 vm_map_t
vm_map_switch(vm_map_t map)18543 vm_map_switch(
18544 vm_map_t map)
18545 {
18546 int mycpu;
18547 thread_t thread = current_thread();
18548 vm_map_t oldmap = thread->map;
18549
18550 mp_disable_preemption();
18551 mycpu = cpu_number();
18552
18553 /*
18554 * Deactivate the current map and activate the requested map
18555 */
18556 PMAP_SWITCH_USER(thread, map, mycpu);
18557
18558 mp_enable_preemption();
18559 return oldmap;
18560 }
18561
18562
18563 /*
18564 * Routine: vm_map_write_user
18565 *
18566 * Description:
18567 * Copy out data from a kernel space into space in the
18568 * destination map. The space must already exist in the
18569 * destination map.
18570 * NOTE: This routine should only be called by threads
18571 * which can block on a page fault. i.e. kernel mode user
18572 * threads.
18573 *
18574 */
18575 kern_return_t
vm_map_write_user(vm_map_t map,void * src_p,vm_map_address_t dst_addr,vm_size_t size)18576 vm_map_write_user(
18577 vm_map_t map,
18578 void *src_p,
18579 vm_map_address_t dst_addr,
18580 vm_size_t size)
18581 {
18582 kern_return_t kr = KERN_SUCCESS;
18583
18584 if (current_map() == map) {
18585 if (copyout(src_p, dst_addr, size)) {
18586 kr = KERN_INVALID_ADDRESS;
18587 }
18588 } else {
18589 vm_map_t oldmap;
18590
18591 /* take on the identity of the target map while doing */
18592 /* the transfer */
18593
18594 vm_map_reference(map);
18595 oldmap = vm_map_switch(map);
18596 if (copyout(src_p, dst_addr, size)) {
18597 kr = KERN_INVALID_ADDRESS;
18598 }
18599 vm_map_switch(oldmap);
18600 vm_map_deallocate(map);
18601 }
18602 return kr;
18603 }
18604
18605 /*
18606 * Routine: vm_map_read_user
18607 *
18608 * Description:
18609 * Copy in data from a user space source map into the
18610 * kernel map. The space must already exist in the
18611 * kernel map.
18612 * NOTE: This routine should only be called by threads
18613 * which can block on a page fault. i.e. kernel mode user
18614 * threads.
18615 *
18616 */
18617 kern_return_t
vm_map_read_user(vm_map_t map,vm_map_address_t src_addr,void * dst_p,vm_size_t size)18618 vm_map_read_user(
18619 vm_map_t map,
18620 vm_map_address_t src_addr,
18621 void *dst_p,
18622 vm_size_t size)
18623 {
18624 kern_return_t kr = KERN_SUCCESS;
18625
18626 if (current_map() == map) {
18627 if (copyin(src_addr, dst_p, size)) {
18628 kr = KERN_INVALID_ADDRESS;
18629 }
18630 } else {
18631 vm_map_t oldmap;
18632
18633 /* take on the identity of the target map while doing */
18634 /* the transfer */
18635
18636 vm_map_reference(map);
18637 oldmap = vm_map_switch(map);
18638 if (copyin(src_addr, dst_p, size)) {
18639 kr = KERN_INVALID_ADDRESS;
18640 }
18641 vm_map_switch(oldmap);
18642 vm_map_deallocate(map);
18643 }
18644 return kr;
18645 }
18646
18647
18648 /*
18649 * vm_map_check_protection:
18650 *
18651 * Assert that the target map allows the specified
18652 * privilege on the entire address region given.
18653 * The entire region must be allocated.
18654 */
18655 boolean_t
vm_map_check_protection(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t protection)18656 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18657 vm_map_offset_t end, vm_prot_t protection)
18658 {
18659 vm_map_entry_t entry;
18660 vm_map_entry_t tmp_entry;
18661
18662 vm_map_lock(map);
18663
18664 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18665 vm_map_unlock(map);
18666 return FALSE;
18667 }
18668
18669 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18670 vm_map_unlock(map);
18671 return FALSE;
18672 }
18673
18674 entry = tmp_entry;
18675
18676 while (start < end) {
18677 if (entry == vm_map_to_entry(map)) {
18678 vm_map_unlock(map);
18679 return FALSE;
18680 }
18681
18682 /*
18683 * No holes allowed!
18684 */
18685
18686 if (start < entry->vme_start) {
18687 vm_map_unlock(map);
18688 return FALSE;
18689 }
18690
18691 /*
18692 * Check protection associated with entry.
18693 */
18694
18695 if ((entry->protection & protection) != protection) {
18696 vm_map_unlock(map);
18697 return FALSE;
18698 }
18699
18700 /* go to next entry */
18701
18702 start = entry->vme_end;
18703 entry = entry->vme_next;
18704 }
18705 vm_map_unlock(map);
18706 return TRUE;
18707 }
18708
/*
 * vm_map_purgable_control:
 *
 *	Apply a purgeable-state control (set state, get state, or purge
 *	all) to the VM object mapped at "address" in "map".
 *
 *	In/out:
 *		state: the new state for SET operations; receives the
 *		       current state for GET.
 *
 *	Returns KERN_INVALID_ARGUMENT for a bad map/control/state, a
 *	submap or non-purgeable object at "address";
 *	KERN_INVALID_ADDRESS when nothing is mapped there;
 *	KERN_PROTECTION_FAILURE when trying to control an unwritable
 *	mapping; otherwise the result of vm_object_purgable_control().
 */
kern_return_t
vm_map_purgable_control(
	vm_map_t map,
	vm_map_offset_t address,
	vm_purgable_t control,
	int *state)
{
	vm_map_entry_t entry;
	vm_object_t object;
	kern_return_t kr;
	boolean_t was_nonvolatile;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_GET_STATE &&
	    control != VM_PURGABLE_PURGE_ALL &&
	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control == VM_PURGABLE_PURGE_ALL) {
		/* global purge: no specific object involved */
		vm_purgeable_object_purge_all();
		return KERN_SUCCESS;
	}

	/* for SET operations, the requested state must be well-formed */
	if ((control == VM_PURGABLE_SET_STATE ||
	    control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
	    ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->protection & VM_PROT_WRITE) == 0 &&
	    control != VM_PURGABLE_GET_STATE) {
		/*
		 * Can't apply purgable controls to something you can't write.
		 */
		vm_map_unlock_read(map);
		return KERN_PROTECTION_FAILURE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL ||
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Object must already be present and be purgeable.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/* lock the object before dropping the map lock below */
	vm_object_lock(object);

#if 00
	if (VME_OFFSET(entry) != 0 ||
	    entry->vme_end - entry->vme_start != object->vo_size) {
		/*
		 * Can only apply purgable controls to the whole (existing)
		 * object at once.
		 */
		vm_map_unlock_read(map);
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}
#endif

	assert(!entry->is_sub_map);
	assert(!entry->use_pmap); /* purgeable has its own accounting */

	/* the object lock keeps "object" stable; the map lock can go */
	vm_map_unlock_read(map);

	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);

	kr = vm_object_purgable_control(object, control, state);

	if (was_nonvolatile &&
	    object->purgable != VM_PURGABLE_NONVOLATILE &&
	    map->pmap == kernel_pmap) {
		/* record who volatilized a kernel-pmap object (debug only) */
#if DEBUG
		object->vo_purgeable_volatilizer = kernel_task;
#endif /* DEBUG */
	}

	vm_object_unlock(object);

	return kr;
}
18813
18814 void
vm_map_footprint_query_page_info(vm_map_t map,vm_map_entry_t map_entry,vm_map_offset_t curr_s_offset,int * disposition_p)18815 vm_map_footprint_query_page_info(
18816 vm_map_t map,
18817 vm_map_entry_t map_entry,
18818 vm_map_offset_t curr_s_offset,
18819 int *disposition_p)
18820 {
18821 int pmap_disp;
18822 vm_object_t object;
18823 int disposition;
18824 int effective_page_size;
18825
18826 vm_map_lock_assert_held(map);
18827 assert(!map->has_corpse_footprint);
18828 assert(curr_s_offset >= map_entry->vme_start);
18829 assert(curr_s_offset < map_entry->vme_end);
18830
18831 object = VME_OBJECT(map_entry);
18832 if (object == VM_OBJECT_NULL) {
18833 *disposition_p = 0;
18834 return;
18835 }
18836
18837 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18838
18839 pmap_disp = 0;
18840 if (object == VM_OBJECT_NULL) {
18841 /* nothing mapped here: no need to ask */
18842 *disposition_p = 0;
18843 return;
18844 } else if (map_entry->is_sub_map &&
18845 !map_entry->use_pmap) {
18846 /* nested pmap: no footprint */
18847 *disposition_p = 0;
18848 return;
18849 }
18850
18851 /*
18852 * Query the pmap.
18853 */
18854 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18855
18856 /*
18857 * Compute this page's disposition.
18858 */
18859 disposition = 0;
18860
18861 /* deal with "alternate accounting" first */
18862 if (!map_entry->is_sub_map &&
18863 object->vo_no_footprint) {
18864 /* does not count in footprint */
18865 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18866 } else if (!map_entry->is_sub_map &&
18867 (object->purgable == VM_PURGABLE_NONVOLATILE ||
18868 (object->purgable == VM_PURGABLE_DENY &&
18869 object->vo_ledger_tag)) &&
18870 VM_OBJECT_OWNER(object) != NULL &&
18871 VM_OBJECT_OWNER(object)->map == map) {
18872 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18873 if ((((curr_s_offset
18874 - map_entry->vme_start
18875 + VME_OFFSET(map_entry))
18876 / effective_page_size) <
18877 (object->resident_page_count +
18878 vm_compressor_pager_get_count(object->pager)))) {
18879 /*
18880 * Non-volatile purgeable object owned
18881 * by this task: report the first
18882 * "#resident + #compressed" pages as
18883 * "resident" (to show that they
18884 * contribute to the footprint) but not
18885 * "dirty" (to avoid double-counting
18886 * with the fake "non-volatile" region
18887 * we'll report at the end of the
18888 * address space to account for all
18889 * (mapped or not) non-volatile memory
18890 * owned by this task.
18891 */
18892 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18893 }
18894 } else if (!map_entry->is_sub_map &&
18895 (object->purgable == VM_PURGABLE_VOLATILE ||
18896 object->purgable == VM_PURGABLE_EMPTY) &&
18897 VM_OBJECT_OWNER(object) != NULL &&
18898 VM_OBJECT_OWNER(object)->map == map) {
18899 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18900 if ((((curr_s_offset
18901 - map_entry->vme_start
18902 + VME_OFFSET(map_entry))
18903 / effective_page_size) <
18904 object->wired_page_count)) {
18905 /*
18906 * Volatile|empty purgeable object owned
18907 * by this task: report the first
18908 * "#wired" pages as "resident" (to
18909 * show that they contribute to the
18910 * footprint) but not "dirty" (to avoid
18911 * double-counting with the fake
18912 * "non-volatile" region we'll report
18913 * at the end of the address space to
18914 * account for all (mapped or not)
18915 * non-volatile memory owned by this
18916 * task.
18917 */
18918 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18919 }
18920 } else if (!map_entry->is_sub_map &&
18921 map_entry->iokit_acct &&
18922 object->internal &&
18923 object->purgable == VM_PURGABLE_DENY) {
18924 /*
18925 * Non-purgeable IOKit memory: phys_footprint
18926 * includes the entire virtual mapping.
18927 */
18928 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18929 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18930 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18931 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18932 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18933 /* alternate accounting */
18934 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18935 if (map->pmap->footprint_was_suspended) {
18936 /*
18937 * The assertion below can fail if dyld
18938 * suspended footprint accounting
18939 * while doing some adjustments to
18940 * this page; the mapping would say
18941 * "use pmap accounting" but the page
18942 * would be marked "alternate
18943 * accounting".
18944 */
18945 } else
18946 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18947 {
18948 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18949 }
18950 disposition = 0;
18951 } else {
18952 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18953 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18954 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18955 disposition |= VM_PAGE_QUERY_PAGE_REF;
18956 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18957 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18958 } else {
18959 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18960 }
18961 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18962 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18963 }
18964 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18965 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18966 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18967 }
18968 }
18969
18970 *disposition_p = disposition;
18971 }
18972
18973 kern_return_t
vm_map_page_query_internal(vm_map_t target_map,vm_map_offset_t offset,int * disposition,int * ref_count)18974 vm_map_page_query_internal(
18975 vm_map_t target_map,
18976 vm_map_offset_t offset,
18977 int *disposition,
18978 int *ref_count)
18979 {
18980 kern_return_t kr;
18981 vm_page_info_basic_data_t info;
18982 mach_msg_type_number_t count;
18983
18984 count = VM_PAGE_INFO_BASIC_COUNT;
18985 kr = vm_map_page_info(target_map,
18986 offset,
18987 VM_PAGE_INFO_BASIC,
18988 (vm_page_info_t) &info,
18989 &count);
18990 if (kr == KERN_SUCCESS) {
18991 *disposition = info.disposition;
18992 *ref_count = info.ref_count;
18993 } else {
18994 *disposition = 0;
18995 *ref_count = 0;
18996 }
18997
18998 return kr;
18999 }
19000
19001 kern_return_t
vm_map_page_info(vm_map_t map,vm_map_offset_t offset,vm_page_info_flavor_t flavor,vm_page_info_t info,mach_msg_type_number_t * count)19002 vm_map_page_info(
19003 vm_map_t map,
19004 vm_map_offset_t offset,
19005 vm_page_info_flavor_t flavor,
19006 vm_page_info_t info,
19007 mach_msg_type_number_t *count)
19008 {
19009 return vm_map_page_range_info_internal(map,
19010 offset, /* start of range */
19011 (offset + 1), /* this will get rounded in the call to the page boundary */
19012 (int)-1, /* effective_page_shift: unspecified */
19013 flavor,
19014 info,
19015 count);
19016 }
19017
/*
 * vm_map_page_range_info_internal:
 *
 * Fill "info" (an array of per-page records, flavor VM_PAGE_INFO_BASIC)
 * with the disposition, reference count, object id, offset and shadow
 * depth of every page in [start_offset, end_offset) of "map".  Recurses
 * into submaps and walks each object's shadow chain to find resident or
 * compressed pages.  Holes and entries with no VM object produce zeroed
 * records.  When the caller is inspecting its own footprint
 * (task_self_region_footprint()), pages are reported via the footprint
 * accounting rules instead of the object walk.
 *
 * "effective_page_shift" of -1 means "pick one for me"; otherwise it
 * fixes the page granularity of the report.  "*count" must match
 * VM_PAGE_INFO_BASIC_COUNT (or the legacy count - 1).
 */
kern_return_t
vm_map_page_range_info_internal(
	vm_map_t                map,
	vm_map_offset_t         start_offset,
	vm_map_offset_t         end_offset,
	int                     effective_page_shift,
	vm_page_info_flavor_t   flavor,
	vm_page_info_t          info,
	mach_msg_type_number_t  *count)
{
	vm_map_entry_t          map_entry = VM_MAP_ENTRY_NULL;
	vm_object_t             object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
	vm_page_t               m = VM_PAGE_NULL;
	kern_return_t           retval = KERN_SUCCESS;
	int                     disposition = 0;
	int                     ref_count = 0;
	int                     depth = 0, info_idx = 0;
	vm_page_info_basic_t    basic_info = 0;
	vm_map_offset_t         offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
	vm_map_offset_t         start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
	boolean_t               do_region_footprint;
	ledger_amount_t         ledger_resident, ledger_compressed;
	int                     effective_page_size;
	vm_map_offset_t         effective_page_mask;

	switch (flavor) {
	case VM_PAGE_INFO_BASIC:
		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
			/*
			 * The "vm_page_info_basic_data" structure was not
			 * properly padded, so allow the size to be off by
			 * one to maintain backwards binary compatibility...
			 */
			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
				return KERN_INVALID_ARGUMENT;
			}
		}
		break;
	default:
		return KERN_INVALID_ARGUMENT;
	}

	if (effective_page_shift == -1) {
		/* caller did not specify a granularity: derive one from the map */
		effective_page_shift = vm_self_region_page_shift_safely(map);
		if (effective_page_shift == -1) {
			return KERN_INVALID_ARGUMENT;
		}
	}
	effective_page_size = (1 << effective_page_shift);
	effective_page_mask = effective_page_size - 1;

	do_region_footprint = task_self_region_footprint();
	disposition = 0;
	ref_count = 0;
	depth = 0;
	info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
	retval = KERN_SUCCESS;

	/* remember the sub-page offset so the first record can report it */
	offset_in_page = start_offset & effective_page_mask;
	start = vm_map_trunc_page(start_offset, effective_page_mask);
	end = vm_map_round_page(end_offset, effective_page_mask);

	if (end < start) {
		/* rounding overflowed or the range was inverted */
		return KERN_INVALID_ARGUMENT;
	}

	assert((end - start) <= MAX_PAGE_RANGE_QUERY);

	vm_map_lock_read(map);

	task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);

	for (curr_s_offset = start; curr_s_offset < end;) {
		/*
		 * New lookup needs reset of these variables.
		 */
		curr_object = object = VM_OBJECT_NULL;
		offset_in_object = 0;
		ref_count = 0;
		depth = 0;

		if (do_region_footprint &&
		    curr_s_offset >= vm_map_last_entry(map)->vme_end) {
			/*
			 * Request for "footprint" info about a page beyond
			 * the end of address space: this must be for
			 * the fake region vm_map_region_recurse_64()
			 * reported to account for non-volatile purgeable
			 * memory owned by this task.
			 */
			disposition = 0;

			if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
			    (unsigned) ledger_compressed) {
				/*
				 * We haven't reported all the "non-volatile
				 * compressed" pages yet, so report this fake
				 * page as "compressed".
				 */
				disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
			} else {
				/*
				 * We've reported all the non-volatile
				 * compressed page but not all the non-volatile
				 * pages, so report this fake page as
				 * "resident dirty".
				 */
				disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
				disposition |= VM_PAGE_QUERY_PAGE_REF;
			}
			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = 1;
				basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				basic_info->offset = 0;
				basic_info->depth = 0;

				info_idx++;
				break;
			}
			curr_s_offset += effective_page_size;
			continue;
		}

		/*
		 * First, find the map entry covering "curr_s_offset", going down
		 * submaps if necessary.
		 */
		if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
			/* no entry -> no object -> no page */

			if (curr_s_offset < vm_map_min(map)) {
				/*
				 * Illegal address that falls below map min.
				 */
				curr_e_offset = MIN(end, vm_map_min(map));
			} else if (curr_s_offset >= vm_map_max(map)) {
				/*
				 * Illegal address that falls on/after map max.
				 */
				curr_e_offset = end;
			} else if (map_entry == vm_map_to_entry(map)) {
				/*
				 * Hit a hole.
				 */
				if (map_entry->vme_next == vm_map_to_entry(map)) {
					/*
					 * Empty map.
					 */
					curr_e_offset = MIN(map->max_offset, end);
				} else {
					/*
					 * Hole at start of the map.
					 */
					curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
				}
			} else {
				if (map_entry->vme_next == vm_map_to_entry(map)) {
					/*
					 * Hole at the end of the map.
					 */
					curr_e_offset = MIN(map->max_offset, end);
				} else {
					curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
				}
			}

			assert(curr_e_offset >= curr_s_offset);

			/* zero out one record per page in the hole and skip over it */
			uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;

			void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));

			curr_s_offset = curr_e_offset;

			info_idx += num_pages;

			continue;
		}

		/* compute offset from this map entry's start */
		offset_in_object = curr_s_offset - map_entry->vme_start;

		/* compute offset into this map entry's object (or submap) */
		offset_in_object += VME_OFFSET(map_entry);

		if (map_entry->is_sub_map) {
			/* recurse into the submap for the overlapping sub-range */
			vm_map_t sub_map = VM_MAP_NULL;
			vm_page_info_t submap_info = 0;
			vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;

			range_len = MIN(map_entry->vme_end, end) - curr_s_offset;

			submap_s_offset = offset_in_object;
			submap_e_offset = submap_s_offset + range_len;

			sub_map = VME_SUBMAP(map_entry);

			/* take a ref so the submap survives dropping the map lock */
			vm_map_reference(sub_map);
			vm_map_unlock_read(map);

			submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
			    "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));

			retval = vm_map_page_range_info_internal(sub_map,
			    submap_s_offset,
			    submap_e_offset,
			    effective_page_shift,
			    VM_PAGE_INFO_BASIC,
			    (vm_page_info_t) submap_info,
			    count);

			assert(retval == KERN_SUCCESS);

			vm_map_lock_read(map);
			vm_map_deallocate(sub_map);

			/* Move the "info" index by the number of pages we inspected.*/
			info_idx += range_len >> effective_page_shift;

			/* Move our current offset by the size of the range we inspected.*/
			curr_s_offset += range_len;

			continue;
		}

		object = VME_OBJECT(map_entry);

		if (object == VM_OBJECT_NULL) {
			/*
			 * We don't have an object here and, hence,
			 * no pages to inspect. We'll fill up the
			 * info structure appropriately.
			 */

			curr_e_offset = MIN(map_entry->vme_end, end);

			uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;

			void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));

			curr_s_offset = curr_e_offset;

			info_idx += num_pages;

			continue;
		}

		if (do_region_footprint) {
			/* footprint mode: report per the accounting rules, one page at a time */
			disposition = 0;
			if (map->has_corpse_footprint) {
				/*
				 * Query the page info data we saved
				 * while forking the corpse.
				 */
				vm_map_corpse_footprint_query_page_info(
					map,
					curr_s_offset,
					&disposition);
			} else {
				/*
				 * Query the live pmap for footprint info
				 * about this page.
				 */
				vm_map_footprint_query_page_info(
					map,
					map_entry,
					curr_s_offset,
					&disposition);
			}
			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = 1;
				basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				basic_info->offset = 0;
				basic_info->depth = 0;

				info_idx++;
				break;
			}
			curr_s_offset += effective_page_size;
			continue;
		}

		vm_object_reference(object);
		/*
		 * Shared mode -- so we can allow other readers
		 * to grab the lock too.
		 */
		vm_object_lock_shared(object);

		curr_e_offset = MIN(map_entry->vme_end, end);

		vm_map_unlock_read(map);

		map_entry = NULL; /* map is unlocked, the entry is no longer valid. */

		curr_object = object;

		/* walk each page of this entry's range, restarting at the top object */
		for (; curr_s_offset < curr_e_offset;) {
			if (object == curr_object) {
				ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
			} else {
				ref_count = curr_object->ref_count;
			}

			curr_offset_in_object = offset_in_object;

			/* descend the shadow chain until the page (or its compressed copy) is found */
			for (;;) {
				m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));

				if (m != VM_PAGE_NULL) {
					disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
					break;
				} else {
					if (curr_object->internal &&
					    curr_object->alive &&
					    !curr_object->terminating &&
					    curr_object->pager_ready) {
						if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
						    == VM_EXTERNAL_STATE_EXISTS) {
							/* the pager has that page */
							disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
							break;
						}
					}

					/*
					 * Go down the VM object shadow chain until we find the page
					 * we're looking for.
					 */

					if (curr_object->shadow != VM_OBJECT_NULL) {
						vm_object_t shadow = VM_OBJECT_NULL;

						curr_offset_in_object += curr_object->vo_shadow_offset;
						shadow = curr_object->shadow;

						/* lock the shadow before dropping the current object */
						vm_object_lock_shared(shadow);
						vm_object_unlock(curr_object);

						curr_object = shadow;
						depth++;
						continue;
					} else {
						break;
					}
				}
			}

			/* The ref_count is not strictly accurate, it measures the number */
			/* of entities holding a ref on the object, they may not be mapping */
			/* the object or may not be mapping the section holding the */
			/* target page but its still a ball park number and though an over- */
			/* count, it picks up the copy-on-write cases */

			/* We could also get a picture of page sharing from pmap_attributes */
			/* but this would under count as only faulted-in mappings would */
			/* show up. */

			if ((curr_object == object) && curr_object->shadow) {
				disposition |= VM_PAGE_QUERY_PAGE_COPIED;
			}

			if (!curr_object->internal) {
				disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
			}

			if (m != VM_PAGE_NULL) {
				if (m->vmp_fictitious) {
					disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
				} else {
					if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
						disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
					}

					if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
						disposition |= VM_PAGE_QUERY_PAGE_REF;
					}

					if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
						disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
					}

					/*
					 * XXX TODO4K:
					 * when this routine deals with 4k
					 * pages, check the appropriate CS bit
					 * here.
					 */
					if (m->vmp_cs_validated) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
					}
					if (m->vmp_cs_tainted) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
					}
					if (m->vmp_cs_nx) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
					}
					if (m->vmp_reusable || curr_object->all_reusable) {
						disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
					}
				}
			}

			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = ref_count;
				basic_info->object_id = (vm_object_id_t) (uintptr_t)
				    VM_KERNEL_ADDRPERM(curr_object);
				basic_info->offset =
				    (memory_object_offset_t) curr_offset_in_object + offset_in_page;
				basic_info->depth = depth;

				info_idx++;
				break;
			}

			disposition = 0;
			offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.

			/*
			 * Move to next offset in the range and in our object.
			 */
			curr_s_offset += effective_page_size;
			offset_in_object += effective_page_size;
			curr_offset_in_object = offset_in_object;

			if (curr_object != object) {
				/* we ended deep in a shadow chain: pop back to the top object */
				vm_object_unlock(curr_object);

				curr_object = object;

				vm_object_lock_shared(curr_object);
			} else {
				/* yield the shared lock periodically to avoid starving writers */
				vm_object_lock_yield_shared(curr_object);
			}
		}

		vm_object_unlock(curr_object);
		vm_object_deallocate(curr_object);

		vm_map_lock_read(map);
	}

	vm_map_unlock_read(map);
	return retval;
}
19479
19480 /*
19481 * vm_map_msync
19482 *
19483 * Synchronises the memory range specified with its backing store
19484 * image by either flushing or cleaning the contents to the appropriate
19485 * memory manager engaging in a memory object synchronize dialog with
19486 * the manager. The client doesn't return until the manager issues
19487 * m_o_s_completed message. MIG Magically converts user task parameter
19488 * to the task's address map.
19489 *
19490 * interpretation of sync_flags
19491 * VM_SYNC_INVALIDATE - discard pages, only return precious
19492 * pages to manager.
19493 *
19494 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19495 * - discard pages, write dirty or precious
19496 * pages back to memory manager.
19497 *
19498 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19499 * - write dirty or precious pages back to
19500 * the memory manager.
19501 *
19502 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19503 * is a hole in the region, and we would
19504 * have returned KERN_SUCCESS, return
19505 * KERN_INVALID_ADDRESS instead.
19506 *
19507 * NOTE
19508 * The memory object attributes have not yet been implemented, this
19509 * function will have to deal with the invalidate attribute
19510 *
19511 * RETURNS
19512 * KERN_INVALID_TASK Bad task parameter
19513 * KERN_INVALID_ARGUMENT both sync and async were specified.
19514 * KERN_SUCCESS The usual.
19515 * KERN_INVALID_ADDRESS There was a hole in the region.
19516 */
19517
19518 kern_return_t
vm_map_msync(vm_map_t map,vm_map_address_t address,vm_map_size_t size,vm_sync_t sync_flags)19519 vm_map_msync(
19520 vm_map_t map,
19521 vm_map_address_t address,
19522 vm_map_size_t size,
19523 vm_sync_t sync_flags)
19524 {
19525 vm_map_entry_t entry;
19526 vm_map_size_t amount_left;
19527 vm_object_offset_t offset;
19528 vm_object_offset_t start_offset, end_offset;
19529 boolean_t do_sync_req;
19530 boolean_t had_hole = FALSE;
19531 vm_map_offset_t pmap_offset;
19532
19533 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19534 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19535 return KERN_INVALID_ARGUMENT;
19536 }
19537
19538 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19539 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19540 }
19541
19542 /*
19543 * align address and size on page boundaries
19544 */
19545 size = (vm_map_round_page(address + size,
19546 VM_MAP_PAGE_MASK(map)) -
19547 vm_map_trunc_page(address,
19548 VM_MAP_PAGE_MASK(map)));
19549 address = vm_map_trunc_page(address,
19550 VM_MAP_PAGE_MASK(map));
19551
19552 if (map == VM_MAP_NULL) {
19553 return KERN_INVALID_TASK;
19554 }
19555
19556 if (size == 0) {
19557 return KERN_SUCCESS;
19558 }
19559
19560 amount_left = size;
19561
19562 while (amount_left > 0) {
19563 vm_object_size_t flush_size;
19564 vm_object_t object;
19565
19566 vm_map_lock(map);
19567 if (!vm_map_lookup_entry(map,
19568 address,
19569 &entry)) {
19570 vm_map_size_t skip;
19571
19572 /*
19573 * hole in the address map.
19574 */
19575 had_hole = TRUE;
19576
19577 if (sync_flags & VM_SYNC_KILLPAGES) {
19578 /*
19579 * For VM_SYNC_KILLPAGES, there should be
19580 * no holes in the range, since we couldn't
19581 * prevent someone else from allocating in
19582 * that hole and we wouldn't want to "kill"
19583 * their pages.
19584 */
19585 vm_map_unlock(map);
19586 break;
19587 }
19588
19589 /*
19590 * Check for empty map.
19591 */
19592 if (entry == vm_map_to_entry(map) &&
19593 entry->vme_next == entry) {
19594 vm_map_unlock(map);
19595 break;
19596 }
19597 /*
19598 * Check that we don't wrap and that
19599 * we have at least one real map entry.
19600 */
19601 if ((map->hdr.nentries == 0) ||
19602 (entry->vme_next->vme_start < address)) {
19603 vm_map_unlock(map);
19604 break;
19605 }
19606 /*
19607 * Move up to the next entry if needed
19608 */
19609 skip = (entry->vme_next->vme_start - address);
19610 if (skip >= amount_left) {
19611 amount_left = 0;
19612 } else {
19613 amount_left -= skip;
19614 }
19615 address = entry->vme_next->vme_start;
19616 vm_map_unlock(map);
19617 continue;
19618 }
19619
19620 offset = address - entry->vme_start;
19621 pmap_offset = address;
19622
19623 /*
19624 * do we have more to flush than is contained in this
19625 * entry ?
19626 */
19627 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19628 flush_size = entry->vme_end -
19629 (entry->vme_start + offset);
19630 } else {
19631 flush_size = amount_left;
19632 }
19633 amount_left -= flush_size;
19634 address += flush_size;
19635
19636 if (entry->is_sub_map == TRUE) {
19637 vm_map_t local_map;
19638 vm_map_offset_t local_offset;
19639
19640 local_map = VME_SUBMAP(entry);
19641 local_offset = VME_OFFSET(entry);
19642 vm_map_reference(local_map);
19643 vm_map_unlock(map);
19644 if (vm_map_msync(
19645 local_map,
19646 local_offset,
19647 flush_size,
19648 sync_flags) == KERN_INVALID_ADDRESS) {
19649 had_hole = TRUE;
19650 }
19651 vm_map_deallocate(local_map);
19652 continue;
19653 }
19654 object = VME_OBJECT(entry);
19655
19656 /*
19657 * We can't sync this object if the object has not been
19658 * created yet
19659 */
19660 if (object == VM_OBJECT_NULL) {
19661 vm_map_unlock(map);
19662 continue;
19663 }
19664 offset += VME_OFFSET(entry);
19665
19666 vm_object_lock(object);
19667
19668 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19669 int kill_pages = 0;
19670 boolean_t reusable_pages = FALSE;
19671
19672 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19673 /*
19674 * This is a destructive operation and so we
19675 * err on the side of limiting the range of
19676 * the operation.
19677 */
19678 start_offset = vm_object_round_page(offset);
19679 end_offset = vm_object_trunc_page(offset + flush_size);
19680
19681 if (end_offset <= start_offset) {
19682 vm_object_unlock(object);
19683 vm_map_unlock(map);
19684 continue;
19685 }
19686
19687 pmap_offset += start_offset - offset;
19688 } else {
19689 start_offset = offset;
19690 end_offset = offset + flush_size;
19691 }
19692
19693 if (sync_flags & VM_SYNC_KILLPAGES) {
19694 if (((object->ref_count == 1) ||
19695 ((object->copy_strategy !=
19696 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19697 (object->copy == VM_OBJECT_NULL))) &&
19698 (object->shadow == VM_OBJECT_NULL)) {
19699 if (object->ref_count != 1) {
19700 vm_page_stats_reusable.free_shared++;
19701 }
19702 kill_pages = 1;
19703 } else {
19704 kill_pages = -1;
19705 }
19706 }
19707 if (kill_pages != -1) {
19708 vm_object_deactivate_pages(
19709 object,
19710 start_offset,
19711 (vm_object_size_t) (end_offset - start_offset),
19712 kill_pages,
19713 reusable_pages,
19714 map->pmap,
19715 pmap_offset);
19716 }
19717 vm_object_unlock(object);
19718 vm_map_unlock(map);
19719 continue;
19720 }
19721 /*
19722 * We can't sync this object if there isn't a pager.
19723 * Don't bother to sync internal objects, since there can't
19724 * be any "permanent" storage for these objects anyway.
19725 */
19726 if ((object->pager == MEMORY_OBJECT_NULL) ||
19727 (object->internal) || (object->private)) {
19728 vm_object_unlock(object);
19729 vm_map_unlock(map);
19730 continue;
19731 }
19732 /*
19733 * keep reference on the object until syncing is done
19734 */
19735 vm_object_reference_locked(object);
19736 vm_object_unlock(object);
19737
19738 vm_map_unlock(map);
19739
19740 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19741 start_offset = vm_object_trunc_page(offset);
19742 end_offset = vm_object_round_page(offset + flush_size);
19743 } else {
19744 start_offset = offset;
19745 end_offset = offset + flush_size;
19746 }
19747
19748 do_sync_req = vm_object_sync(object,
19749 start_offset,
19750 (end_offset - start_offset),
19751 sync_flags & VM_SYNC_INVALIDATE,
19752 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19753 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19754 sync_flags & VM_SYNC_SYNCHRONOUS);
19755
19756 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19757 /*
19758 * clear out the clustering and read-ahead hints
19759 */
19760 vm_object_lock(object);
19761
19762 object->pages_created = 0;
19763 object->pages_used = 0;
19764 object->sequential = 0;
19765 object->last_alloc = 0;
19766
19767 vm_object_unlock(object);
19768 }
19769 vm_object_deallocate(object);
19770 } /* while */
19771
19772 /* for proper msync() behaviour */
19773 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19774 return KERN_INVALID_ADDRESS;
19775 }
19776
19777 return KERN_SUCCESS;
19778 }/* vm_msync */
19779
/*
 * vm_named_entry_from_vm_object:
 *
 * Attach "object" to "named_entry" by wrapping it in a single-entry
 * vm_map_copy covering [offset, offset + size) with protection "prot".
 * The named entry must not already be backed by anything (asserted
 * below); on return it is marked as object-backed, and inherits the
 * "internal" attribute from the object.  Always returns KERN_SUCCESS.
 */
kern_return_t
vm_named_entry_from_vm_object(
	vm_named_entry_t        named_entry,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	vm_prot_t               prot)
{
	vm_map_copy_t   copy;
	vm_map_entry_t  copy_entry;

	/* the named entry must be completely unbacked at this point */
	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(!named_entry->is_object);
	assert(!named_entry->internal);
	assert(named_entry->backing.copy == VM_MAP_COPY_NULL);

	/* build a one-entry copy object describing the mapping */
	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->offset = offset;
	copy->size = size;
	copy->cpy_hdr.page_shift = PAGE_SHIFT;
	vm_map_store_init(&copy->cpy_hdr);

	copy_entry = vm_map_copy_entry_create(copy, FALSE);
	copy_entry->protection = prot;
	copy_entry->max_protection = prot;
	copy_entry->use_pmap = TRUE;
	/* entry bounds are page-aligned versions of [offset, offset + size) */
	copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
	copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
	VME_OBJECT_SET(copy_entry, object);
	VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
	vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);

	named_entry->backing.copy = copy;
	named_entry->is_object = TRUE;
	if (object->internal) {
		named_entry->internal = TRUE;
	}

	DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);

	return KERN_SUCCESS;
}
19824
/*
 * Return the VM object backing an object-backed named entry.
 *
 * The named entry must be of the "is_object" flavor (asserted), i.e.
 * backed by a map copy list containing exactly one non-submap entry,
 * as set up by vm_named_entry_from_vm_object().
 */
vm_object_t
vm_named_entry_to_vm_object(
	vm_named_entry_t        named_entry)
{
	vm_map_copy_t   copy;
	vm_map_entry_t  copy_entry;
	vm_object_t     object;

	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(named_entry->is_object);
	copy = named_entry->backing.copy;
	assert(copy != VM_MAP_COPY_NULL);
	/* an object-backed named entry wraps exactly one map entry */
	assert(copy->cpy_hdr.nentries == 1);
	copy_entry = vm_map_copy_first_entry(copy);
	assert(!copy_entry->is_sub_map);
	object = VME_OBJECT(copy_entry);

	DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);

	return object;
}
19847
19848 /*
19849 * Routine: convert_port_entry_to_map
19850 * Purpose:
19851 * Convert from a port specifying an entry or a task
19852 * to a map. Doesn't consume the port ref; produces a map ref,
19853 * which may be null. Unlike convert_port_to_map, the
* port may be a task port or a named-entry port.
19855 * Conditions:
19856 * Nothing locked.
19857 */
19858
19859
vm_map_t
convert_port_entry_to_map(
	ipc_port_t      port)
{
	vm_map_t map = VM_MAP_NULL;
	vm_named_entry_t        named_entry;
	uint32_t        try_failed_count = 0;

	if (!IP_VALID(port)) {
		return VM_MAP_NULL;
	}

	/* a plain task port: delegate to the task-port converter */
	if (ip_kotype(port) != IKOT_NAMED_ENTRY) {
		return convert_port_to_map(port);
	}

	ip_mq_lock(port);

	/*
	 * Lock ordering: we hold the port lock and must take the named
	 * entry's mutex.  Use try-lock with back-off (mutex_pause) and
	 * re-check the port's named entry each time around, since it may
	 * have been torn down while the port was unlocked.
	 */
	while (TRUE) {
		named_entry = mach_memory_entry_from_port(port);
		if (named_entry == NULL) {
			ip_mq_unlock(port);
			return VM_MAP_NULL;
		}

		if (lck_mtx_try_lock(&(named_entry)->Lock)) {
			break;
		}

		ip_mq_unlock(port);

		try_failed_count++;
		mutex_pause(try_failed_count);
		ip_mq_lock(port);
	}

	/* pin the named entry so it survives the port-ref drop below */
	named_entry->ref_count++;
	lck_mtx_unlock(&(named_entry)->Lock);
	ip_mq_unlock(port);
	/* only writable submap-backed entries yield a map reference */
	if ((named_entry->is_sub_map) &&
	    (named_entry->protection & VM_PROT_WRITE)) {
		map = named_entry->backing.map;
		if (map->pmap != PMAP_NULL) {
			if (map->pmap == kernel_pmap) {
				panic("userspace has access "
				    "to a kernel map %p", map);
			}
			pmap_require(map->pmap);
		}
		vm_map_reference(map);
	}
	/* drop the reference taken above */
	mach_destroy_memory_entry(port);
	return map;
}
19914
19915 /*
19916 * Export routines to other components for the things we access locally through
19917 * macros.
19918 */
#undef current_map
/*
 * Out-of-line version of the current_map() macro, exported for
 * components that cannot use the fast inline form.
 */
vm_map_t
current_map(void)
{
	return current_map_fast();
}
19925
19926 /*
19927 * vm_map_reference:
19928 *
19929 * Takes a reference on the specified map.
19930 */
19931 void
vm_map_reference(vm_map_t map)19932 vm_map_reference(
19933 vm_map_t map)
19934 {
19935 if (__probable(map != VM_MAP_NULL)) {
19936 vm_map_require(map);
19937 os_ref_retain(&map->map_refcnt);
19938 }
19939 }
19940
19941 /*
19942 * vm_map_deallocate:
19943 *
19944 * Removes a reference from the specified map,
19945 * destroying it if no references remain.
19946 * The map should not be locked.
19947 */
19948 void
vm_map_deallocate(vm_map_t map)19949 vm_map_deallocate(
19950 vm_map_t map)
19951 {
19952 if (__probable(map != VM_MAP_NULL)) {
19953 vm_map_require(map);
19954 if (os_ref_release(&map->map_refcnt) == 0) {
19955 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19956 }
19957 }
19958 }
19959
/*
 * Release a reference held through an inspect-only map handle.
 * Thin wrapper: the underlying object is a vm_map_t.
 */
void
vm_map_inspect_deallocate(
	vm_map_inspect_t        map)
{
	vm_map_deallocate((vm_map_t)map);
}
19966
/*
 * Release a reference held through a read-only map handle.
 * Thin wrapper: the underlying object is a vm_map_t.
 */
void
vm_map_read_deallocate(
	vm_map_read_t      map)
{
	vm_map_deallocate((vm_map_t)map);
}
19973
19974
19975 void
vm_map_disable_NX(vm_map_t map)19976 vm_map_disable_NX(vm_map_t map)
19977 {
19978 if (map == NULL) {
19979 return;
19980 }
19981 if (map->pmap == NULL) {
19982 return;
19983 }
19984
19985 pmap_disable_NX(map->pmap);
19986 }
19987
19988 void
vm_map_disallow_data_exec(vm_map_t map)19989 vm_map_disallow_data_exec(vm_map_t map)
19990 {
19991 if (map == NULL) {
19992 return;
19993 }
19994
19995 map->map_disallow_data_exec = TRUE;
19996 }
19997
19998 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19999 * more descriptive.
20000 */
/*
 * Cap the map's address space at the 32-bit maximum.
 * On ARM the limit comes from the pmap layer; elsewhere it is the
 * static VM_MAX_ADDRESS constant.
 */
void
vm_map_set_32bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}
20010
20011
/*
 * Cap the map's address space at the 64-bit maximum.
 * On ARM the limit comes from the pmap layer; elsewhere it is the
 * static MACH_VM_MAX_ADDRESS constant.
 */
void
vm_map_set_64bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
}
20021
20022 /*
20023 * Expand the maximum size of an existing map to the maximum supported.
20024 */
/*
 * Expand the maximum size of an existing map to the maximum supported.
 * Only meaningful on arm64 (without CONFIG_ARROW); a no-op elsewhere.
 * Passing ~0 lets vm_map_set_max_addr() clamp to the jumbo pmap limit.
 */
void
vm_map_set_jumbo(vm_map_t map)
{
#if defined (__arm64__) && !defined(CONFIG_ARROW)
	vm_map_set_max_addr(map, ~0);
#else /* arm64 */
	(void) map;
#endif
}
20034
20035 /*
20036 * This map has a JIT entitlement
20037 */
/*
 * This map has a JIT entitlement: propagate that to its pmap.
 * Only meaningful on arm64; a no-op elsewhere.
 */
void
vm_map_set_jit_entitled(vm_map_t map)
{
#if defined (__arm64__)
	pmap_set_jit_entitled(map->pmap);
#else /* arm64 */
	(void) map;
#endif
}
20047
20048 /*
20049 * Expand the maximum size of an existing map.
20050 */
/*
 * Expand the maximum size of an existing map (arm64 only; no-op elsewhere).
 *
 * The new maximum is page-truncated and clamped to the pmap's jumbo
 * limit.  Shrinking is refused.  After raising max_offset, the map's
 * hole list is fixed up so the newly exposed range is allocatable.
 *
 * NOTE(review): callers are presumed to hold the map exclusively; this
 * routine takes no lock itself — confirm against call sites.
 */
void
vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
{
#if defined(__arm64__)
	vm_map_offset_t max_supported_offset = 0;
	vm_map_offset_t old_max_offset = map->max_offset;
	max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);

	new_max_offset = trunc_page(new_max_offset);

	/* The address space cannot be shrunk using this routine. */
	if (old_max_offset >= new_max_offset) {
		return;
	}

	if (max_supported_offset < new_max_offset) {
		new_max_offset = max_supported_offset;
	}

	map->max_offset = new_max_offset;

	if (map->holes_list->prev->vme_end == old_max_offset) {
		/*
		 * There is already a hole at the end of the map; simply make it bigger.
		 */
		map->holes_list->prev->vme_end = map->max_offset;
	} else {
		/*
		 * There is no hole at the end, so we need to create a new hole
		 * for the new empty space we're creating.
		 * Link it in as the last element of the circular hole list.
		 */
		struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
		new_hole->start = old_max_offset;
		new_hole->end = map->max_offset;
		new_hole->prev = map->holes_list->prev;
		new_hole->next = (struct vm_map_entry *)map->holes_list;
		map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
		map->holes_list->prev = (struct vm_map_entry *)new_hole;
	}
#else
	(void)map;
	(void)new_max_offset;
#endif
}
20095
20096 vm_map_offset_t
vm_compute_max_offset(boolean_t is64)20097 vm_compute_max_offset(boolean_t is64)
20098 {
20099 #if defined(__arm__) || defined(__arm64__)
20100 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
20101 #else
20102 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
20103 #endif
20104 }
20105
/*
 * Report how ASLR slide entropy is partitioned for this platform:
 * the number of independent sections and the size of each.
 * arm64 uses 3 sections of ARM_TT_TWIG_SIZE; other platforms use a
 * single section (size 0 meaning "no sectioning").
 */
void
vm_map_get_max_aslr_slide_section(
	vm_map_t                map __unused,
	int64_t                 *max_sections,
	int64_t                 *section_size)
{
#if defined(__arm64__)
	*max_sections = 3;
	*section_size = ARM_TT_TWIG_SIZE;
#else
	*max_sections = 1;
	*section_size = 0;
#endif
}
20120
/*
 * Maximum ASLR slide, expressed in map pages.
 */
uint64_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
	 * limited embedded address space; this is also meant to minimize pmap
	 * memory usage on 16KB page systems.
	 */
	return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
#else
	/* 64-bit tasks: 2^16 pages of slide; 32-bit: 2^8 */
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
20134
/*
 * Maximum ASLR slide for the dynamic loader, expressed in map pages.
 */
uint64_t
vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
	 * of independent entropy on 16KB page systems.
	 */
	return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
#else
	/* same budget as the main-binary slide on non-arm64 */
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
20147
#ifndef __arm__
/*
 * A map is considered 64-bit when its ceiling exceeds the 32-bit
 * VM_MAX_ADDRESS.  (On __arm__ this is provided elsewhere.)
 */
boolean_t
vm_map_is_64bit(
	vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}
#endif
20156
/*
 * Return TRUE when the map's minimum offset reserves at least
 * "pagezero_size" bytes of inaccessible low address space.
 */
boolean_t
vm_map_has_hard_pagezero(
	vm_map_t        map,
	vm_map_offset_t pagezero_size)
{
	/*
	 * XXX FBDP
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return map->min_offset >= pagezero_size;
}
20174
20175 /*
* Raise a VM map's maximum offset.
20177 */
20178 kern_return_t
vm_map_raise_max_offset(vm_map_t map,vm_map_offset_t new_max_offset)20179 vm_map_raise_max_offset(
20180 vm_map_t map,
20181 vm_map_offset_t new_max_offset)
20182 {
20183 kern_return_t ret;
20184
20185 vm_map_lock(map);
20186 ret = KERN_INVALID_ADDRESS;
20187
20188 if (new_max_offset >= map->max_offset) {
20189 if (!vm_map_is_64bit(map)) {
20190 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20191 map->max_offset = new_max_offset;
20192 ret = KERN_SUCCESS;
20193 }
20194 } else {
20195 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20196 map->max_offset = new_max_offset;
20197 ret = KERN_SUCCESS;
20198 }
20199 }
20200 }
20201
20202 vm_map_unlock(map);
20203 return ret;
20204 }
20205
20206
20207 /*
20208 * Raise a VM map's minimum offset.
20209 * To strictly enforce "page zero" reservation.
20210 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t        map,
	vm_map_offset_t new_min_offset)
{
	vm_map_entry_t  first_entry;

	/* round up to the map's page granularity */
	new_min_offset = vm_map_round_page(new_min_offset,
	    VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	if (new_min_offset >= map->max_offset) {
		/* can't go beyond the end of the address space */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset. It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	/* shrink the first hole to start at the new minimum */
	assert(map->holes_list);
	map->holes_list->start = new_min_offset;
	assert(new_min_offset < map->holes_list->end);

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
20259
20260 /*
20261 * Set the limit on the maximum amount of address space and user wired memory allowed for this map.
20262 * This is basically a copy of the RLIMIT_AS and RLIMIT_MEMLOCK rlimit value maintained by the BSD
20263 * side of the kernel. The limits are checked in the mach VM side, so we keep a copy so we don't
20264 * have to reach over to the BSD data structures.
20265 */
20266
/* number of maps that ever received a finite size limit (telemetry) */
uint64_t vm_map_set_size_limit_count = 0;
/*
 * Install the RLIMIT_AS-equivalent limit on this map.
 * Fails (KERN_FAILURE) when the new limit is below the map's current
 * size; otherwise records the limit and fires a DTrace probe.
 */
kern_return_t
vm_map_set_size_limit(vm_map_t map, uint64_t new_size_limit)
{
	kern_return_t   kr;

	vm_map_lock(map);
	if (new_size_limit < map->size) {
		/* new limit should not be lower than its current size */
		DTRACE_VM2(vm_map_set_size_limit_fail,
		    vm_map_size_t, map->size,
		    uint64_t, new_size_limit);
		kr = KERN_FAILURE;
	} else if (new_size_limit == map->size_limit) {
		/* no change */
		kr = KERN_SUCCESS;
	} else {
		/* set new limit */
		DTRACE_VM2(vm_map_set_size_limit,
		    vm_map_size_t, map->size,
		    uint64_t, new_size_limit);
		if (new_size_limit != RLIM_INFINITY) {
			vm_map_set_size_limit_count++;
		}
		map->size_limit = new_size_limit;
		kr = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	return kr;
}
20297
/* number of maps that ever received a finite data limit (telemetry) */
uint64_t vm_map_set_data_limit_count = 0;
/*
 * Install the RLIMIT_DATA-equivalent limit on this map.
 * Same contract as vm_map_set_size_limit(), for the data limit.
 */
kern_return_t
vm_map_set_data_limit(vm_map_t map, uint64_t new_data_limit)
{
	kern_return_t   kr;

	vm_map_lock(map);
	if (new_data_limit < map->size) {
		/* new limit should not be lower than its current size */
		DTRACE_VM2(vm_map_set_data_limit_fail,
		    vm_map_size_t, map->size,
		    uint64_t, new_data_limit);
		kr = KERN_FAILURE;
	} else if (new_data_limit == map->data_limit) {
		/* no change */
		kr = KERN_SUCCESS;
	} else {
		/* set new limit */
		DTRACE_VM2(vm_map_set_data_limit,
		    vm_map_size_t, map->size,
		    uint64_t, new_data_limit);
		if (new_data_limit != RLIM_INFINITY) {
			vm_map_set_data_limit_count++;
		}
		map->data_limit = new_data_limit;
		kr = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	return kr;
}
20328
/*
 * Install the RLIMIT_MEMLOCK-equivalent user wired-memory limit.
 */
void
vm_map_set_user_wire_limit(vm_map_t     map,
    vm_size_t   limit)
{
	vm_map_lock(map);
	map->user_wire_limit = limit;
	vm_map_unlock(map);
}
20337
20338
/*
 * Set or clear the map's switch-protect flag under the map lock.
 */
void
vm_map_switch_protect(vm_map_t  map,
    boolean_t       val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}
20347
20348 extern int cs_process_enforcement_enable;
20349 boolean_t
vm_map_cs_enforcement(vm_map_t map)20350 vm_map_cs_enforcement(
20351 vm_map_t map)
20352 {
20353 if (cs_process_enforcement_enable) {
20354 return TRUE;
20355 }
20356 return map->cs_enforcement;
20357 }
20358
/*
 * Allow writable+executable (invalid-CS) mappings in this map's pmap.
 * Returns the pmap layer's verdict.
 */
kern_return_t
vm_map_cs_wx_enable(
	vm_map_t map)
{
	return pmap_cs_allow_invalid(vm_map_pmap(map));
}
20365
/*
 * Record whether this map's task is being debugged (for CS purposes),
 * under the map lock.
 */
void
vm_map_cs_debugged_set(
	vm_map_t map,
	boolean_t val)
{
	vm_map_lock(map);
	map->cs_debugged = val;
	vm_map_unlock(map);
}
20375
/*
 * Set the map's code-signing enforcement flag and mirror it into the
 * pmap, under the map lock.
 */
void
vm_map_cs_enforcement_set(
	vm_map_t map,
	boolean_t val)
{
	vm_map_lock(map);
	map->cs_enforcement = val;
	pmap_set_vm_map_cs_enforced(map->pmap, val);
	vm_map_unlock(map);
}
20386
20387 /*
20388 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20389 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20390 * bump both counters.
20391 */
/*
 * IOKit has mapped "bytes" into this map: credit both the iokit_mapped
 * and phys_footprint ledgers (phys_footprint is a composite that
 * includes iokit).
 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
20400
/*
 * IOKit has unmapped "bytes" from this map: debit the same two
 * ledgers credited by vm_map_iokit_mapped_region().
 */
void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
20409
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
/*
 * Mark every resident page in [start, end) as CS-validated.
 *
 * The whole range must be covered by a single non-submap entry with a
 * present VM object.  Each page is disconnected from the pmap so any
 * later modification is noticed.  Fails if a page is absent, busy, or
 * in an unusual state.
 */
kern_return_t
vm_map_sign(vm_map_t map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object. Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Hold the object lock across the page walk; the map lock is
	 * dropped here.  NOTE(review): "entry" is still dereferenced in
	 * the loop below after the map is unlocked — presumably safe
	 * because the object lock pins the mapping; confirm.
	 */
	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object,
		    start - entry->vme_start + VME_OFFSET(entry));
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->vmp_busy ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		/* XXX TODO4K: deal with 4k subpages individually? */
		m->vmp_cs_validated = VMP_CS_ALL_TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->vmp_wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
20505
20506 kern_return_t
vm_map_partial_reap(vm_map_t map,unsigned int * reclaimed_resident,unsigned int * reclaimed_compressed)20507 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20508 {
20509 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
20510 vm_map_entry_t next_entry;
20511 kern_return_t kr = KERN_SUCCESS;
20512 vm_map_t zap_map;
20513
20514 vm_map_lock(map);
20515
20516 /*
20517 * We use a "zap_map" to avoid having to unlock
20518 * the "map" in vm_map_delete().
20519 */
20520 zap_map = vm_map_create(PMAP_NULL,
20521 map->min_offset,
20522 map->max_offset,
20523 map->hdr.entries_pageable);
20524
20525 if (zap_map == VM_MAP_NULL) {
20526 return KERN_RESOURCE_SHORTAGE;
20527 }
20528
20529 vm_map_set_page_shift(zap_map,
20530 VM_MAP_PAGE_SHIFT(map));
20531 vm_map_disable_hole_optimization(zap_map);
20532
20533 for (entry = vm_map_first_entry(map);
20534 entry != vm_map_to_entry(map);
20535 entry = next_entry) {
20536 next_entry = entry->vme_next;
20537
20538 if (VME_OBJECT(entry) &&
20539 !entry->is_sub_map &&
20540 (VME_OBJECT(entry)->internal == TRUE) &&
20541 (VME_OBJECT(entry)->ref_count == 1)) {
20542 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20543 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20544
20545 (void)vm_map_delete(map,
20546 entry->vme_start,
20547 entry->vme_end,
20548 VM_MAP_REMOVE_SAVE_ENTRIES,
20549 zap_map);
20550 }
20551 }
20552
20553 vm_map_unlock(map);
20554
20555 /*
20556 * Get rid of the "zap_maps" and all the map entries that
20557 * they may still contain.
20558 */
20559 if (zap_map != VM_MAP_NULL) {
20560 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20561 zap_map = VM_MAP_NULL;
20562 }
20563
20564 return kr;
20565 }
20566
20567
20568 #if DEVELOPMENT || DEBUG
20569
/*
 * Remove all pmap mappings for the map's entries (DEVELOPMENT/DEBUG
 * diagnostic).  Optionally unnests nested submaps first so only this
 * task's pmap is affected.  Returns the map's phys_mem ledger balance
 * (sampled before disconnecting) converted to map pages.
 */
int
vm_map_disconnect_page_mappings(
	vm_map_t map,
	boolean_t do_unnest)
{
	vm_map_entry_t entry;
	ledger_amount_t byte_count = 0;

	if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
		vm_map_lock(map);

		for (entry = vm_map_first_entry(map);
		    entry != vm_map_to_entry(map);
		    entry = entry->vme_next) {
			if (entry->is_sub_map && entry->use_pmap) {
				/*
				 * Make sure the range between the start of this entry and
				 * the end of this entry is no longer nested, so that
				 * we will only remove mappings from the pmap in use by
				 * this task
				 */
				vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
			}
		}
		vm_map_unlock(map);
#endif
	}
	vm_map_lock_read(map);

	/* snapshot resident bytes before tearing down the mappings */
	ledger_get_balance(map->pmap->ledger, task_ledgers.phys_mem, &byte_count);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		/* skip entries with no object or physically contiguous ones */
		if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
		    (VME_OBJECT(entry)->phys_contiguous))) {
			continue;
		}
		if (entry->is_sub_map) {
			assert(!entry->use_pmap);
		}

		pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
	}
	vm_map_unlock_read(map);

	return (int) (byte_count / VM_MAP_PAGE_SIZE(map));
}
20619
/*
 * Inject a decompression error at "vaddr" (DEVELOPMENT/DEBUG fault
 * injection).  Looks up the backing object and asks the compressor
 * pager to poison the corresponding slot.
 *
 * Returns KERN_MEMORY_ERROR when no object backs the address,
 * KERN_MEMORY_PRESENT when the page is resident (no pager), or the
 * compressor pager's result.
 */
kern_return_t
vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_object_t     object = NULL;
	vm_object_offset_t      offset;
	vm_prot_t       prot;
	boolean_t       wired;
	vm_map_version_t        version;
	vm_map_t        real_map;
	int             result = KERN_FAILURE;

	vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
	vm_map_lock(map);

	/* on success, returns with "object" locked exclusively */
	result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
	    NULL, &real_map, NULL);
	if (object == NULL) {
		result = KERN_MEMORY_ERROR;
	} else if (object->pager) {
		result = vm_compressor_pager_inject_error(object->pager,
		    offset);
	} else {
		result = KERN_MEMORY_PRESENT;
	}

	if (object != NULL) {
		vm_object_unlock(object);
	}

	/* the lookup may have descended into a submap */
	if (real_map != map) {
		vm_map_unlock(real_map);
	}
	vm_map_unlock(map);

	return result;
}
20657
20658 #endif
20659
20660
20661 #if CONFIG_FREEZE
20662
20663
20664 extern struct freezer_context freezer_context_global;
20665 AbsoluteTime c_freezer_last_yield_ts = 0;
20666
20667 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20668 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20669
/*
 * Freeze a task's VM map: push its dirty anonymous pages into the
 * compressor (and, with freezer swap active, out to disk).
 *
 * Two-phase operation when freezer swap is active:
 *   1. evaluation phase — walk the map accounting dirty private vs.
 *      shared pages; bail out (KERN_FAILURE) if the process maps too
 *      much shared memory or its private/shared ratio is too low.
 *      With eval_only, stop here.
 *   2. freeze phase — purge volatile purgeable objects, then page out
 *      each eligible internal object, up to "dirty_budget" pages.
 *
 * Out-parameters (all pre-zeroed here): purgeable/wired/clean/dirty
 * page counts, shared MB, and on failure a FREEZER_ERROR_* code.
 * Returns KERN_SUCCESS, KERN_NO_SPACE when the compressor/swap is
 * low on space, KERN_INVALID_ARGUMENT or KERN_FAILURE as above.
 */
kern_return_t
vm_map_freeze(
	task_t       task,
	unsigned int *purgeable_count,
	unsigned int *wired_count,
	unsigned int *clean_count,
	unsigned int *dirty_count,
	unsigned int dirty_budget,
	unsigned int *shared_count,
	int          *freezer_error_code,
	boolean_t    eval_only)
{
	vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
	kern_return_t kr = KERN_SUCCESS;
	boolean_t evaluation_phase = TRUE;
	vm_object_t cur_shared_object = NULL;
	int cur_shared_obj_ref_cnt = 0;
	unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;

	*purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;

	/*
	 * We need the exclusive lock here so that we can
	 * block any page faults or lookups while we are
	 * in the middle of freezing this vm map.
	 */
	vm_map_t map = task->map;

	vm_map_lock(map);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
		if (vm_compressor_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
		}

		if (vm_swap_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
		}

		kr = KERN_NO_SPACE;
		goto done;
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
		/*
		 * In-memory compressor backing the freezer. No disk.
		 * So no need to do the evaluation phase.
		 */
		evaluation_phase = FALSE;

		if (eval_only == TRUE) {
			/*
			 * We don't support 'eval_only' mode
			 * in this non-swap config.
			 */
			*freezer_error_code = FREEZER_ERROR_GENERIC;
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);
	}
again:

	for (entry2 = vm_map_first_entry(map);
	    entry2 != vm_map_to_entry(map);
	    entry2 = entry2->vme_next) {
		vm_object_t     src_object = VME_OBJECT(entry2);

		if (src_object &&
		    !entry2->is_sub_map &&
		    !src_object->phys_contiguous) {
			/* If eligible, scan the entry, moving eligible pages over to our parent object */

			if (src_object->internal == TRUE) {
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					/*
					 * We skip purgeable objects during evaluation phase only.
					 * If we decide to freeze this process, we'll explicitly
					 * purge these objects before we go around again with
					 * 'evaluation_phase' set to FALSE.
					 */

					if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
						/*
						 * We want to purge objects that may not belong to this task but are mapped
						 * in this task alone. Since we already purged this task's purgeable memory
						 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
						 * on this task's purgeable objects. Hence the check for only volatile objects.
						 */
						if (evaluation_phase == FALSE &&
						    (src_object->purgable == VM_PURGABLE_VOLATILE) &&
						    (src_object->ref_count == 1)) {
							vm_object_lock(src_object);
							vm_object_purge(src_object, 0);
							vm_object_unlock(src_object);
						}
						continue;
					}

					/*
					 * Pages belonging to this object could be swapped to disk.
					 * Make sure it's not a shared object because we could end
					 * up just bringing it back in again.
					 *
					 * We try to optimize somewhat by checking for objects that are mapped
					 * more than once within our own map. But we don't do full searches,
					 * we just look at the entries following our current entry.
					 */

					if (src_object->ref_count > 1) {
						if (src_object != cur_shared_object) {
							/* first sighting: count it as shared */
							obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
							dirty_shared_count += obj_pages_snapshot;

							cur_shared_object = src_object;
							cur_shared_obj_ref_cnt = 1;
							continue;
						} else {
							cur_shared_obj_ref_cnt++;
							if (src_object->ref_count == cur_shared_obj_ref_cnt) {
								/*
								 * Fall through to below and treat this object as private.
								 * So deduct its pages from our shared total and add it to the
								 * private total.
								 */

								dirty_shared_count -= obj_pages_snapshot;
								dirty_private_count += obj_pages_snapshot;
							} else {
								continue;
							}
						}
					}


					if (src_object->ref_count == 1) {
						dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
					}

					if (evaluation_phase == TRUE) {
						/* accounting only; no pageout yet */
						continue;
					}
				}

				uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
				*wired_count += src_object->wired_page_count;

				if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
					if (vm_compressor_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
					}

					if (vm_swap_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
					}

					kr = KERN_NO_SPACE;
					break;
				}
				if (paged_out_count >= dirty_budget) {
					break;
				}
				dirty_budget -= paged_out_count;
			}
		}
	}

	/* report shared memory in MB */
	*shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
	if (evaluation_phase) {
		unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;

		if (dirty_shared_count > shared_pages_threshold) {
			*freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
			kr = KERN_FAILURE;
			goto done;
		}

		if (dirty_shared_count &&
		    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
			*freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
			kr = KERN_FAILURE;
			goto done;
		}

		/* evaluation passed: reset counters and do the real freeze */
		evaluation_phase = FALSE;
		dirty_shared_count = dirty_private_count = 0;

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);

		if (eval_only) {
			kr = KERN_SUCCESS;
			goto done;
		}

		vm_purgeable_purge_task_owned(task);

		goto again;
	} else {
		kr = KERN_SUCCESS;
	}

done:
	vm_map_unlock(map);

	if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
		vm_object_compressed_freezer_done();
	}
	return kr;
}
20884
20885 #endif
20886
20887 /*
20888 * vm_map_entry_should_cow_for_true_share:
20889 *
20890 * Determines if the map entry should be clipped and setup for copy-on-write
20891 * to avoid applying "true_share" to a large VM object when only a subset is
20892 * targeted.
20893 *
20894 * For now, we target only the map entries created for the Objective C
20895 * Garbage Collector, which initially have the following properties:
20896 * - alias == VM_MEMORY_MALLOC
20897 * - wired_count == 0
20898 * - !needs_copy
20899 * and a VM object with:
20900 * - internal
20901 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20902 * - !true_share
20903 * - vo_size == ANON_CHUNK_SIZE
20904 *
20905 * Only non-kernel map entries.
20906 */
20907 boolean_t
vm_map_entry_should_cow_for_true_share(vm_map_entry_t entry)20908 vm_map_entry_should_cow_for_true_share(
20909 vm_map_entry_t entry)
20910 {
20911 vm_object_t object;
20912
20913 if (entry->is_sub_map) {
20914 /* entry does not point at a VM object */
20915 return FALSE;
20916 }
20917
20918 if (entry->needs_copy) {
20919 /* already set for copy_on_write: done! */
20920 return FALSE;
20921 }
20922
20923 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20924 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
20925 /* not a malloc heap or Obj-C Garbage Collector heap */
20926 return FALSE;
20927 }
20928
20929 if (entry->wired_count) {
20930 /* wired: can't change the map entry... */
20931 vm_counters.should_cow_but_wired++;
20932 return FALSE;
20933 }
20934
20935 object = VME_OBJECT(entry);
20936
20937 if (object == VM_OBJECT_NULL) {
20938 /* no object yet... */
20939 return FALSE;
20940 }
20941
20942 if (!object->internal) {
20943 /* not an internal object */
20944 return FALSE;
20945 }
20946
20947 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20948 /* not the default copy strategy */
20949 return FALSE;
20950 }
20951
20952 if (object->true_share) {
20953 /* already true_share: too late to avoid it */
20954 return FALSE;
20955 }
20956
20957 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
20958 object->vo_size != ANON_CHUNK_SIZE) {
20959 /* ... not an object created for the ObjC Garbage Collector */
20960 return FALSE;
20961 }
20962
20963 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
20964 object->vo_size != 2048 * 4096) {
20965 /* ... not a "MALLOC_SMALL" heap */
20966 return FALSE;
20967 }
20968
20969 /*
20970 * All the criteria match: we have a large object being targeted for "true_share".
20971 * To limit the adverse side-effects linked with "true_share", tell the caller to
20972 * try and avoid setting up the entire object for "true_share" by clipping the
20973 * targeted range and setting it up for copy-on-write.
20974 */
20975 return TRUE;
20976 }
20977
/*
 * vm_map_round_page_mask:
 * Round "offset" up to the page boundary described by "mask"
 * (mask == page_size - 1).  Thin exported wrapper around the
 * VM_MAP_ROUND_PAGE() macro.
 */
vm_map_offset_t
vm_map_round_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_ROUND_PAGE(offset, mask);
}
20985
/*
 * vm_map_trunc_page_mask:
 * Truncate "offset" down to the page boundary described by "mask"
 * (mask == page_size - 1).  Thin exported wrapper around the
 * VM_MAP_TRUNC_PAGE() macro.
 */
vm_map_offset_t
vm_map_trunc_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_TRUNC_PAGE(offset, mask);
}
20993
20994 boolean_t
vm_map_page_aligned(vm_map_offset_t offset,vm_map_offset_t mask)20995 vm_map_page_aligned(
20996 vm_map_offset_t offset,
20997 vm_map_offset_t mask)
20998 {
20999 return ((offset) & mask) == 0;
21000 }
21001
/*
 * vm_map_page_shift:
 * Returns the page shift (log2 of the page size) in effect for "map".
 */
int
vm_map_page_shift(
	vm_map_t map)
{
	return VM_MAP_PAGE_SHIFT(map);
}
21008
/*
 * vm_map_page_size:
 * Returns the page size (in bytes) in effect for "map".
 */
int
vm_map_page_size(
	vm_map_t map)
{
	return VM_MAP_PAGE_SIZE(map);
}
21015
/*
 * vm_map_page_mask:
 * Returns the page mask (page_size - 1) in effect for "map".
 */
vm_map_offset_t
vm_map_page_mask(
	vm_map_t map)
{
	return VM_MAP_PAGE_MASK(map);
}
21022
21023 kern_return_t
vm_map_set_page_shift(vm_map_t map,int pageshift)21024 vm_map_set_page_shift(
21025 vm_map_t map,
21026 int pageshift)
21027 {
21028 if (map->hdr.nentries != 0) {
21029 /* too late to change page size */
21030 return KERN_FAILURE;
21031 }
21032
21033 map->hdr.page_shift = pageshift;
21034
21035 return KERN_SUCCESS;
21036 }
21037
/*
 * vm_map_query_volatile:
 * Accumulates, over all writable entries of "map" backed by a purgeable
 * object in the VOLATILE or EMPTY state: the virtual size, the resident
 * and compressed page counts (from the VM objects), and the resident and
 * compressed page counts as reported by the pmap.  Counts are converted
 * to byte sizes in the output parameters.
 *
 * The caller must hold the map lock; the map is still locked on return.
 */
kern_return_t
vm_map_query_volatile(
	vm_map_t map,
	mach_vm_size_t *volatile_virtual_size_p,
	mach_vm_size_t *volatile_resident_size_p,
	mach_vm_size_t *volatile_compressed_size_p,
	mach_vm_size_t *volatile_pmap_size_p,
	mach_vm_size_t *volatile_compressed_pmap_size_p)
{
	mach_vm_size_t volatile_virtual_size;
	mach_vm_size_t volatile_resident_count;
	mach_vm_size_t volatile_compressed_count;
	mach_vm_size_t volatile_pmap_count;
	mach_vm_size_t volatile_compressed_pmap_count;
	mach_vm_size_t resident_count;
	vm_map_entry_t entry;
	vm_object_t object;

	/* map should be locked by caller */

	volatile_virtual_size = 0;
	volatile_resident_count = 0;
	volatile_compressed_count = 0;
	volatile_pmap_count = 0;
	volatile_compressed_pmap_count = 0;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;

		if (entry->is_sub_map) {
			/* submap entries have no purgeable object */
			continue;
		}
		if (!(entry->protection & VM_PROT_WRITE)) {
			/* read-only mappings are not counted as volatile */
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}
		if (object->purgable != VM_PURGABLE_VOLATILE &&
		    object->purgable != VM_PURGABLE_EMPTY) {
			continue;
		}
		if (VME_OFFSET(entry)) {
			/*
			 * If the map entry has been split and the object now
			 * appears several times in the VM map, we don't want
			 * to count the object's resident_page_count more than
			 * once. We count it only for the first one, starting
			 * at offset 0 and ignore the other VM map entries.
			 */
			continue;
		}
		resident_count = object->resident_page_count;
		/*
		 * NOTE(review): VME_OFFSET(entry) is always 0 here (non-zero
		 * offsets were skipped just above), so this adjustment is
		 * currently a no-op; it only matters if the skip above is
		 * ever relaxed.
		 */
		if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
			resident_count = 0;
		} else {
			resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
		}

		volatile_virtual_size += entry->vme_end - entry->vme_start;
		volatile_resident_count += resident_count;
		if (object->pager) {
			/* pages compressed out through the object's pager */
			volatile_compressed_count +=
			    vm_compressor_pager_get_count(object->pager);
		}
		pmap_compressed_bytes = 0;
		pmap_resident_bytes =
		    pmap_query_resident(map->pmap,
		    entry->vme_start,
		    entry->vme_end,
		    &pmap_compressed_bytes);
		volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
		volatile_compressed_pmap_count += (pmap_compressed_bytes
		    / PAGE_SIZE);
	}

	/* map is still locked on return */

	*volatile_virtual_size_p = volatile_virtual_size;
	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
	*volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
	*volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;

	return KERN_SUCCESS;
}
21127
21128 void
vm_map_sizes(vm_map_t map,vm_map_size_t * psize,vm_map_size_t * pfree,vm_map_size_t * plargest_free)21129 vm_map_sizes(vm_map_t map,
21130 vm_map_size_t * psize,
21131 vm_map_size_t * pfree,
21132 vm_map_size_t * plargest_free)
21133 {
21134 vm_map_entry_t entry;
21135 vm_map_offset_t prev;
21136 vm_map_size_t free, total_free, largest_free;
21137 boolean_t end;
21138
21139 if (!map) {
21140 *psize = *pfree = *plargest_free = 0;
21141 return;
21142 }
21143 total_free = largest_free = 0;
21144
21145 vm_map_lock_read(map);
21146 if (psize) {
21147 *psize = map->max_offset - map->min_offset;
21148 }
21149
21150 prev = map->min_offset;
21151 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
21152 end = (entry == vm_map_to_entry(map));
21153
21154 if (end) {
21155 free = entry->vme_end - prev;
21156 } else {
21157 free = entry->vme_start - prev;
21158 }
21159
21160 total_free += free;
21161 if (free > largest_free) {
21162 largest_free = free;
21163 }
21164
21165 if (end) {
21166 break;
21167 }
21168 prev = entry->vme_end;
21169 }
21170 vm_map_unlock_read(map);
21171 if (pfree) {
21172 *pfree = total_free;
21173 }
21174 if (plargest_free) {
21175 *plargest_free = largest_free;
21176 }
21177 }
21178
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);
/*
 * vm_map_shadow_max:
 * Walks every object chain in "map" and returns the length of the
 * longest shadow chain found (0 for a NULL map or one with no chains).
 * Shadow chains are traversed with hand-over-hand shared locking: the
 * next shadow object is locked before the current one is released.
 */
int
vm_map_shadow_max(
	vm_map_t map)
{
	int shadows, shadows_max;
	vm_map_entry_t entry;
	vm_object_t object, next_object;

	if (map == NULL) {
		return 0;
	}

	shadows_max = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			/* submap entries have no object chain of their own */
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}
		vm_object_lock_shared(object);
		for (shadows = 0;
		    object->shadow != NULL;
		    shadows++, object = next_object) {
			next_object = object->shadow;
			/* lock the shadow before dropping the current object */
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
		}
		vm_object_unlock(object);
		if (shadows > shadows_max) {
			shadows_max = shadows;
		}
	}

	vm_map_unlock_read(map);

	return shadows_max;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21226
/*
 * vm_commit_pagezero_status:
 * Informs the pmap layer of "lmap"'s lowest usable address
 * ("min_offset").  NOTE(review): presumably lets the pmap optimize its
 * handling of the low (page-zero) range — confirm against the
 * pmap_advise_pagezero_range() implementation.
 */
void
vm_commit_pagezero_status(vm_map_t lmap)
{
	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
}
21232
#if XNU_TARGET_OS_OSX
/*
 * vm_map_set_high_start:
 * Records "high_start" in the map.  NOTE(review): presumably used as a
 * lower bound when choosing addresses for future allocations — confirm
 * against the readers of vmmap_high_start.  macOS only.
 */
void
vm_map_set_high_start(
	vm_map_t map,
	vm_map_offset_t high_start)
{
	map->vmmap_high_start = high_start;
}
#endif /* XNU_TARGET_OS_OSX */
21242
21243
21244 /*
21245 * FORKED CORPSE FOOTPRINT
21246 *
21247 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21248 * empty since it never ran and never got to fault in any pages.
21249 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21250 * a forked corpse would therefore return very little information.
21251 *
21252 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21253 * to vm_map_fork() to collect footprint information from the original VM map
21254 * and its pmap, and store it in the forked corpse's VM map. That information
21255 * is stored in place of the VM map's "hole list" since we'll never need to
21256 * lookup for holes in the corpse's map.
21257 *
21258 * The corpse's footprint info looks like this:
21259 *
21260 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21261 * as follows:
21262 * +---------------------------------------+
21263 * header-> | cf_size |
21264 * +-------------------+-------------------+
21265 * | cf_last_region | cf_last_zeroes |
21266 * +-------------------+-------------------+
21267 * region1-> | cfr_vaddr |
21268 * +-------------------+-------------------+
21269 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21270 * +---------------------------------------+
21271 * | d4 | d5 | ... |
21272 * +---------------------------------------+
21273 * | ... |
21274 * +-------------------+-------------------+
21275 * | dy | dz | na | na | cfr_vaddr... | <-region2
21276 * +-------------------+-------------------+
21277 * | cfr_vaddr (ctd) | cfr_num_pages |
21278 * +---------------------------------------+
21279 * | d0 | d1 ... |
21280 * +---------------------------------------+
21281 * ...
21282 * +---------------------------------------+
21283 * last region-> | cfr_vaddr |
21284 * +---------------------------------------+
21285 * + cfr_num_pages | d0 | d1 | d2 | d3 |
21286 * +---------------------------------------+
21287 * ...
21288 * +---------------------------------------+
21289 * | dx | dy | dz | na | na | na | na | na |
21290 * +---------------------------------------+
21291 *
21292 * where:
21293 * cf_size: total size of the buffer (rounded to page size)
21294 * cf_last_region: offset in the buffer of the last "region" sub-header
21295 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21296 * of last region
21297 * cfr_vaddr: virtual address of the start of the covered "region"
21298 * cfr_num_pages: number of pages in the covered "region"
21299 * d*: disposition of the page at that virtual address
21300 * Regions in the buffer are word-aligned.
21301 *
21302 * We estimate the size of the buffer based on the number of memory regions
21303 * and the virtual size of the address space. While copying each memory region
21304 * during vm_map_fork(), we also collect the footprint info for that region
 * and store it in the buffer, packing it as much as possible (coalescing
 * contiguous memory regions to avoid having too many region headers and
 * avoiding long streaks of "zero" page dispositions by splitting footprint
 * "regions"), so the number of regions in the footprint buffer might not match
 * the number of memory regions in the address space.
21310 *
21311 * We also have to copy the original task's "nonvolatile" ledgers since that's
21312 * part of the footprint and will need to be reported to any tool asking for
21313 * the footprint information of the forked corpse.
21314 */
21315
/* Statistics on corpse-footprint collection. */
uint64_t vm_map_corpse_footprint_count = 0;     /* footprints collected so far */
uint64_t vm_map_corpse_footprint_size_avg = 0;  /* running average buffer size */
uint64_t vm_map_corpse_footprint_size_max = 0;  /* largest buffer size seen */
uint64_t vm_map_corpse_footprint_full = 0;      /* collections that ran out of buffer */
uint64_t vm_map_corpse_footprint_no_buf = 0;    /* failed buffer allocations */
21321
/*
 * Header at the start of a corpse footprint buffer (see the layout
 * diagram above).  The union field is reused: while collecting it tracks
 * the trailing-zero streak; during lookups it caches the last region hit.
 */
struct vm_map_corpse_footprint_header {
	vm_size_t cf_size;      /* allocated buffer size */
	uint32_t cf_last_region; /* offset of last region in buffer */
	union {
		uint32_t cfu_last_zeroes; /* during creation:
		                           * number of "zero" dispositions at
		                           * end of last region */
		uint32_t cfu_hint_region; /* during lookup:
		                           * offset of last looked up region */
#define cf_last_zeroes cfu.cfu_last_zeroes
#define cf_hint_region cfu.cfu_hint_region
	} cfu;
};
/* compact (8-bit) page disposition; see vm_page_disposition_to_cf_disp() */
typedef uint8_t cf_disp_t;
struct vm_map_corpse_footprint_region {
	vm_map_offset_t cfr_vaddr;      /* region start virtual address */
	uint32_t cfr_num_pages;         /* number of pages in this "region" */
	/* trailing variable-length array (GNU zero-length-array idiom) */
	cf_disp_t cfr_disposition[0];   /* disposition of each page */
} __attribute__((packed));
21341
21342 static cf_disp_t
vm_page_disposition_to_cf_disp(int disposition)21343 vm_page_disposition_to_cf_disp(
21344 int disposition)
21345 {
21346 assert(sizeof(cf_disp_t) == 1);
21347 /* relocate bits that don't fit in a "uint8_t" */
21348 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21349 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21350 }
21351 /* cast gets rid of extra bits */
21352 return (cf_disp_t) disposition;
21353 }
21354
21355 static int
vm_page_cf_disp_to_disposition(cf_disp_t cf_disp)21356 vm_page_cf_disp_to_disposition(
21357 cf_disp_t cf_disp)
21358 {
21359 int disposition;
21360
21361 assert(sizeof(cf_disp_t) == 1);
21362 disposition = (int) cf_disp;
21363 /* move relocated bits back in place */
21364 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21365 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21366 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21367 }
21368 return disposition;
21369 }
21370
21371 /*
21372 * vm_map_corpse_footprint_new_region:
21373 * closes the current footprint "region" and creates a new one
21374 *
21375 * Returns NULL if there's not enough space in the buffer for a new region.
21376 */
static struct vm_map_corpse_footprint_region *
vm_map_corpse_footprint_new_region(
	struct vm_map_corpse_footprint_header *footprint_header)
{
	uintptr_t footprint_edge;
	uint32_t new_region_offset;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;

	/* first byte past the end of the footprint buffer */
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);
	footprint_region = ((struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region));
	assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
	    footprint_edge);

	/* get rid of trailing zeroes in the last region */
	assert(footprint_region->cfr_num_pages >=
	    footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -=
	    footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* reuse this region if it's now empty */
	if (footprint_region->cfr_num_pages == 0) {
		return footprint_region;
	}

	/* compute offset of new region (word-aligned, past last region's dispositions) */
	new_region_offset = footprint_header->cf_last_region;
	new_region_offset += sizeof(*footprint_region);
	new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
	new_region_offset = roundup(new_region_offset, sizeof(int));

	/* check if we're going over the edge */
	if (((uintptr_t)footprint_header +
	    new_region_offset +
	    sizeof(*footprint_region)) >=
	    footprint_edge) {
		/* over the edge: no new region */
		return NULL;
	}

	/* adjust offset of last region in header */
	footprint_header->cf_last_region = new_region_offset;

	new_footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);
	new_footprint_region->cfr_vaddr = 0;
	new_footprint_region->cfr_num_pages = 0;
	/* caller needs to initialize new region */

	return new_footprint_region;
}
21433
21434 /*
21435 * vm_map_corpse_footprint_collect:
21436 * collect footprint information for "old_entry" in "old_map" and
21437 * stores it in "new_map"'s vmmap_footprint_info.
21438 */
kern_return_t
vm_map_corpse_footprint_collect(
	vm_map_t old_map,
	vm_map_entry_t old_entry,
	vm_map_t new_map)
{
	vm_map_offset_t va;
	kern_return_t kr;
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;
	cf_disp_t *next_disp_p;
	uintptr_t footprint_edge;
	uint32_t num_pages_tmp;
	int effective_page_size;

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));

	va = old_entry->vme_start;

	/* both maps must be held exclusively while transferring footprint info */
	vm_map_lock_assert_exclusive(old_map);
	vm_map_lock_assert_exclusive(new_map);

	assert(new_map->has_corpse_footprint);
	assert(!old_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    old_map->has_corpse_footprint) {
		/*
		 * This can only transfer footprint info from a
		 * map with a live pmap to a map with a corpse footprint.
		 */
		return KERN_NOT_SUPPORTED;
	}

	if (new_map->vmmap_corpse_footprint == NULL) {
		vm_offset_t buf;
		vm_size_t buf_size;

		buf = 0;
		/* estimate worst-case buffer size from entry count and VM size */
		buf_size = (sizeof(*footprint_header) +
		    (old_map->hdr.nentries
		    *
		    (sizeof(*footprint_region) +
		    +3)) /* potential alignment for each region */
		    +
		    ((old_map->size / effective_page_size)
		    *
		    sizeof(cf_disp_t))); /* disposition for each page */
		// printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
		buf_size = round_page(buf_size);

		/* limit buffer to 1 page to validate overflow detection */
		// buf_size = PAGE_SIZE;

		/* limit size to a somewhat sane amount */
#if XNU_TARGET_OS_OSX
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
#else /* XNU_TARGET_OS_OSX */
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
#endif /* XNU_TARGET_OS_OSX */
		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
		}

		/*
		 * Allocate the pageable buffer (with a trailing guard page).
		 * It will be zero-filled on demand.
		 */
		kr = kernel_memory_allocate(kernel_map,
		    &buf,
		    (buf_size
		    + PAGE_SIZE),                           /* trailing guard page */
		    0,                                      /* mask */
		    KMA_PAGEABLE | KMA_GUARD_LAST,
		    VM_KERN_MEMORY_DIAG);
		if (kr != KERN_SUCCESS) {
			vm_map_corpse_footprint_no_buf++;
			return kr;
		}

		/* initialize header and 1st region */
		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
		new_map->vmmap_corpse_footprint = footprint_header;

		footprint_header->cf_size = buf_size;
		footprint_header->cf_last_region =
		    sizeof(*footprint_header);
		footprint_header->cf_last_zeroes = 0;

		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
		footprint_region->cfr_vaddr = 0;
		footprint_region->cfr_num_pages = 0;
	} else {
		/* retrieve header and last region */
		footprint_header = (struct vm_map_corpse_footprint_header *)
		    new_map->vmmap_corpse_footprint;
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
	}
	/* first byte past the end of the footprint buffer */
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);

	/* does this entry start right after the last recorded page? */
	if ((footprint_region->cfr_vaddr +
	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
	    effective_page_size))
	    != old_entry->vme_start) {
		uint64_t num_pages_delta, num_pages_delta_size;
		uint32_t region_offset_delta_size;

		/*
		 * Not the next contiguous virtual address:
		 * start a new region or store "zero" dispositions for
		 * the missing pages?
		 */
		/* size of gap in actual page dispositions */
		num_pages_delta = ((old_entry->vme_start -
		    footprint_region->cfr_vaddr) / effective_page_size)
		    - footprint_region->cfr_num_pages;
		num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
		/* size of gap as a new footprint region header */
		region_offset_delta_size =
		    (sizeof(*footprint_region) +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
		    sizeof(int)) -
		    ((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
		// printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
		if (region_offset_delta_size < num_pages_delta_size ||
		    os_add3_overflow(footprint_region->cfr_num_pages,
		    (uint32_t) num_pages_delta,
		    1,
		    &num_pages_tmp)) {
			/*
			 * Storing data for this gap would take more space
			 * than inserting a new footprint region header:
			 * let's start a new region and save space. If it's a
			 * tie, let's avoid using a new region, since that
			 * would require more region hops to find the right
			 * range during lookups.
			 *
			 * If the current region's cfr_num_pages would overflow
			 * if we added "zero" page dispositions for the gap,
			 * no choice but to start a new region.
			 */
			// printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(footprint_header);
			/* check that we're not going over the edge */
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			/* initialize new region as empty */
			footprint_region->cfr_vaddr = old_entry->vme_start;
			footprint_region->cfr_num_pages = 0;
		} else {
			/*
			 * Store "zero" page dispositions for the missing
			 * pages.
			 */
			// printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
			for (; num_pages_delta > 0; num_pages_delta--) {
				next_disp_p = (cf_disp_t *)
				    ((uintptr_t) footprint_region +
				    sizeof(*footprint_region));
				next_disp_p += footprint_region->cfr_num_pages;
				/* check that we're not going over the edge */
				if ((uintptr_t)next_disp_p >= footprint_edge) {
					goto over_the_edge;
				}
				/* store "zero" disposition for this gap page */
				footprint_region->cfr_num_pages++;
				*next_disp_p = (cf_disp_t) 0;
				footprint_header->cf_last_zeroes++;
			}
		}
	}

	/* record the disposition of each page covered by the entry */
	for (va = old_entry->vme_start;
	    va < old_entry->vme_end;
	    va += effective_page_size) {
		int disposition;
		cf_disp_t cf_disp;

		vm_map_footprint_query_page_info(old_map,
		    old_entry,
		    va,
		    &disposition);
		cf_disp = vm_page_disposition_to_cf_disp(disposition);

		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);

		if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
			/*
			 * Ignore "zero" dispositions at start of
			 * region: just move start of region.
			 */
			footprint_region->cfr_vaddr += effective_page_size;
			continue;
		}

		/* would region's cfr_num_pages overflow? */
		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
		    &num_pages_tmp)) {
			/* overflow: create a new region */
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(
				footprint_header);
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			footprint_region->cfr_vaddr = va;
			footprint_region->cfr_num_pages = 0;
		}

		next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
		    sizeof(*footprint_region));
		next_disp_p += footprint_region->cfr_num_pages;
		/* check that we're not going over the edge */
		if ((uintptr_t)next_disp_p >= footprint_edge) {
			goto over_the_edge;
		}
		/* store this disposition */
		*next_disp_p = cf_disp;
		footprint_region->cfr_num_pages++;

		if (cf_disp != 0) {
			/* non-zero disp: break the current zero streak */
			footprint_header->cf_last_zeroes = 0;
			/* done */
			continue;
		}

		/* zero disp: add to the current streak of zeroes */
		footprint_header->cf_last_zeroes++;
		if ((footprint_header->cf_last_zeroes +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
		    (sizeof(int) - 1),
		    sizeof(int))) <
		    (sizeof(*footprint_header))) {
			/*
			 * There are not enough trailing "zero" dispositions
			 * (+ the extra padding we would need for the previous
			 * region); creating a new region would not save space
			 * at this point, so let's keep this "zero" disposition
			 * in this region and reconsider later.
			 */
			continue;
		}
		/*
		 * Create a new region to avoid having too many consecutive
		 * "zero" dispositions.
		 */
		new_footprint_region =
		    vm_map_corpse_footprint_new_region(footprint_header);
		if (new_footprint_region == NULL) {
			goto over_the_edge;
		}
		footprint_region = new_footprint_region;
		/* initialize the new region as empty ... */
		footprint_region->cfr_num_pages = 0;
		/* ... and skip this "zero" disp */
		footprint_region->cfr_vaddr = va + effective_page_size;
	}

	return KERN_SUCCESS;

over_the_edge:
	/* buffer exhausted: footprint info for this corpse is incomplete */
	// printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
	vm_map_corpse_footprint_full++;
	return KERN_RESOURCE_SHORTAGE;
}
21717
21718 /*
21719 * vm_map_corpse_footprint_collect_done:
21720 * completes the footprint collection by getting rid of any remaining
21721 * trailing "zero" dispositions and trimming the unused part of the
21722 * kernel buffer
21723 */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t buf_size, actual_size;
	kern_return_t kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		/* no footprint buffer was ever allocated: nothing to trim */
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* bytes actually used: everything up to the end of the last region */
	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));

	// printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	/* update running statistics (average, count, max) */
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		/*
		 * Release the unused tail of the buffer (which includes the
		 * old trailing guard page) and turn the page right after the
		 * used portion into the new guard page via VM_PROT_NONE.
		 */
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE), /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE, /* set_maximum */
		    VM_PROT_NONE);
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	footprint_header->cf_size = actual_size;
}
21796
21797 /*
21798 * vm_map_corpse_footprint_query_page_info:
21799 * retrieves the disposition of the page at virtual address "vaddr"
21800 * in the forked corpse's VM map
21801 *
21802 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
21803 */
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t        map,
	vm_map_offset_t va,
	int             *disposition_p)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t        footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int             disp_idx;
	kern_return_t   kr;
	int             effective_page_size;
	cf_disp_t       cf_disp;

	/* only forked corpses carry a pre-collected footprint */
	if (!map->has_corpse_footprint) {
		*disposition_p = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		*disposition_p = 0;
		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

	/* page size the footprint was collected at for this map */
	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

lookup_again:
	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    effective_page_size));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		goto lookup_again;
	}

	/* walk forward, region by region, until "va" falls inside one */
	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    effective_page_size));
	}
	if (va < region_start || va >= region_end) {
		/* page not found: not an error, just "no disposition" */
		*disposition_p = 0;
		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
	cf_disp = footprint_region->cfr_disposition[disp_idx];
	/* convert from the compact stored form to the caller-visible form */
	*disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
	kr = KERN_SUCCESS;
done:
	// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disposition_p,
	    kern_return_t, kr);

	return kr;
}
21910
21911 void
vm_map_corpse_footprint_destroy(vm_map_t map)21912 vm_map_corpse_footprint_destroy(
21913 vm_map_t map)
21914 {
21915 if (map->has_corpse_footprint &&
21916 map->vmmap_corpse_footprint != 0) {
21917 struct vm_map_corpse_footprint_header *footprint_header;
21918 vm_size_t buf_size;
21919 kern_return_t kr;
21920
21921 footprint_header = map->vmmap_corpse_footprint;
21922 buf_size = footprint_header->cf_size;
21923 kr = vm_deallocate(kernel_map,
21924 (vm_offset_t) map->vmmap_corpse_footprint,
21925 ((vm_size_t) buf_size
21926 + PAGE_SIZE)); /* trailing guard page */
21927 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
21928 map->vmmap_corpse_footprint = 0;
21929 map->has_corpse_footprint = FALSE;
21930 }
21931 }
21932
21933 /*
21934 * vm_map_copy_footprint_ledgers:
21935 * copies any ledger that's relevant to the memory footprint of "old_task"
21936 * into the forked corpse's task ("new_task")
21937 */
21938 void
vm_map_copy_footprint_ledgers(task_t old_task,task_t new_task)21939 vm_map_copy_footprint_ledgers(
21940 task_t old_task,
21941 task_t new_task)
21942 {
21943 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
21944 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
21945 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
21946 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
21947 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
21948 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
21949 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
21950 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
21951 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
21952 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
21953 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
21954 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
21955 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
21956 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
21957 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
21958 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
21959 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
21960 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
21961 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
21962 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
21963 }
21964
21965 /*
21966 * vm_map_copy_ledger:
21967 * copy a single ledger from "old_task" to "new_task"
21968 */
21969 void
vm_map_copy_ledger(task_t old_task,task_t new_task,int ledger_entry)21970 vm_map_copy_ledger(
21971 task_t old_task,
21972 task_t new_task,
21973 int ledger_entry)
21974 {
21975 ledger_amount_t old_balance, new_balance, delta;
21976
21977 assert(new_task->map->has_corpse_footprint);
21978 if (!new_task->map->has_corpse_footprint) {
21979 return;
21980 }
21981
21982 /* turn off sanity checks for the ledger we're about to mess with */
21983 ledger_disable_panic_on_negative(new_task->ledger,
21984 ledger_entry);
21985
21986 /* adjust "new_task" to match "old_task" */
21987 ledger_get_balance(old_task->ledger,
21988 ledger_entry,
21989 &old_balance);
21990 ledger_get_balance(new_task->ledger,
21991 ledger_entry,
21992 &new_balance);
21993 if (new_balance == old_balance) {
21994 /* new == old: done */
21995 } else if (new_balance > old_balance) {
21996 /* new > old ==> new -= new - old */
21997 delta = new_balance - old_balance;
21998 ledger_debit(new_task->ledger,
21999 ledger_entry,
22000 delta);
22001 } else {
22002 /* new < old ==> new += old - new */
22003 delta = old_balance - new_balance;
22004 ledger_credit(new_task->ledger,
22005 ledger_entry,
22006 delta);
22007 }
22008 }
22009
22010 /*
22011 * vm_map_get_pmap:
22012 * returns the pmap associated with the vm_map
22013 */
22014 pmap_t
vm_map_get_pmap(vm_map_t map)22015 vm_map_get_pmap(vm_map_t map)
22016 {
22017 return vm_map_pmap(map);
22018 }
22019
22020 #if MACH_ASSERT
22021
/* panic policy knobs for ledger imbalance checks (defined in pmap code) */
extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

/*
 * LEDGER_DRIFT:
 *	declares, for ledger "__LEDGER", the counters used to track how often
 *	and by how much its balance was found non-zero when a pmap was checked:
 *	over-balance count / running total / worst case, and the same for
 *	under-balance.
 */
#define LEDGER_DRIFT(__LEDGER)                  \
	int             __LEDGER##_over;        \
	ledger_amount_t __LEDGER##_over_total;  \
	ledger_amount_t __LEDGER##_over_max;    \
	int             __LEDGER##_under;       \
	ledger_amount_t __LEDGER##_under_total; \
	ledger_amount_t __LEDGER##_under_max

/*
 * Global accumulator of ledger drift observed across all pmaps checked by
 * vm_map_pmap_check_ledgers() (debug/MACH_ASSERT builds only).
 */
struct {
	uint64_t        num_pmaps_checked;

	LEDGER_DRIFT(phys_footprint);
	LEDGER_DRIFT(internal);
	LEDGER_DRIFT(internal_compressed);
	LEDGER_DRIFT(external);
	LEDGER_DRIFT(reusable);
	LEDGER_DRIFT(iokit_mapped);
	LEDGER_DRIFT(alternate_accounting);
	LEDGER_DRIFT(alternate_accounting_compressed);
	LEDGER_DRIFT(page_table);
	LEDGER_DRIFT(purgeable_volatile);
	LEDGER_DRIFT(purgeable_nonvolatile);
	LEDGER_DRIFT(purgeable_volatile_compressed);
	LEDGER_DRIFT(purgeable_nonvolatile_compressed);
	LEDGER_DRIFT(tagged_nofootprint);
	LEDGER_DRIFT(tagged_footprint);
	LEDGER_DRIFT(tagged_nofootprint_compressed);
	LEDGER_DRIFT(tagged_footprint_compressed);
	LEDGER_DRIFT(network_volatile);
	LEDGER_DRIFT(network_nonvolatile);
	LEDGER_DRIFT(network_volatile_compressed);
	LEDGER_DRIFT(network_nonvolatile_compressed);
	LEDGER_DRIFT(media_nofootprint);
	LEDGER_DRIFT(media_footprint);
	LEDGER_DRIFT(media_nofootprint_compressed);
	LEDGER_DRIFT(media_footprint_compressed);
	LEDGER_DRIFT(graphics_nofootprint);
	LEDGER_DRIFT(graphics_footprint);
	LEDGER_DRIFT(graphics_nofootprint_compressed);
	LEDGER_DRIFT(graphics_footprint_compressed);
	LEDGER_DRIFT(neural_nofootprint);
	LEDGER_DRIFT(neural_footprint);
	LEDGER_DRIFT(neural_nofootprint_compressed);
	LEDGER_DRIFT(neural_footprint_compressed);
} pmap_ledgers_drift;
22070
/*
 * vm_map_pmap_check_ledgers:
 *	debug check (MACH_ASSERT builds) run when a pmap is torn down:
 *	every footprint-related ledger should be back to a zero balance.
 *	Any non-zero balance is logged, accumulated into the global
 *	"pmap_ledgers_drift" stats, and may trigger a panic depending on the
 *	per-ledger panic-on-negative flag and the pmap_ledgers_panic /
 *	pmap_ledgers_panic_leeway policy knobs.
 */
void
vm_map_pmap_check_ledgers(
	pmap_t          pmap,
	ledger_t        ledger,
	int             pid,
	char            *procname)
{
	ledger_amount_t bal;
	boolean_t       do_panic;

	do_panic = FALSE;

	pmap_ledgers_drift.num_pmaps_checked++;

/*
 * LEDGER_CHECK_BALANCE:
 *	checks one ledger's balance; on a non-zero balance, logs it, decides
 *	whether it warrants a panic (honoring the leeway, if any), and updates
 *	the corresponding over/under drift statistics.
 */
#define LEDGER_CHECK_BALANCE(__LEDGER)                                  \
MACRO_BEGIN                                                             \
	int panic_on_negative = TRUE;                                   \
	ledger_get_balance(ledger,                                      \
	    task_ledgers.__LEDGER,                                      \
	    &bal);                                                      \
	ledger_get_panic_on_negative(ledger,                            \
	    task_ledgers.__LEDGER,                                      \
	    &panic_on_negative);                                        \
	if (bal != 0) {                                                 \
	        if (panic_on_negative ||                                \
	            (pmap_ledgers_panic &&                              \
	            pmap_ledgers_panic_leeway > 0 &&                    \
	            (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||   \
	            bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
	                do_panic = TRUE;                                \
	        }                                                       \
	        printf("LEDGER BALANCE proc %d (%s) "                   \
	            "\"%s\" = %lld\n",                                  \
	            pid, procname, #__LEDGER, bal);                     \
	        if (bal > 0) {                                          \
	                pmap_ledgers_drift.__LEDGER##_over++;           \
	                pmap_ledgers_drift.__LEDGER##_over_total += bal; \
	                if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
	                        pmap_ledgers_drift.__LEDGER##_over_max = bal; \
	                }                                               \
	        } else if (bal < 0) {                                   \
	                pmap_ledgers_drift.__LEDGER##_under++;          \
	                pmap_ledgers_drift.__LEDGER##_under_total += bal; \
	                if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
	                        pmap_ledgers_drift.__LEDGER##_under_max = bal; \
	                }                                               \
	        }                                                       \
	}                                                               \
MACRO_END

	LEDGER_CHECK_BALANCE(phys_footprint);
	LEDGER_CHECK_BALANCE(internal);
	LEDGER_CHECK_BALANCE(internal_compressed);
	LEDGER_CHECK_BALANCE(external);
	LEDGER_CHECK_BALANCE(reusable);
	LEDGER_CHECK_BALANCE(iokit_mapped);
	LEDGER_CHECK_BALANCE(alternate_accounting);
	LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
	LEDGER_CHECK_BALANCE(page_table);
	LEDGER_CHECK_BALANCE(purgeable_volatile);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
	LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(tagged_nofootprint);
	LEDGER_CHECK_BALANCE(tagged_footprint);
	LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
	LEDGER_CHECK_BALANCE(network_volatile);
	LEDGER_CHECK_BALANCE(network_nonvolatile);
	LEDGER_CHECK_BALANCE(network_volatile_compressed);
	LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(media_nofootprint);
	LEDGER_CHECK_BALANCE(media_footprint);
	LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(media_footprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_nofootprint);
	LEDGER_CHECK_BALANCE(graphics_footprint);
	LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
	LEDGER_CHECK_BALANCE(neural_nofootprint);
	LEDGER_CHECK_BALANCE(neural_footprint);
	LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(neural_footprint_compressed);

	if (do_panic) {
		if (pmap_ledgers_panic) {
			panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers",
			    pmap, pid, procname);
		} else {
			printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		}
	}
}
22165 #endif /* MACH_ASSERT */
22166