xref: /xnu-8020.101.4/osfmk/vm/vm_map.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_map.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Virtual memory mapping module.
64  */
65 
66 #include <mach_assert.h>
67 
68 #include <vm/vm_options.h>
69 
70 #include <libkern/OSAtomic.h>
71 
72 #include <mach/kern_return.h>
73 #include <mach/port.h>
74 #include <mach/vm_attributes.h>
75 #include <mach/vm_param.h>
76 #include <mach/vm_behavior.h>
77 #include <mach/vm_statistics.h>
78 #include <mach/memory_object.h>
79 #include <mach/mach_vm.h>
80 #include <machine/cpu_capabilities.h>
81 #include <mach/sdt.h>
82 
83 #include <kern/assert.h>
84 #include <kern/backtrace.h>
85 #include <kern/counter.h>
86 #include <kern/exc_guard.h>
87 #include <kern/kalloc.h>
88 #include <kern/zalloc_internal.h>
89 
90 #include <vm/cpm.h>
91 #include <vm/vm_compressor.h>
92 #include <vm/vm_compressor_pager.h>
93 #include <vm/vm_init.h>
94 #include <vm/vm_fault.h>
95 #include <vm/vm_map.h>
96 #include <vm/vm_object.h>
97 #include <vm/vm_page.h>
98 #include <vm/vm_pageout.h>
99 #include <vm/pmap.h>
100 #include <vm/vm_kern.h>
101 #include <ipc/ipc_port.h>
102 #include <kern/sched_prim.h>
103 #include <kern/misc_protos.h>
104 
105 #include <mach/vm_map_server.h>
106 #include <mach/mach_host_server.h>
107 #include <vm/vm_protos.h>
108 #include <vm/vm_purgeable_internal.h>
109 
111 #include <vm/vm_shared_region.h>
112 #include <vm/vm_map_store.h>
113 
114 #include <san/kasan.h>
115 
116 #include <sys/resource.h>
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
119 #include <sys/reboot.h>
120 #include <sys/kdebug_triage.h>
121 
122 #if __LP64__
123 #define HAVE_VM_MAP_RESERVED_ENTRY_ZONE 0
124 #else
125 #define HAVE_VM_MAP_RESERVED_ENTRY_ZONE 1
126 #endif
127 
128 #include <libkern/section_keywords.h>
129 #if DEVELOPMENT || DEBUG
130 extern int proc_selfcsflags(void);
131 int panic_on_unsigned_execute = 0;
132 int panic_on_mlock_failure = 0;
133 #endif /* DEVELOPMENT || DEBUG */
134 
135 #if MACH_ASSERT
136 int debug4k_filter = 0;
137 char debug4k_proc_name[1024] = "";
138 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
139 int debug4k_panic_on_misaligned_sharing = 0;
140 const char *debug4k_category_name[] = {
141 	"error",        /* 0 */
142 	"life",         /* 1 */
143 	"load",         /* 2 */
144 	"fault",        /* 3 */
145 	"copy",         /* 4 */
146 	"share",        /* 5 */
147 	"adjust",       /* 6 */
148 	"pmap",         /* 7 */
149 	"mementry",     /* 8 */
150 	"iokit",        /* 9 */
151 	"upl",          /* 10 */
152 	"exc",          /* 11 */
153 	"vfs"           /* 12 */
154 };
155 #endif /* MACH_ASSERT */
156 int debug4k_no_cow_copyin = 0;
157 
158 
159 #if __arm64__
160 extern const int fourk_binary_compatibility_unsafe;
161 extern const int fourk_binary_compatibility_allow_wx;
162 #endif /* __arm64__ */
163 extern int proc_selfpid(void);
164 extern char *proc_name_address(void *p);
165 
166 #if VM_MAP_DEBUG_APPLE_PROTECT
167 int vm_map_debug_apple_protect = 0;
168 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
169 #if VM_MAP_DEBUG_FOURK
170 int vm_map_debug_fourk = 0;
171 #endif /* VM_MAP_DEBUG_FOURK */
172 
173 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
174 int vm_map_executable_immutable_verbose = 0;
175 
176 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
177 
178 extern u_int32_t random(void);  /* from <libkern/libkern.h> */
179 /* Internal prototypes
180  */
181 
182 static void vm_map_simplify_range(
183 	vm_map_t        map,
184 	vm_map_offset_t start,
185 	vm_map_offset_t end);   /* forward */
186 
187 static boolean_t        vm_map_range_check(
188 	vm_map_t        map,
189 	vm_map_offset_t start,
190 	vm_map_offset_t end,
191 	vm_map_entry_t  *entry);
192 
193 static vm_map_entry_t   _vm_map_entry_create(
194 	struct vm_map_header    *map_header, boolean_t map_locked);
195 
196 static void             _vm_map_entry_dispose(
197 	struct vm_map_header    *map_header,
198 	vm_map_entry_t          entry);
199 
200 static void             vm_map_pmap_enter(
201 	vm_map_t                map,
202 	vm_map_offset_t         addr,
203 	vm_map_offset_t         end_addr,
204 	vm_object_t             object,
205 	vm_object_offset_t      offset,
206 	vm_prot_t               protection);
207 
208 static void             _vm_map_clip_end(
209 	struct vm_map_header    *map_header,
210 	vm_map_entry_t          entry,
211 	vm_map_offset_t         end);
212 
213 static void             _vm_map_clip_start(
214 	struct vm_map_header    *map_header,
215 	vm_map_entry_t          entry,
216 	vm_map_offset_t         start);
217 
218 static void             vm_map_entry_delete(
219 	vm_map_t        map,
220 	vm_map_entry_t  entry);
221 
222 static kern_return_t    vm_map_delete(
223 	vm_map_t        map,
224 	vm_map_offset_t start,
225 	vm_map_offset_t end,
226 	int             flags,
227 	vm_map_t        zap_map);
228 
229 static void             vm_map_copy_insert(
230 	vm_map_t        map,
231 	vm_map_entry_t  after_where,
232 	vm_map_copy_t   copy);
233 
234 static kern_return_t    vm_map_copy_overwrite_unaligned(
235 	vm_map_t        dst_map,
236 	vm_map_entry_t  entry,
237 	vm_map_copy_t   copy,
238 	vm_map_address_t start,
239 	boolean_t       discard_on_success);
240 
241 static kern_return_t    vm_map_copy_overwrite_aligned(
242 	vm_map_t        dst_map,
243 	vm_map_entry_t  tmp_entry,
244 	vm_map_copy_t   copy,
245 	vm_map_offset_t start,
246 	pmap_t          pmap);
247 
248 static kern_return_t    vm_map_copyin_kernel_buffer(
249 	vm_map_t        src_map,
250 	vm_map_address_t src_addr,
251 	vm_map_size_t   len,
252 	boolean_t       src_destroy,
253 	vm_map_copy_t   *copy_result);  /* OUT */
254 
255 static kern_return_t    vm_map_copyout_kernel_buffer(
256 	vm_map_t        map,
257 	vm_map_address_t *addr, /* IN/OUT */
258 	vm_map_copy_t   copy,
259 	vm_map_size_t   copy_size,
260 	boolean_t       overwrite,
261 	boolean_t       consume_on_success);
262 
263 static void             vm_map_fork_share(
264 	vm_map_t        old_map,
265 	vm_map_entry_t  old_entry,
266 	vm_map_t        new_map);
267 
268 static boolean_t        vm_map_fork_copy(
269 	vm_map_t        old_map,
270 	vm_map_entry_t  *old_entry_p,
271 	vm_map_t        new_map,
272 	int             vm_map_copyin_flags);
273 
274 static kern_return_t    vm_map_wire_nested(
275 	vm_map_t                   map,
276 	vm_map_offset_t            start,
277 	vm_map_offset_t            end,
278 	vm_prot_t                  caller_prot,
279 	vm_tag_t                   tag,
280 	boolean_t                  user_wire,
281 	pmap_t                     map_pmap,
282 	vm_map_offset_t            pmap_addr,
283 	ppnum_t                    *physpage_p);
284 
285 static kern_return_t    vm_map_unwire_nested(
286 	vm_map_t                   map,
287 	vm_map_offset_t            start,
288 	vm_map_offset_t            end,
289 	boolean_t                  user_wire,
290 	pmap_t                     map_pmap,
291 	vm_map_offset_t            pmap_addr);
292 
293 static kern_return_t    vm_map_overwrite_submap_recurse(
294 	vm_map_t                   dst_map,
295 	vm_map_offset_t            dst_addr,
296 	vm_map_size_t              dst_size);
297 
298 static kern_return_t    vm_map_copy_overwrite_nested(
299 	vm_map_t                   dst_map,
300 	vm_map_offset_t            dst_addr,
301 	vm_map_copy_t              copy,
302 	boolean_t                  interruptible,
303 	pmap_t                     pmap,
304 	boolean_t                  discard_on_success);
305 
306 static kern_return_t    vm_map_remap_extract(
307 	vm_map_t                map,
308 	vm_map_offset_t         addr,
309 	vm_map_size_t           size,
310 	boolean_t               copy,
311 	struct vm_map_header    *map_header,
312 	vm_prot_t               *cur_protection,
313 	vm_prot_t               *max_protection,
314 	vm_inherit_t            inheritance,
315 	vm_map_kernel_flags_t   vmk_flags);
316 
317 static kern_return_t    vm_map_remap_range_allocate(
318 	vm_map_t                map,
319 	vm_map_address_t        *address,
320 	vm_map_size_t           size,
321 	vm_map_offset_t         mask,
322 	int                     flags,
323 	vm_map_kernel_flags_t   vmk_flags,
324 	vm_tag_t                tag,
325 	vm_map_entry_t          *map_entry);
326 
327 static void             vm_map_region_look_for_page(
328 	vm_map_t                   map,
329 	vm_map_offset_t            va,
330 	vm_object_t                object,
331 	vm_object_offset_t         offset,
332 	int                        max_refcnt,
333 	unsigned short             depth,
334 	vm_region_extended_info_t  extended,
335 	mach_msg_type_number_t count);
336 
337 static int              vm_map_region_count_obj_refs(
338 	vm_map_entry_t             entry,
339 	vm_object_t                object);
340 
341 
342 static kern_return_t    vm_map_willneed(
343 	vm_map_t        map,
344 	vm_map_offset_t start,
345 	vm_map_offset_t end);
346 
347 static kern_return_t    vm_map_reuse_pages(
348 	vm_map_t        map,
349 	vm_map_offset_t start,
350 	vm_map_offset_t end);
351 
352 static kern_return_t    vm_map_reusable_pages(
353 	vm_map_t        map,
354 	vm_map_offset_t start,
355 	vm_map_offset_t end);
356 
357 static kern_return_t    vm_map_can_reuse(
358 	vm_map_t        map,
359 	vm_map_offset_t start,
360 	vm_map_offset_t end);
361 
362 #if MACH_ASSERT
363 static kern_return_t    vm_map_pageout(
364 	vm_map_t        map,
365 	vm_map_offset_t start,
366 	vm_map_offset_t end);
367 #endif /* MACH_ASSERT */
368 
369 kern_return_t vm_map_corpse_footprint_collect(
370 	vm_map_t        old_map,
371 	vm_map_entry_t  old_entry,
372 	vm_map_t        new_map);
373 void vm_map_corpse_footprint_collect_done(
374 	vm_map_t        new_map);
375 void vm_map_corpse_footprint_destroy(
376 	vm_map_t        map);
377 kern_return_t vm_map_corpse_footprint_query_page_info(
378 	vm_map_t        map,
379 	vm_map_offset_t va,
380 	int             *disposition_p);
381 void vm_map_footprint_query_page_info(
382 	vm_map_t        map,
383 	vm_map_entry_t  map_entry,
384 	vm_map_offset_t curr_s_offset,
385 	int             *disposition_p);
386 
387 pid_t find_largest_process_vm_map_entries(void);
388 
389 extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
390     mach_exception_data_type_t subcode);
391 
392 /*
393  * Macros to copy a vm_map_entry. We must be careful to correctly
394  * manage the wired page count. vm_map_entry_copy() creates a new
395  * map entry to the same memory - the wired count in the new entry
396  * must be set to zero. vm_map_entry_copy_full() creates a new
397  * entry that is identical to the old entry.  This preserves the
398  * wire count; it's used for map splitting and zone changing in
399  * vm_map_copyout.
400  */
401 
402 static inline void
403 vm_map_entry_copy_pmap_cs_assoc(
404 	vm_map_t map __unused,
405 	vm_map_entry_t new __unused,
406 	vm_map_entry_t old __unused)
407 {
408 	/* when pmap_cs is not enabled, assert as a sanity check */
409 	assert(new->pmap_cs_associated == FALSE);
410 }
411 
412 /*
413  * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
414  * But for security reasons on some platforms, we don't want the
415  * new mapping to be "used for jit", so we reset the flag here.
416  */
417 static inline void
418 vm_map_entry_copy_code_signing(
419 	vm_map_t map,
420 	vm_map_entry_t new,
421 	vm_map_entry_t old __unused)
422 {
423 	if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
424 		assert(new->used_for_jit == old->used_for_jit);
425 	} else {
426 		new->used_for_jit = FALSE;
427 	}
428 }
429 
430 static inline void
431 vm_map_entry_copy_full(
432 	vm_map_entry_t new,
433 	vm_map_entry_t old)
434 {
435 #if MAP_ENTRY_CREATION_DEBUG
436 	btref_put(new->vme_creation_bt);
437 	btref_retain(old->vme_creation_bt);
438 #endif
439 #if MAP_ENTRY_INSERTION_DEBUG
440 	btref_put(new->vme_insertion_bt);
441 	btref_retain(old->vme_insertion_bt);
442 #endif
443 	*new = *old;
444 }
445 
446 static inline void
447 vm_map_entry_copy(
448 	vm_map_t map,
449 	vm_map_entry_t new,
450 	vm_map_entry_t old)
451 {
452 	vm_map_entry_copy_full(new, old);
453 
454 	new->is_shared = FALSE;
455 	new->needs_wakeup = FALSE;
456 	new->in_transition = FALSE;
457 	new->wired_count = 0;
458 	new->user_wired_count = 0;
459 	new->permanent = FALSE;
460 	vm_map_entry_copy_code_signing(map, new, old);
461 	vm_map_entry_copy_pmap_cs_assoc(map, new, old);
462 	if (new->iokit_acct) {
463 		assertf(!new->use_pmap, "old %p new %p\n", old, new);
464 		new->iokit_acct = FALSE;
465 		new->use_pmap = TRUE;
466 	}
467 	new->vme_resilient_codesign = FALSE;
468 	new->vme_resilient_media = FALSE;
469 	new->vme_atomic = FALSE;
470 	new->vme_no_copy_on_read = FALSE;
471 }
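/*
 * Illustrative sketch (not part of the original file): the two copy
 * flavors above differ in how they treat wiring.  vm_map_entry_copy_full()
 * is a bitwise copy that preserves wired_count (map splitting, zone
 * changes), while vm_map_entry_copy() clears wiring and transient state
 * because the new entry is a second mapping of the same memory:
 *
 *	struct vm_map_entry new_entry;
 *	vm_map_entry_copy(map, &new_entry, old_entry);
 *	assert(new_entry.wired_count == 0);       // wiring never inherited
 *	assert(new_entry.user_wired_count == 0);
 *
 *	vm_map_entry_copy_full(&new_entry, old_entry);
 *	assert(new_entry.wired_count == old_entry->wired_count);
 */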
472 
473 /*
474  * Normal lock_read_to_write() returns FALSE/0 on failure.
475  * These functions evaluate to zero on success and non-zero value on failure.
476  */
477 __attribute__((always_inline))
478 int
479 vm_map_lock_read_to_write(vm_map_t map)
480 {
481 	if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
482 		DTRACE_VM(vm_map_lock_upgrade);
483 		return 0;
484 	}
485 	return 1;
486 }
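/*
 * Illustrative sketch (assumed caller, not from this file): because the
 * return convention is inverted relative to lck_rw_lock_shared_to_exclusive(),
 * a non-zero result means the upgrade failed; the shared lock has been
 * dropped, so the caller must re-take the lock and revalidate anything it
 * observed while only read-locked:
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lock_read_to_write(map)) {
 *		vm_map_lock(map);	// read lock was lost; redo lookups
 *	}
 *	... modify the map ...
 *	vm_map_unlock(map);
 */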
487 
488 __attribute__((always_inline))
489 boolean_t
490 vm_map_try_lock(vm_map_t map)
491 {
492 	if (lck_rw_try_lock_exclusive(&(map)->lock)) {
493 		DTRACE_VM(vm_map_lock_w);
494 		return TRUE;
495 	}
496 	return FALSE;
497 }
498 
499 __attribute__((always_inline))
500 boolean_t
501 vm_map_try_lock_read(vm_map_t map)
502 {
503 	if (lck_rw_try_lock_shared(&(map)->lock)) {
504 		DTRACE_VM(vm_map_lock_r);
505 		return TRUE;
506 	}
507 	return FALSE;
508 }
509 
510 /*
511  * Routines to get the page size the caller should
512  * use while inspecting the target address space.
513  * Use the "_safely" variant if the caller is dealing with a user-provided
514  * array whose size depends on the page size, to avoid any overflow or
515  * underflow of a user-allocated buffer.
516  */
517 int
518 vm_self_region_page_shift_safely(
519 	vm_map_t target_map)
520 {
521 	int effective_page_shift = 0;
522 
523 	if (PAGE_SIZE == (4096)) {
524 		/* x86_64 and 4k watches: always use 4k */
525 		return PAGE_SHIFT;
526 	}
527 	/* did caller provide an explicit page size for this thread to use? */
528 	effective_page_shift = thread_self_region_page_shift();
529 	if (effective_page_shift) {
530 		/* use the explicitly-provided page size */
531 		return effective_page_shift;
532 	}
533 	/* no explicit page size: use the caller's page size... */
534 	effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
535 	if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
536 		/* page size match: safe to use */
537 		return effective_page_shift;
538 	}
539 	/* page size mismatch */
540 	return -1;
541 }
542 int
543 vm_self_region_page_shift(
544 	vm_map_t target_map)
545 {
546 	int effective_page_shift;
547 
548 	effective_page_shift = vm_self_region_page_shift_safely(target_map);
549 	if (effective_page_shift == -1) {
550 		/* no safe value but OK to guess for caller */
551 		effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
552 		    VM_MAP_PAGE_SHIFT(target_map));
553 	}
554 	return effective_page_shift;
555 }
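/*
 * Illustrative sketch (hypothetical caller): when a user-supplied buffer
 * is sized in pages, the inspecting and target sides must agree on the
 * page size, or a 4K-page caller inspecting a 16K-page task could walk
 * past the end of its array:
 *
 *	int shift = vm_self_region_page_shift_safely(target_map);
 *	if (shift == -1) {
 *		return KERN_INVALID_ARGUMENT;	// page size mismatch
 *	}
 *	num_pages = (unsigned int)(region_size >> shift);
 */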
556 
557 
558 /*
559  *	Decide if we want to allow processes to execute from their data or stack areas.
560  *	override_nx() returns true if we do.  Data/stack execution can be enabled independently
561  *	for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
562  *	or allow_stack_exec to enable data execution for that type of data area for that particular
563  *	ABI (or both by or'ing the flags together).  These are initialized in the architecture
564  *	specific pmap files since the default behavior varies according to architecture.  The
565  *	main reason it varies is because of the need to provide binary compatibility with old
566  *	applications that were written before these restrictions came into being.  In the old
567  *	days, an app could execute anything it could read, but this has slowly been tightened
568  *	up over time.  The default behavior is:
569  *
570  *	32-bit PPC apps		may execute from both stack and data areas
571  *	32-bit Intel apps	may execute from data areas but not stack
572  *	64-bit PPC/Intel apps	may not execute from either data or stack
573  *
574  *	An application on any architecture may override these defaults by explicitly
575  *	adding PROT_EXEC permission to the page in question with the mprotect(2)
576  *	system call.  This code here just determines what happens when an app tries to
577  *      execute from a page that lacks execute permission.
578  *
579  *	Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
580  *	default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
581  *	a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
582  *	execution from data areas for a particular binary even if the arch normally permits it. As
583  *	a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
584  *	to support some complicated use cases, notably browsers with out-of-process plugins that
585  *	are not all NX-safe.
586  */
587 
588 extern int allow_data_exec, allow_stack_exec;
589 
590 int
591 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
592 {
593 	int current_abi;
594 
595 	if (map->pmap == kernel_pmap) {
596 		return FALSE;
597 	}
598 
599 	/*
600 	 * Determine if the app is running in 32 or 64 bit mode.
601 	 */
602 
603 	if (vm_map_is_64bit(map)) {
604 		current_abi = VM_ABI_64;
605 	} else {
606 		current_abi = VM_ABI_32;
607 	}
608 
609 	/*
610 	 * Determine if we should allow the execution based on whether it's a
611 	 * stack or data area and the current architecture.
612 	 */
613 
614 	if (user_tag == VM_MEMORY_STACK) {
615 		return allow_stack_exec & current_abi;
616 	}
617 
618 	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
619 }
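/*
 * Illustrative sketch (example values, not the defaults): to let only
 * 32-bit processes execute from their data areas while keeping all stacks
 * non-executable, a pmap layer could initialize:
 *
 *	allow_data_exec  = VM_ABI_32;	// data: 32-bit processes only
 *	allow_stack_exec = 0;		// stack: no one
 *
 * override_nx() then returns non-zero for a VM_MEMORY_STACK mapping only
 * when allow_stack_exec has the faulting process's ABI bit set.
 */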
620 
621 
622 /*
623  *	Virtual memory maps provide for the mapping, protection,
624  *	and sharing of virtual memory objects.  In addition,
625  *	this module provides for an efficient virtual copy of
626  *	memory from one map to another.
627  *
628  *	Synchronization is required prior to most operations.
629  *
630  *	Maps consist of an ordered doubly-linked list of simple
631  *	entries; a single hint is used to speed up lookups.
632  *
633  *	Sharing maps have been deleted from this version of Mach.
634  *	All shared objects are now mapped directly into the respective
635  *	maps.  This requires a change in the copy on write strategy;
636  *	the asymmetric (delayed) strategy is used for shared temporary
637  *	objects instead of the symmetric (shadow) strategy.  All maps
638  *	are now "top level" maps (either task map, kernel map or submap
639  *	of the kernel map).
640  *
641  *	Since portions of maps are specified by start/end addresses,
642  *	which may not align with existing map entries, all
643  *	routines merely "clip" entries to these start/end values.
644  *	[That is, an entry is split into two, bordering at a
645  *	start or end value.]  Note that these clippings may not
646  *	always be necessary (as the two resulting entries are then
647  *	not changed); however, the clipping is done for convenience.
648  *	No attempt is currently made to "glue back together" two
649  *	abutting entries.
650  *
651  *	The symmetric (shadow) copy strategy implements virtual copy
652  *	by copying VM object references from one map to
653  *	another, and then marking both regions as copy-on-write.
654  *	It is important to note that only one writeable reference
655  *	to a VM object region exists in any map when this strategy
656  *	is used -- this means that shadow object creation can be
657  *	delayed until a write operation occurs.  The asymmetric (delayed)
658  *	strategy allows multiple maps to have writeable references to
659  *	the same region of a vm object, and hence cannot delay creating
660  *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
661  *	Copying of permanent objects is completely different; see
662  *	vm_object_copy_strategically() in vm_object.c.
663  */
664 
665 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone;       /* zone for vm_map structures */
666 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone;  /* zone for vm_map_copy structures */
667 
668 SECURITY_READ_ONLY_LATE(zone_t)        vm_map_entry_zone; /* zone for vm_map_entry structures */
669 SECURITY_READ_ONLY_LATE(zone_t)        vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
670 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
671 SECURITY_READ_ONLY_LATE(zone_t)        vm_map_entry_reserved_zone;
672 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
673 
674 #define VM_MAP_ZONE_NAME "maps"
675 #define VM_MAP_ZFLAGS ( \
676 	ZC_NOENCRYPT | \
677 	ZC_NOGZALLOC | \
678 	ZC_ALLOW_FOREIGN)
679 
680 #define VM_MAP_ENTRY_ZONE_NAME "VM map entries"
681 #define VM_MAP_ENTRY_ZFLAGS ( \
682 	ZC_NOENCRYPT | \
683 	ZC_CACHING | \
684 	ZC_NOGZALLOC | \
685 	ZC_KASAN_NOQUARANTINE | \
686 	ZC_VM_LP64 | \
687 	ZC_ALLOW_FOREIGN)
688 
689 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
690 #define VM_MAP_ENTRY_RESERVED_ZONE_NAME "Reserved VM map entries"
691 #define VM_MAP_ENTRY_RESERVED_ZFLAGS ( \
692 	ZC_NOENCRYPT | \
693 	ZC_NOCACHING | \
694 	ZC_NOGZALLOC | \
695 	ZC_KASAN_NOQUARANTINE | \
696 	ZC_VM)
697 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
698 
699 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
700 #define VM_MAP_HOLES_ZFLAGS ( \
701 	ZC_NOENCRYPT | \
702 	ZC_CACHING | \
703 	ZC_NOGZALLOC | \
704 	ZC_KASAN_NOQUARANTINE | \
705 	ZC_VM_LP64 | \
706 	ZC_ALLOW_FOREIGN)
707 
708 /*
709  * Asserts that a vm_map_copy object is coming from the
710  * vm_map_copy_zone to ensure that it isn't a fake constructed
711  * anywhere else.
712  */
713 static inline void
714 vm_map_copy_require(struct vm_map_copy *copy)
715 {
716 	zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
717 }
718 
719 /*
720  *	vm_map_require:
721  *
722  *	Ensures that the argument is memory allocated from the genuine
723  *	vm map zone. (See zone_id_require_allow_foreign).
724  */
725 void
726 vm_map_require(vm_map_t map)
727 {
728 	zone_id_require_allow_foreign(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
729 }
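/*
 * Illustrative sketch: these provenance checks harden against forged
 * structures, e.g. before consuming a vm_map_copy_t handed in through
 * kernel interfaces:
 *
 *	vm_map_copy_require(copy);	// panics unless from vm_map_copy_zone
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 */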
730 
731 static __startup_data vm_offset_t      map_data;
732 static __startup_data vm_size_t        map_data_size;
733 static __startup_data vm_offset_t      kentry_data;
734 static __startup_data vm_size_t        kentry_data_size;
735 static __startup_data vm_offset_t      map_holes_data;
736 static __startup_data vm_size_t        map_holes_data_size;
737 
738 #if XNU_TARGET_OS_OSX
739 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
740 #else /* XNU_TARGET_OS_OSX */
741 #define         NO_COALESCE_LIMIT  0
742 #endif /* XNU_TARGET_OS_OSX */
743 
744 /* Skip acquiring locks if we're in the midst of a kernel core dump */
745 unsigned int not_in_kdp = 1;
746 
747 unsigned int vm_map_set_cache_attr_count = 0;
748 
749 kern_return_t
750 vm_map_set_cache_attr(
751 	vm_map_t        map,
752 	vm_map_offset_t va)
753 {
754 	vm_map_entry_t  map_entry;
755 	vm_object_t     object;
756 	kern_return_t   kr = KERN_SUCCESS;
757 
758 	vm_map_lock_read(map);
759 
760 	if (!vm_map_lookup_entry(map, va, &map_entry) ||
761 	    map_entry->is_sub_map) {
762 		/*
763 		 * that memory is not properly mapped
764 		 */
765 		kr = KERN_INVALID_ARGUMENT;
766 		goto done;
767 	}
768 	object = VME_OBJECT(map_entry);
769 
770 	if (object == VM_OBJECT_NULL) {
771 		/*
772 		 * there should be a VM object here at this point
773 		 */
774 		kr = KERN_INVALID_ARGUMENT;
775 		goto done;
776 	}
777 	vm_object_lock(object);
778 	object->set_cache_attr = TRUE;
779 	vm_object_unlock(object);
780 
781 	vm_map_set_cache_attr_count++;
782 done:
783 	vm_map_unlock_read(map);
784 
785 	return kr;
786 }
787 
788 
789 #if CONFIG_CODE_DECRYPTION
790 /*
791  * vm_map_apple_protected:
792  * This remaps the requested part of the object with an object backed by
793  * the decrypting pager.
794  * crypt_info contains entry points and session data for the crypt module.
795  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
796  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
797  */
798 kern_return_t
799 vm_map_apple_protected(
800 	vm_map_t                map,
801 	vm_map_offset_t         start,
802 	vm_map_offset_t         end,
803 	vm_object_offset_t      crypto_backing_offset,
804 	struct pager_crypt_info *crypt_info,
805 	uint32_t                cryptid)
806 {
807 	boolean_t       map_locked;
808 	kern_return_t   kr;
809 	vm_map_entry_t  map_entry;
810 	struct vm_map_entry tmp_entry;
811 	memory_object_t unprotected_mem_obj;
812 	vm_object_t     protected_object;
813 	vm_map_offset_t map_addr;
814 	vm_map_offset_t start_aligned, end_aligned;
815 	vm_object_offset_t      crypto_start, crypto_end;
816 	int             vm_flags;
817 	vm_map_kernel_flags_t vmk_flags;
818 	boolean_t       cache_pager;
819 
820 	vm_flags = 0;
821 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
822 
823 	map_locked = FALSE;
824 	unprotected_mem_obj = MEMORY_OBJECT_NULL;
825 
826 	start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
827 	end_aligned = vm_map_round_page(end, PAGE_MASK_64);
828 	start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
829 	end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
830 
831 #if __arm64__
832 	/*
833 	 * "start" and "end" might be 4K-aligned but not 16K-aligned,
834 	 * so we might have to loop and establish up to 3 mappings:
835 	 *
836 	 * + the first 16K-page, which might overlap with the previous
837 	 *   4K-aligned mapping,
838 	 * + the center,
839 	 * + the last 16K-page, which might overlap with the next
840 	 *   4K-aligned mapping.
841 	 * Each of these mappings might be backed by a vnode pager (if
842 	 * properly page-aligned) or a "fourk_pager", itself backed by a
843 	 * vnode pager (if 4K-aligned but not page-aligned).
844 	 */
845 #endif /* __arm64__ */
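	/*
	 * Worked example (hypothetical numbers): with 16K map pages, a
	 * request for [0x5000, 0x9000) yields start_aligned/end_aligned of
	 * [0x4000, 0xC000).  The crypto_start/crypto_end computed below then
	 * trim the pager's crypto range back to the 4K-accurate
	 * [0x5000, 0x9000), so only the requested bytes are decrypted.
	 */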
846 
847 	map_addr = start_aligned;
848 	for (map_addr = start_aligned;
849 	    map_addr < end;
850 	    map_addr = tmp_entry.vme_end) {
851 		vm_map_lock(map);
852 		map_locked = TRUE;
853 
854 		/* lookup the protected VM object */
855 		if (!vm_map_lookup_entry(map,
856 		    map_addr,
857 		    &map_entry) ||
858 		    map_entry->is_sub_map ||
859 		    VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
860 			/* that memory is not properly mapped */
861 			kr = KERN_INVALID_ARGUMENT;
862 			goto done;
863 		}
864 
865 		/* ensure the memory is mapped as executable,
866 		 * except for the model decryption flow */
867 		if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
868 		    !(map_entry->protection & VM_PROT_EXECUTE)) {
869 			kr = KERN_INVALID_ARGUMENT;
870 			goto done;
871 		}
872 
873 		/* get the protected object to be decrypted */
874 		protected_object = VME_OBJECT(map_entry);
875 		if (protected_object == VM_OBJECT_NULL) {
876 			/* there should be a VM object here at this point */
877 			kr = KERN_INVALID_ARGUMENT;
878 			goto done;
879 		}
880 		/* ensure protected object stays alive while map is unlocked */
881 		vm_object_reference(protected_object);
882 
883 		/* limit the map entry to the area we want to cover */
884 		vm_map_clip_start(map, map_entry, start_aligned);
885 		vm_map_clip_end(map, map_entry, end_aligned);
886 
887 		tmp_entry = *map_entry;
888 		map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
889 		vm_map_unlock(map);
890 		map_locked = FALSE;
891 
892 		/*
893 		 * This map entry might be only partially encrypted
894 		 * (if not fully "page-aligned").
895 		 */
896 		crypto_start = 0;
897 		crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
898 		if (tmp_entry.vme_start < start) {
899 			if (tmp_entry.vme_start != start_aligned) {
900 				kr = KERN_INVALID_ADDRESS;
901 			}
902 			crypto_start += (start - tmp_entry.vme_start);
903 		}
904 		if (tmp_entry.vme_end > end) {
905 			if (tmp_entry.vme_end != end_aligned) {
906 				kr = KERN_INVALID_ADDRESS;
907 			}
908 			crypto_end -= (tmp_entry.vme_end - end);
909 		}
910 
911 		/*
912 		 * This "extra backing offset" is needed to get the decryption
913 		 * routine to use the right key.  It adjusts for the possibly
914 		 * relative offset of an interposed "4K" pager...
915 		 */
916 		if (crypto_backing_offset == (vm_object_offset_t) -1) {
917 			crypto_backing_offset = VME_OFFSET(&tmp_entry);
918 		}
919 
920 		cache_pager = TRUE;
921 #if XNU_TARGET_OS_OSX
922 		if (vm_map_is_alien(map)) {
923 			cache_pager = FALSE;
924 		}
925 #endif /* XNU_TARGET_OS_OSX */
926 
927 		/*
928 		 * Lookup (and create if necessary) the protected memory object
929 		 * matching that VM object.
930 		 * If successful, this also grabs a reference on the memory object,
931 		 * to guarantee that it doesn't go away before we get a chance to map
932 		 * it.
933 		 */
934 		unprotected_mem_obj = apple_protect_pager_setup(
935 			protected_object,
936 			VME_OFFSET(&tmp_entry),
937 			crypto_backing_offset,
938 			crypt_info,
939 			crypto_start,
940 			crypto_end,
941 			cache_pager);
942 
943 		/* release extra ref on protected object */
944 		vm_object_deallocate(protected_object);
945 
946 		if (unprotected_mem_obj == NULL) {
947 			kr = KERN_FAILURE;
948 			goto done;
949 		}
950 
951 		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
952 		/* can overwrite an immutable mapping */
953 		vmk_flags.vmkf_overwrite_immutable = TRUE;
954 #if __arm64__
955 		if (tmp_entry.used_for_jit &&
956 		    (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
957 		    PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
958 		    fourk_binary_compatibility_unsafe &&
959 		    fourk_binary_compatibility_allow_wx) {
960 			printf("** FOURK_COMPAT [%d]: "
961 			    "allowing write+execute at 0x%llx\n",
962 			    proc_selfpid(), tmp_entry.vme_start);
963 			vmk_flags.vmkf_map_jit = TRUE;
964 		}
965 #endif /* __arm64__ */
966 
967 		/* map this memory object in place of the current one */
968 		map_addr = tmp_entry.vme_start;
969 		kr = vm_map_enter_mem_object(map,
970 		    &map_addr,
971 		    (tmp_entry.vme_end -
972 		    tmp_entry.vme_start),
973 		    (mach_vm_offset_t) 0,
974 		    vm_flags,
975 		    vmk_flags,
976 		    VM_KERN_MEMORY_NONE,
977 		    (ipc_port_t)(uintptr_t) unprotected_mem_obj,
978 		    0,
979 		    TRUE,
980 		    tmp_entry.protection,
981 		    tmp_entry.max_protection,
982 		    tmp_entry.inheritance);
983 		assertf(kr == KERN_SUCCESS,
984 		    "kr = 0x%x\n", kr);
985 		assertf(map_addr == tmp_entry.vme_start,
986 		    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
987 		    (uint64_t)map_addr,
988 		    (uint64_t) tmp_entry.vme_start,
989 		    &tmp_entry);
990 
991 #if VM_MAP_DEBUG_APPLE_PROTECT
992 		if (vm_map_debug_apple_protect) {
993 			printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
994 			    " backing:[object:%p,offset:0x%llx,"
995 			    "crypto_backing_offset:0x%llx,"
996 			    "crypto_start:0x%llx,crypto_end:0x%llx]\n",
997 			    map,
998 			    (uint64_t) map_addr,
999 			    (uint64_t) (map_addr + (tmp_entry.vme_end -
1000 			    tmp_entry.vme_start)),
1001 			    unprotected_mem_obj,
1002 			    protected_object,
1003 			    VME_OFFSET(&tmp_entry),
1004 			    crypto_backing_offset,
1005 			    crypto_start,
1006 			    crypto_end);
1007 		}
1008 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1009 
1010 		/*
1011 		 * Release the reference obtained by
1012 		 * apple_protect_pager_setup().
1013 		 * The mapping (if it succeeded) is now holding a reference on
1014 		 * the memory object.
1015 		 */
1016 		memory_object_deallocate(unprotected_mem_obj);
1017 		unprotected_mem_obj = MEMORY_OBJECT_NULL;
1018 
1019 		/* continue with next map entry */
1020 		crypto_backing_offset += (tmp_entry.vme_end -
1021 		    tmp_entry.vme_start);
1022 		crypto_backing_offset -= crypto_start;
1023 	}
1024 	kr = KERN_SUCCESS;
1025 
1026 done:
1027 	if (map_locked) {
1028 		vm_map_unlock(map);
1029 	}
1030 	return kr;
1031 }
1032 #endif  /* CONFIG_CODE_DECRYPTION */
1033 
1034 
1035 LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
1036 LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1037 LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
1038 
1039 #if XNU_TARGET_OS_OSX
1040 int malloc_no_cow = 0;
1041 #else /* XNU_TARGET_OS_OSX */
1042 int malloc_no_cow = 1;
1043 #endif /* XNU_TARGET_OS_OSX */
1044 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
1045 #if DEBUG
1046 int vm_check_map_sanity = 0;
1047 #endif
1048 
1049 /*
1050  *	vm_map_init:
1051  *
1052  *	Initialize the vm_map module.  Must be called before
1053  *	any other vm_map routines.
1054  *
1055  *	Map and entry structures are allocated from zones -- we must
1056  *	initialize those zones.
1057  *
1058  *	There are three zones of interest:
1059  *
1060  *	vm_map_zone:		used to allocate maps.
1061  *	vm_map_entry_zone:	used to allocate map entries.
1062  *
1063  *	LP32:
1064  *	vm_map_entry_reserved_zone:     fallback zone for kernel map entries
1065  *
1066  *	The kernel allocates map entries from a special zone that is initially
1067  *	"crammed" with memory.  It would be difficult (perhaps impossible) for
1068  *	the kernel to allocate more memory to an entry zone when it became
1069  *	empty since the very act of allocating memory implies the creation
1070  *	of a new entry.
1071  */
1072 __startup_func
1073 void
1074 vm_map_init(void)
1075 {
1076 
1077 #if MACH_ASSERT
1078 	PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1079 	    sizeof(debug4k_filter));
1080 #endif /* MACH_ASSERT */
1081 
1082 	vm_map_zone = zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1083 	    VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
1084 
1085 	/*
1086 	 * Don't quarantine because we always need elements available
1087 	 * Disallow GC on this zone... to aid the GC.
1088 	 */
1089 	vm_map_entry_zone = zone_create_ext(VM_MAP_ENTRY_ZONE_NAME,
1090 	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1091 	    ZONE_ID_VM_MAP_ENTRY, ^(zone_t z) {
1092 		z->z_elems_rsv = (uint16_t)(32 *
1093 		(ml_early_cpu_max_number() + 1));
1094 	});
1095 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
1096 	vm_map_entry_reserved_zone = zone_create(VM_MAP_ENTRY_RESERVED_ZONE_NAME,
1097 	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_RESERVED_ZFLAGS);
1098 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
1099 
1100 	vm_map_holes_zone = zone_create_ext(VM_MAP_HOLES_ZONE_NAME,
1101 	    sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1102 	    ZONE_ID_VM_MAP_HOLES, ^(zone_t z) {
1103 		z->z_elems_rsv = (uint16_t)(16 * 1024 / zone_elem_size(z));
1104 	});
1105 
1106 	vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1107 	    ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1108 
1109 	/*
1110 	 * Add the stolen memory to zones, adjust zone size and stolen counts.
1111 	 */
1112 	zone_cram_foreign(vm_map_zone, map_data, map_data_size);
1113 	zone_cram_foreign(vm_map_entry_zone, kentry_data, kentry_data_size);
1114 	zone_cram_foreign(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1115 	printf("VM boostrap: %d maps, %d entries and %d holes available\n",
1116 	    vm_map_zone->z_elems_free,
1117 	    vm_map_entry_zone->z_elems_free,
1118 	    vm_map_holes_zone->z_elems_free);
1119 
1120 	/*
1121 	 * Since these are covered by zones, remove them from stolen page accounting.
1122 	 */
1123 	VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1124 
1125 #if VM_MAP_DEBUG_APPLE_PROTECT
1126 	PE_parse_boot_argn("vm_map_debug_apple_protect",
1127 	    &vm_map_debug_apple_protect,
1128 	    sizeof(vm_map_debug_apple_protect));
1129 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1130 #if VM_MAP_DEBUG_FOURK
1131 	PE_parse_boot_argn("vm_map_debug_fourk",
1132 	    &vm_map_debug_fourk,
1133 	    sizeof(vm_map_debug_fourk));
1134 #endif /* VM_MAP_DEBUG_FOURK */
1135 	PE_parse_boot_argn("vm_map_executable_immutable",
1136 	    &vm_map_executable_immutable,
1137 	    sizeof(vm_map_executable_immutable));
1138 	PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1139 	    &vm_map_executable_immutable_verbose,
1140 	    sizeof(vm_map_executable_immutable_verbose));
1141 
1142 	PE_parse_boot_argn("malloc_no_cow",
1143 	    &malloc_no_cow,
1144 	    sizeof(malloc_no_cow));
1145 	if (malloc_no_cow) {
1146 		vm_memory_malloc_no_cow_mask = 0ULL;
1147 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1148 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1149 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1150 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1151 //		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1152 //		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1153 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1154 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1155 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1156 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1157 //		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1158 		PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1159 		    &vm_memory_malloc_no_cow_mask,
1160 		    sizeof(vm_memory_malloc_no_cow_mask));
1161 	}
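	/*
	 * Illustrative sketch (consumer side, elsewhere in the VM layer):
	 * the mask is indexed by VM user tag when deciding whether a
	 * malloc-tagged mapping should avoid copy-on-write:
	 *
	 *	if (vm_memory_malloc_no_cow_mask & (1ULL << user_alias)) {
	 *		// map this allocation without copy-on-write
	 *	}
	 */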
1162 
1163 #if DEBUG
1164 	PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1165 	if (vm_check_map_sanity) {
1166 		kprintf("VM sanity checking enabled\n");
1167 	} else {
1168 		kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1169 	}
1170 #endif /* DEBUG */
1171 
1172 #if DEVELOPMENT || DEBUG
1173 	PE_parse_boot_argn("panic_on_unsigned_execute",
1174 	    &panic_on_unsigned_execute,
1175 	    sizeof(panic_on_unsigned_execute));
1176 	PE_parse_boot_argn("panic_on_mlock_failure",
1177 	    &panic_on_mlock_failure,
1178 	    sizeof(panic_on_mlock_failure));
1179 #endif /* DEVELOPMENT || DEBUG */
1180 }
1181 
1182 __startup_func
1183 static void
1184 vm_map_steal_memory(void)
1185 {
1186 	uint16_t kentry_initial_pages;
1187 	uint16_t zone_foreign_pages;
1188 	bool overloaded = false;
1189 
1190 	/*
1191 	 * 1 page of maps and holes is enough for early boot
1192 	 *
1193 	 * Those early crams are only needed to bootstrap zones
1194 	 * until zone_init() has run (STARTUP_RANK_FIRST of ZALLOC).
1195 	 * After that point, zones know how to allocate vm map entries,
1196 	 * holes, and maps.
1197 	 */
1198 	map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
1199 	    sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);
1200 
1201 	map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
1202 	    sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS, 1);
1203 
1204 	/*
1205 	 * kentry_initial_pages corresponds to the number of kernel map entries
1206 	 * required during bootstrap for the duration of zone_init().
1207 	 */
1208 #if defined(__LP64__)
1209 	kentry_initial_pages = (uint16_t)atop(10 * 4096);
1210 #else
1211 	kentry_initial_pages = 6;
1212 #endif
1213 
1214 #if CONFIG_GZALLOC
1215 	/*
1216 	 * If using the guard allocator, reserve more memory for the kernel
1217 	 * reserved map entry pool.
1218 	 */
1219 	if (gzalloc_enabled()) {
1220 		kentry_initial_pages *= 100;
1221 		overloaded = true;
1222 	}
1223 #endif
1224 	if (PE_parse_boot_argn("zone_foreign_pages", &zone_foreign_pages,
1225 	    sizeof(zone_foreign_pages))) {
1226 		kentry_initial_pages = zone_foreign_pages;
1227 		overloaded = true;
1228 	}
1229 
1230 	kentry_data_size = zone_get_foreign_alloc_size(VM_MAP_ENTRY_ZONE_NAME,
1231 	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1232 	    kentry_initial_pages);
1233 
1234 	/*
1235 	 * Steal a contiguous range of memory so that a simple range check
1236 	 * can validate foreign addresses being freed/crammed to these
1237 	 * zones
1238 	 */
1239 	vm_size_t total_size;
1240 	if (os_add3_overflow(map_data_size, kentry_data_size,
1241 	    map_holes_data_size, &total_size)) {
1242 		panic("vm_map_steal_memory: overflow in amount of memory requested");
1243 	}
1244 	map_data = zone_foreign_mem_init(total_size, overloaded);
1245 	kentry_data = map_data + map_data_size;
1246 	map_holes_data = kentry_data + kentry_data_size;
1247 }
1248 STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1249 
1250 __startup_func
1251 static void
1252 vm_kernel_bootstrapped(void)
1253 {
1254 	printf("VM bootstrap done: %d maps, %d entries and %d holes left\n",
1255 	    vm_map_zone->z_elems_free,
1256 	    vm_map_entry_zone->z_elems_free,
1257 	    vm_map_holes_zone->z_elems_free);
1258 }
1259 STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_kernel_bootstrapped);
1260 
1261 void
1262 vm_map_disable_hole_optimization(vm_map_t map)
1263 {
1264 	vm_map_entry_t  head_entry, hole_entry, next_hole_entry;
1265 
1266 	if (map->holelistenabled) {
1267 		head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1268 
1269 		while (hole_entry != NULL) {
1270 			next_hole_entry = hole_entry->vme_next;
1271 
1272 			hole_entry->vme_next = NULL;
1273 			hole_entry->vme_prev = NULL;
1274 			zfree(vm_map_holes_zone, hole_entry);
1275 
1276 			if (next_hole_entry == head_entry) {
1277 				hole_entry = NULL;
1278 			} else {
1279 				hole_entry = next_hole_entry;
1280 			}
1281 		}
1282 
1283 		map->holes_list = NULL;
1284 		map->holelistenabled = FALSE;
1285 
1286 		map->first_free = vm_map_first_entry(map);
1287 		SAVE_HINT_HOLE_WRITE(map, NULL);
1288 	}
1289 }
1290 
1291 boolean_t
1292 vm_kernel_map_is_kernel(vm_map_t map)
1293 {
1294 	return map->pmap == kernel_pmap;
1295 }
1296 
1297 /*
1298  *	vm_map_create:
1299  *
1300  *	Creates and returns a new empty VM map with
1301  *	the given physical map structure, and having
1302  *	the given lower and upper address bounds.
1303  */
1304 
1305 extern vm_map_t vm_map_create_external(
1306 	pmap_t                  pmap,
1307 	vm_map_offset_t         min_off,
1308 	vm_map_offset_t         max_off,
1309 	boolean_t               pageable);
1310 
1311 vm_map_t
1312 vm_map_create_external(
1313 	pmap_t                  pmap,
1314 	vm_map_offset_t         min,
1315 	vm_map_offset_t         max,
1316 	boolean_t               pageable)
1317 {
1318 	vm_map_create_options_t options = VM_MAP_CREATE_DEFAULT;
1319 
1320 	if (pageable) {
1321 		options |= VM_MAP_CREATE_PAGEABLE;
1322 	}
1323 	return vm_map_create_options(pmap, min, max, options);
1324 }
1325 
1326 vm_map_t
1327 vm_map_create_options(
1328 	pmap_t                  pmap,
1329 	vm_map_offset_t         min,
1330 	vm_map_offset_t         max,
1331 	vm_map_create_options_t options)
1332 {
1333 	vm_map_t result;
1334 
1335 	result = zalloc_flags(vm_map_zone, Z_WAITOK | Z_NOFAIL | Z_ZERO);
1336 
1337 	vm_map_first_entry(result) = vm_map_to_entry(result);
1338 	vm_map_last_entry(result)  = vm_map_to_entry(result);
1339 
1340 	vm_map_store_init(&result->hdr);
1341 	result->hdr.entries_pageable = (bool)(options & VM_MAP_CREATE_PAGEABLE);
1342 	vm_map_set_page_shift(result, PAGE_SHIFT);
1343 
1344 	result->size_limit = RLIM_INFINITY;             /* default unlimited */
1345 	result->data_limit = RLIM_INFINITY;             /* default unlimited */
1346 	result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
1347 	os_ref_init_count_raw(&result->map_refcnt, &map_refgrp, 1);
1348 	result->pmap = pmap;
1349 	result->min_offset = min;
1350 	result->max_offset = max;
1351 	result->first_free = vm_map_to_entry(result);
1352 	result->hint = vm_map_to_entry(result);
1353 
1354 	if (options & VM_MAP_CREATE_NEVER_FAULTS) {
1355 		assert(pmap == kernel_pmap);
1356 		result->never_faults = true;
1357 	}
1358 
1359 	/* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1360 	if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1361 		result->has_corpse_footprint = true;
1362 	} else if (startup_phase >= STARTUP_SUB_ZALLOC &&
1363 	    !(options & VM_MAP_CREATE_DISABLE_HOLELIST)) {
1364 		struct vm_map_links *hole_entry = zalloc(vm_map_holes_zone);
1365 
1366 		hole_entry->start = min;
1367 #if defined(__arm__) || defined(__arm64__)
1368 		hole_entry->end = result->max_offset;
1369 #else
1370 		hole_entry->end = MAX(max, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
1371 #endif
1372 		result->holes_list = result->hole_hint = hole_entry;
1373 		hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1374 		result->holelistenabled = true;
1375 	}
1376 
1377 	vm_map_lock_init(result);
1378 
1379 	return result;
1380 }
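/*
 * Illustrative sketch (hypothetical caller): creating a pageable task map
 * spanning the standard user address range:
 *
 *	vm_map_t map = vm_map_create_options(task_pmap,
 *	    MACH_VM_MIN_ADDRESS, MACH_VM_MAX_ADDRESS,
 *	    VM_MAP_CREATE_PAGEABLE);
 */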
1381 
1382 vm_map_size_t
1383 vm_map_adjusted_size(vm_map_t map)
1384 {
1385 	struct vm_reserved_region *regions = NULL;
1386 	size_t num_regions = 0;
1387 	mach_vm_size_t  reserved_size = 0, map_size = 0;
1388 
1389 	if (map == NULL || (map->size == 0)) {
1390 		return 0;
1391 	}
1392 
1393 	map_size = map->size;
1394 
1395 	if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1396 		/*
1397 		 * No special reserved regions or not an exotic map or the task
1398 		 * is terminating and these special regions might have already
1399 		 * been deallocated.
1400 		 */
1401 		return map_size;
1402 	}
1403 
1404 	num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1405 	assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1406 
1407 	while (num_regions) {
1408 		reserved_size += regions[--num_regions].vmrr_size;
1409 	}
1410 
1411 	/*
1412 	 * There are a few places where the map is being switched out due to
1413 	 * 'termination' without that bit being set (e.g. exec and corpse purging).
1414 	 * In those cases, we could have the map's regions being deallocated on
1415 	 * a core while some accounting process is trying to get the map's size.
1416 	 * So this assert can't be enabled till all those places are uniform in
1417 	 * their use of the 'map->terminated' bit.
1418 	 *
1419 	 * assert(map_size >= reserved_size);
1420 	 */
1421 
1422 	return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1423 }
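/*
 * Worked example: with map->size == 6GB and a single 2GB reserved
 * carveout region, the adjusted size reported is 4GB.  If accounting
 * races with map teardown and reserved_size momentarily exceeds
 * map->size, the raw map->size is returned rather than letting the
 * subtraction underflow.
 */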
1424 
1425 /*
1426  *	vm_map_entry_create:	[ internal use only ]
1427  *
1428  *	Allocates a VM map entry for insertion in the
1429  *	given map (or map copy).  No fields are filled.
1430  */
1431 #define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)
1432 
1433 #define vm_map_copy_entry_create(copy, map_locked)                                      \
1434 	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1435 
1436 static vm_map_entry_t
1437 _vm_map_entry_create(
1438 	struct vm_map_header    *map_header __unused,
1439 	boolean_t               map_locked __unused)
1440 {
1441 	vm_map_entry_t  entry = NULL;
1442 	zone_t zone = vm_map_entry_zone;
1443 
1444 	assert(map_header->entries_pageable ? !map_locked : TRUE);
1445 
1446 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
1447 	zone_security_flags_t zsflags = zone_security_array[ZONE_ID_VM_MAP_ENTRY];
1448 	if (map_header == &zone_submap(zsflags)->hdr) {
1449 		/*
1450 		 * If we are trying to allocate an entry for the submap
1451 		 * of the vm_map_entry_zone, then this can cause recursive
1452 		 * locking of this map.
1453 		 *
1454 		 * Try to allocate _without blocking_ from this zone,
1455 		 * but if it is depleted, we need to go to the
1456 		 * vm_map_entry_reserved_zone which is in the zalloc
1457 		 * "VM" submap, which can grow without taking any map lock.
1458 		 *
1459 		 * Note: the vm_map_entry_zone has a rather high "reserve"
1460 		 * setup in order to minimize usage of the reserved one.
1461 		 */
1462 		entry = zalloc_flags(vm_map_entry_zone, Z_NOWAIT | Z_ZERO);
1463 		zone = vm_map_entry_reserved_zone;
1464 	}
1465 #endif
1466 	if (entry == NULL) {
1467 		entry = zalloc_flags(zone, Z_WAITOK | Z_ZERO);
1468 	}
1469 
1470 	entry->behavior = VM_BEHAVIOR_DEFAULT;
1471 	entry->inheritance = VM_INHERIT_DEFAULT;
1472 
1473 	vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1474 #if MAP_ENTRY_CREATION_DEBUG
1475 	entry->vme_creation_maphdr = map_header;
1476 	entry->vme_creation_bt = btref_get(__builtin_frame_address(0),
1477 	    BTREF_GET_NOWAIT);
1478 #endif
1479 	return entry;
1480 }
1481 
1482 /*
1483  *	vm_map_entry_dispose:	[ internal use only ]
1484  *
1485  *	Inverse of vm_map_entry_create.
1486  *
1487  *	The write map lock is held, so there is no need to
1488  *	do anything special to ensure the correctness
1489  *	of the stores.
1490  */
1491 #define vm_map_entry_dispose(map, entry)                        \
1492 	_vm_map_entry_dispose(&(map)->hdr, (entry))
1493 
1494 #define vm_map_copy_entry_dispose(copy, entry) \
1495 	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1496 
1497 static void
1498 _vm_map_entry_dispose(
1499 	struct vm_map_header    *map_header __unused,
1500 	vm_map_entry_t          entry)
1501 {
1502 #if MAP_ENTRY_CREATION_DEBUG
1503 	btref_put(entry->vme_creation_bt);
1504 #endif
1505 #if MAP_ENTRY_INSERTION_DEBUG
1506 	btref_put(entry->vme_insertion_bt);
1507 #endif
1508 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
1509 	switch (zone_id_for_native_element(entry, sizeof(*entry))) {
1510 	case ZONE_ID_VM_MAP_ENTRY:
1511 	case ZONE_ID_INVALID: /* foreign elements are regular entries always */
1512 		break;
1513 	default:
1514 		zfree(vm_map_entry_reserved_zone, entry);
1515 		return;
1516 	}
1517 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
1518 	zfree(vm_map_entry_zone, entry);
1519 }
1520 
1521 #if MACH_ASSERT
1522 static boolean_t first_free_check = FALSE;
1523 boolean_t
1524 first_free_is_valid(
1525 	vm_map_t        map)
1526 {
1527 	if (!first_free_check) {
1528 		return TRUE;
1529 	}
1530 
1531 	return first_free_is_valid_store( map );
1532 }
1533 #endif /* MACH_ASSERT */
1534 
1535 
1536 #define vm_map_copy_entry_link(copy, after_where, entry)                \
1537 	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1538 
1539 #define vm_map_copy_entry_unlink(copy, entry)                           \
1540 	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1541 
1542 /*
1543  *	vm_map_destroy:
1544  *
1545  *	Actually destroy a map.
1546  */
1547 void
1548 vm_map_destroy(
1549 	vm_map_t        map,
1550 	int             flags)
1551 {
1552 	vm_map_lock(map);
1553 
1554 	/* final cleanup: no need to unnest shared region */
1555 	flags |= VM_MAP_REMOVE_NO_UNNESTING;
1556 	/* final cleanup: ok to remove immutable mappings */
1557 	flags |= VM_MAP_REMOVE_IMMUTABLE;
1558 	/* final cleanup: allow gaps in range */
1559 	flags |= VM_MAP_REMOVE_GAPS_OK;
1560 
1561 	/* clean up regular map entries */
1562 	(void) vm_map_delete(map, map->min_offset, map->max_offset,
1563 	    flags, VM_MAP_NULL);
1564 	/* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1565 #if     !defined(__arm__)
1566 	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1567 	    flags, VM_MAP_NULL);
1568 #endif /* !__arm__ */
1569 
1570 	vm_map_disable_hole_optimization(map);
1571 	vm_map_corpse_footprint_destroy(map);
1572 
1573 	vm_map_unlock(map);
1574 
1575 	assert(map->hdr.nentries == 0);
1576 
1577 	if (map->pmap) {
1578 		pmap_destroy(map->pmap);
1579 	}
1580 
1581 #if LOCKS_INDIRECT_ALLOW
1582 	if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1583 		/*
1584 		 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1585 		 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1586 		 * structure or kalloc'ed via lck_mtx_init.
1587 		 * An example is s_lock_ext within struct _vm_map.
1588 		 *
1589 		 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1590 		 * can add another tag to detect embedded vs alloc'ed indirect external
1591 		 * mutexes but that'll be additional checks in the lock path and require
1592 		 * updating dependencies for the old vs new tag.
1593 		 *
1594 		 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1595 		 * just when lock debugging is ON, we choose to forego explicitly destroying
1596 		 * the vm_map mutex and rw lock. Because the vm_map_lck_grp is
1597 		 * permanent, this has no serious side-effect.
1598 		 */
1599 	} else
1600 #endif /* LOCKS_INDIRECT_ALLOW */
1601 	{
1602 		lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1603 	}
1604 
1605 	zfree(vm_map_zone, map);
1606 }
1607 
1608 /*
1609  * Returns pid of the task with the largest number of VM map entries.
1610  * Used in the zone-map-exhaustion jetsam path.
1611  */
1612 pid_t
1613 find_largest_process_vm_map_entries(void)
1614 {
1615 	pid_t victim_pid = -1;
1616 	int max_vm_map_entries = 0;
1617 	task_t task = TASK_NULL;
1618 	queue_head_t *task_list = &tasks;
1619 
1620 	lck_mtx_lock(&tasks_threads_lock);
1621 	queue_iterate(task_list, task, task_t, tasks) {
1622 		if (task == kernel_task || !task->active) {
1623 			continue;
1624 		}
1625 
1626 		vm_map_t task_map = task->map;
1627 		if (task_map != VM_MAP_NULL) {
1628 			int task_vm_map_entries = task_map->hdr.nentries;
1629 			if (task_vm_map_entries > max_vm_map_entries) {
1630 				max_vm_map_entries = task_vm_map_entries;
1631 				victim_pid = pid_from_task(task);
1632 			}
1633 		}
1634 	}
1635 	lck_mtx_unlock(&tasks_threads_lock);
1636 
1637 	printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1638 	return victim_pid;
1639 }
1640 
1641 
1642 /*
1643  *	vm_map_lookup_entry:	[ internal use only ]
1644  *
1645  *	Calls into the vm map store layer to find the map
1646  *	entry containing (or immediately preceding) the
1647  *	specified address in the given map; the entry is returned
1648  *	in the "entry" parameter.  The boolean
1649  *	result indicates whether the address is
1650  *	actually contained in the map.
1651  */
1652 boolean_t
1653 vm_map_lookup_entry(
1654 	vm_map_t        map,
1655 	vm_map_offset_t address,
1656 	vm_map_entry_t  *entry)         /* OUT */
1657 {
1658 #if CONFIG_KERNEL_TBI
1659 	if (VM_KERNEL_ADDRESS(address)) {
1660 		address = VM_KERNEL_STRIP_UPTR(address);
1661 	}
1662 #endif /* CONFIG_KERNEL_TBI */
1663 #if CONFIG_PROB_GZALLOC
1664 	if (map->pmap == kernel_pmap) {
1665 		assertf(!pgz_owned(address),
1666 		    "it is the responsibility of callers to unguard PGZ addresses");
1667 	}
1668 #endif /* CONFIG_PROB_GZALLOC */
1669 	return vm_map_store_lookup_entry( map, address, entry );
1670 }
1671 
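/*
 * Illustrative sketch (hypothetical helper, not from the original source):
 * the usual caller pattern for vm_map_lookup_entry().  The map lock must be
 * held (read or write) across the lookup and any use of the returned entry.
 */
#if 0
static boolean_t
example_address_is_mapped(vm_map_t map, vm_map_offset_t addr)
{
	vm_map_entry_t  entry;
	boolean_t       mapped;

	vm_map_lock_read(map);
	/* TRUE: "entry" contains addr; FALSE: "entry" precedes the hole */
	mapped = vm_map_lookup_entry(map, addr, &entry);
	vm_map_unlock_read(map);
	return mapped;
}
#endif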
1672 #if CONFIG_PROB_GZALLOC
1673 boolean_t
1674 vm_map_lookup_entry_allow_pgz(
1675 	vm_map_t        map,
1676 	vm_map_offset_t address,
1677 	vm_map_entry_t  *entry)         /* OUT */
1678 {
1679 #if CONFIG_KERNEL_TBI
1680 	if (VM_KERNEL_ADDRESS(address)) {
1681 		address = VM_KERNEL_STRIP_UPTR(address);
1682 	}
1683 #endif /* CONFIG_KERNEL_TBI */
1684 	return vm_map_store_lookup_entry( map, address, entry );
1685 }
1686 #endif /* CONFIG_PROB_GZALLOC */
1687 
1688 
1689 /*
1690  *	Routine:	vm_map_find_space
1691  *	Purpose:
1692  *		Allocate a range in the specified virtual address map,
1693  *		returning the entry allocated for that range.
1694  *		Used by kmem_alloc, etc.
1695  *
1696  *		The map must NOT be locked. It will be returned locked
1697  *		on KERN_SUCCESS, unlocked on failure.
1698  *
1699  *		If an entry is allocated, the object/offset fields
1700  *		are initialized to zero.
1701  */
1702 kern_return_t
1703 vm_map_find_space(
1704 	vm_map_t                map,
1705 	vm_map_offset_t         *address,       /* OUT */
1706 	vm_map_size_t           size,
1707 	vm_map_offset_t         mask,
1708 	vm_map_kernel_flags_t   vmk_flags,
1709 	vm_tag_t                tag,
1710 	vm_map_entry_t          *o_entry)       /* OUT */
1711 {
1712 	vm_map_entry_t          entry, new_entry, hole_entry;
1713 	vm_map_offset_t         start;
1714 	vm_map_offset_t         end;
1715 
1716 	if (size == 0) {
1717 		*address = 0;
1718 		return KERN_INVALID_ARGUMENT;
1719 	}
1720 
1721 	new_entry = vm_map_entry_create(map, FALSE);
1722 	vm_map_lock(map);
1723 
1724 	if (vmk_flags.vmkf_last_free) {
1725 		assert(!map->disable_vmentry_reuse);
1726 		/* TODO: Make backward lookup generic and support guard pages */
1727 		assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
1728 		assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
1729 
1730 		/* Allocate space from end of map */
1731 		vm_map_store_find_last_free(map, &entry);
1732 
1733 		if (!entry) {
1734 			goto noSpace;
1735 		}
1736 
1737 		if (entry == vm_map_to_entry(map)) {
1738 			end = map->max_offset;
1739 		} else {
1740 			end = entry->vme_start;
1741 		}
1742 
1743 		while (TRUE) {
1744 			vm_map_entry_t prev;
1745 
1746 			start = end - size;
1747 
1748 			if ((start < map->min_offset) || end < start) {
1749 				goto noSpace;
1750 			}
1751 
1752 			prev = entry->vme_prev;
1753 			entry = prev;
1754 
1755 			if (prev == vm_map_to_entry(map)) {
1756 				break;
1757 			}
1758 
1759 			if (prev->vme_end <= start) {
1760 				break;
1761 			}
1762 
1763 			/*
1764 			 *	Didn't fit -- move to the next entry.
1765 			 */
1766 
1767 			end = entry->vme_start;
1768 		}
1769 	} else {
1770 		if (vmk_flags.vmkf_guard_after) {
1771 			/* account for the back guard page in the size */
1772 			size += VM_MAP_PAGE_SIZE(map);
1773 		}
1774 
1775 		/*
1776 		 *	Look for the first possible address; if there's already
1777 		 *	something at this address, we have to start after it.
1778 		 */
1779 
1780 		if (map->disable_vmentry_reuse == TRUE) {
1781 			VM_MAP_HIGHEST_ENTRY(map, entry, start);
1782 		} else {
1783 			if (map->holelistenabled) {
1784 				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1785 
1786 				if (hole_entry == NULL) {
1787 					/*
1788 					 * No more space in the map?
1789 					 */
1790 					goto noSpace;
1791 				}
1792 
1793 				entry = hole_entry;
1794 				start = entry->vme_start;
1795 			} else {
1796 				assert(first_free_is_valid(map));
1797 				if ((entry = map->first_free) == vm_map_to_entry(map)) {
1798 					start = map->min_offset;
1799 				} else {
1800 					start = entry->vme_end;
1801 				}
1802 			}
1803 		}
1804 
1805 		/*
1806 		 *	In any case, the "entry" always precedes
1807 		 *	the proposed new region throughout the loop:
1808 		 */
1809 
1810 		while (TRUE) {
1811 			vm_map_entry_t  next;
1812 
1813 			/*
1814 			 *	Find the end of the proposed new region.
1815 			 *	Be sure we didn't go beyond the end, or
1816 			 *	wrap around the address.
1817 			 */
1818 
1819 			if (vmk_flags.vmkf_guard_before) {
1820 				/* reserve space for the front guard page */
1821 				start += VM_MAP_PAGE_SIZE(map);
1822 			}
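			/*
			 * Round "start" up to the requested alignment mask:
			 * e.g. with mask == 0xFFF, start 0x1001 yields
			 * end 0x2000, the next 4KB boundary.
			 */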
1823 			end = ((start + mask) & ~mask);
1824 
1825 			if (end < start) {
1826 				goto noSpace;
1827 			}
1828 			start = end;
1829 			assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1830 			end += size;
1831 			assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1832 
1833 			if ((end > map->max_offset) || (end < start)) {
1834 				goto noSpace;
1835 			}
1836 
1837 			next = entry->vme_next;
1838 
1839 			if (map->holelistenabled) {
1840 				if (entry->vme_end >= end) {
1841 					break;
1842 				}
1843 			} else {
1844 				/*
1845 				 *	If there are no more entries, we must win.
1846 				 *
1847 				 *	OR
1848 				 *
1849 				 *	If there is another entry, it must be
1850 				 *	after the end of the potential new region.
1851 				 */
1852 
1853 				if (next == vm_map_to_entry(map)) {
1854 					break;
1855 				}
1856 
1857 				if (next->vme_start >= end) {
1858 					break;
1859 				}
1860 			}
1861 
1862 			/*
1863 			 *	Didn't fit -- move to the next entry.
1864 			 */
1865 
1866 			entry = next;
1867 
1868 			if (map->holelistenabled) {
1869 				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1870 					/*
1871 					 * Wrapped around
1872 					 */
1873 					goto noSpace;
1874 				}
1875 				start = entry->vme_start;
1876 			} else {
1877 				start = entry->vme_end;
1878 			}
1879 		}
1880 
1881 		if (vmk_flags.vmkf_guard_before) {
1882 			/* go back for the front guard page */
1883 			start -= VM_MAP_PAGE_SIZE(map);
1884 		}
1885 	}
1886 
1887 	if (map->holelistenabled) {
1888 		if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1889 			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
1890 		}
1891 	}
1892 
1893 	/*
1894 	 *	At this point,
1895 	 *		"start" and "end" should define the endpoints of the
1896 	 *			available new range, and
1897 	 *		"entry" should refer to the region before the new
1898 	 *			range, and
1899 	 *
1900 	 *		the map should be locked.
1901 	 */
1902 
1903 	*address = start;
1904 
1905 	assert(start < end);
1906 	new_entry->vme_start = start;
1907 	new_entry->vme_end = end;
1908 	assert(page_aligned(new_entry->vme_start));
1909 	assert(page_aligned(new_entry->vme_end));
1910 	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1911 	    VM_MAP_PAGE_MASK(map)));
1912 	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1913 	    VM_MAP_PAGE_MASK(map)));
1914 
1915 	new_entry->is_shared = FALSE;
1916 	new_entry->is_sub_map = FALSE;
1917 	new_entry->use_pmap = TRUE;
1918 	VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1919 	VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1920 
1921 	new_entry->needs_copy = FALSE;
1922 
1923 	new_entry->inheritance = VM_INHERIT_DEFAULT;
1924 	new_entry->protection = VM_PROT_DEFAULT;
1925 	new_entry->max_protection = VM_PROT_ALL;
1926 	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1927 	new_entry->wired_count = 0;
1928 	new_entry->user_wired_count = 0;
1929 
1930 	new_entry->in_transition = FALSE;
1931 	new_entry->needs_wakeup = FALSE;
1932 	new_entry->no_cache = FALSE;
1933 	new_entry->permanent = FALSE;
1934 	new_entry->superpage_size = FALSE;
1935 	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1936 		new_entry->map_aligned = TRUE;
1937 	} else {
1938 		new_entry->map_aligned = FALSE;
1939 	}
1940 
1941 	new_entry->used_for_jit = FALSE;
1942 	new_entry->pmap_cs_associated = FALSE;
1943 	new_entry->zero_wired_pages = FALSE;
1944 	new_entry->iokit_acct = FALSE;
1945 	new_entry->vme_resilient_codesign = FALSE;
1946 	new_entry->vme_resilient_media = FALSE;
1947 	if (vmk_flags.vmkf_atomic_entry) {
1948 		new_entry->vme_atomic = TRUE;
1949 	} else {
1950 		new_entry->vme_atomic = FALSE;
1951 	}
1952 
1953 	VME_ALIAS_SET(new_entry, tag);
1954 
1955 	/*
1956 	 *	Insert the new entry into the list
1957 	 */
1958 
1959 	vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1960 
1961 	map->size += size;
1962 
1963 	/*
1964 	 *	Update the lookup hint
1965 	 */
1966 	SAVE_HINT_MAP_WRITE(map, new_entry);
1967 
1968 	*o_entry = new_entry;
1969 	return KERN_SUCCESS;
1970 
1971 noSpace:
1972 
1973 	vm_map_entry_dispose(map, new_entry);
1974 	vm_map_unlock(map);
1975 	return KERN_NO_SPACE;
1976 }
1977 
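/*
 * Illustrative sketch (hypothetical caller) of the vm_map_find_space()
 * contract documented above: on KERN_SUCCESS the map comes back locked
 * and "entry" spans [*addr, *addr + size) with a null object; the caller
 * initializes the entry and then unlocks the map.
 */
#if 0
static kern_return_t
example_reserve_range(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *addr /* OUT */)
{
	vm_map_entry_t  entry;
	kern_return_t   kr;

	kr = vm_map_find_space(map, addr, size, (vm_map_offset_t)0,
	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE, &entry);
	if (kr != KERN_SUCCESS) {
		return kr;      /* map is unlocked on failure */
	}
	/* ... VME_OBJECT_SET(entry, ...) / VME_OFFSET_SET(entry, ...) ... */
	vm_map_unlock(map);     /* success path returns the map locked */
	return KERN_SUCCESS;
}
#endif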
1978 int vm_map_pmap_enter_print = FALSE;
1979 int vm_map_pmap_enter_enable = FALSE;
1980 
1981 /*
1982  *	Routine:	vm_map_pmap_enter [internal only]
1983  *
1984  *	Description:
1985  *		Force pages from the specified object to be entered into
1986  *		the pmap at the specified address if they are present.
1987  *		As soon as a page is not found in the object, the scan ends.
1988  *
1989  *	Returns:
1990  *		Nothing.
1991  *
1992  *	In/out conditions:
1993  *		The source map should not be locked on entry.
1994  */
1995 __unused static void
1996 vm_map_pmap_enter(
1997 	vm_map_t                map,
1998 	vm_map_offset_t         addr,
1999 	vm_map_offset_t         end_addr,
2000 	vm_object_t             object,
2001 	vm_object_offset_t      offset,
2002 	vm_prot_t               protection)
2003 {
2004 	int                     type_of_fault;
2005 	kern_return_t           kr;
2006 	struct vm_object_fault_info fault_info = {};
2007 
2008 	if (map->pmap == 0) {
2009 		return;
2010 	}
2011 
2012 	assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
2013 
2014 	while (addr < end_addr) {
2015 		vm_page_t       m;
2016 
2017 
2018 		/*
2019 		 * TODO:
2020 		 * From vm_map_enter(), we come into this function without the map
2021 		 * lock held or the object lock held.
2022 		 * We haven't taken a reference on the object either.
2023 		 * We should do a proper lookup on the map to make sure
2024 		 * that things are sane before we go locking objects that
2025 		 * could have been deallocated from under us.
2026 		 */
2027 
2028 		vm_object_lock(object);
2029 
2030 		m = vm_page_lookup(object, offset);
2031 
2032 		if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2033 		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2034 			vm_object_unlock(object);
2035 			return;
2036 		}
2037 
2038 		if (vm_map_pmap_enter_print) {
2039 			printf("vm_map_pmap_enter:");
2040 			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2041 			    map, (unsigned long long)addr, object, (unsigned long long)offset);
2042 		}
2043 		type_of_fault = DBG_CACHE_HIT_FAULT;
2044 		kr = vm_fault_enter(m, map->pmap,
2045 		    addr,
2046 		    PAGE_SIZE, 0,
2047 		    protection, protection,
2048 		    VM_PAGE_WIRED(m),
2049 		    FALSE,                 /* change_wiring */
2050 		    VM_KERN_MEMORY_NONE,                 /* tag - not wiring */
2051 		    &fault_info,
2052 		    NULL,                  /* need_retry */
2053 		    &type_of_fault);
2054 
2055 		vm_object_unlock(object);
2056 
2057 		offset += PAGE_SIZE_64;
2058 		addr += PAGE_SIZE;
2059 	}
2060 }
2061 
2062 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2063 kern_return_t
2064 vm_map_random_address_for_size(
2065 	vm_map_t        map,
2066 	vm_map_offset_t *address,
2067 	vm_map_size_t   size)
2068 {
2069 	kern_return_t   kr = KERN_SUCCESS;
2070 	int             tries = 0;
2071 	vm_map_offset_t random_addr = 0;
2072 	vm_map_offset_t hole_end;
2073 
2074 	vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
2075 	vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
2076 	vm_map_size_t   vm_hole_size = 0;
2077 	vm_map_size_t   addr_space_size;
2078 
2079 	addr_space_size = vm_map_max(map) - vm_map_min(map);
2080 
2081 	assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
2082 
2083 	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2084 		if (startup_phase < STARTUP_SUB_ZALLOC) {
2085 			random_addr = (vm_map_offset_t)early_random();
2086 		} else {
2087 			random_addr = (vm_map_offset_t)random();
2088 		}
2089 		random_addr <<= VM_MAP_PAGE_SHIFT(map);
2090 		random_addr = vm_map_trunc_page(
2091 			vm_map_min(map) + (random_addr % addr_space_size),
2092 			VM_MAP_PAGE_MASK(map));
2093 
2094 #if CONFIG_PROB_GZALLOC
2095 		if (map->pmap == kernel_pmap && pgz_owned(random_addr)) {
2096 			continue;
2097 		}
2098 #endif /* CONFIG_PROB_GZALLOC */
2099 
2100 		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2101 			if (prev_entry == vm_map_to_entry(map)) {
2102 				next_entry = vm_map_first_entry(map);
2103 			} else {
2104 				next_entry = prev_entry->vme_next;
2105 			}
2106 			if (next_entry == vm_map_to_entry(map)) {
2107 				hole_end = vm_map_max(map);
2108 			} else {
2109 				hole_end = next_entry->vme_start;
2110 			}
2111 			vm_hole_size = hole_end - random_addr;
2112 			if (vm_hole_size >= size) {
2113 				*address = random_addr;
2114 				break;
2115 			}
2116 		}
2117 		tries++;
2118 	}
2119 
2120 	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2121 		kr = KERN_NO_SPACE;
2122 	}
2123 	return kr;
2124 }
2125 
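/*
 * Illustrative sketch: this is how vm_map_enter() below consumes the helper
 * when VM_FLAGS_RANDOM_ADDR is set (map lock held by the caller); on
 * failure the allocation bails out rather than falling back to a
 * sequential search.
 */
#if 0
	if (random_address) {
		result = vm_map_random_address_for_size(map, address, size);
		if (result != KERN_SUCCESS) {
			goto BailOut;
		}
		start = *address;
	}
#endif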
2126 static boolean_t
2127 vm_memory_malloc_no_cow(
2128 	int alias)
2129 {
2130 	uint64_t alias_mask;
2131 
2132 	if (alias > 63) {
2133 		return FALSE;
2134 	}
2135 
2136 	alias_mask = 1ULL << alias;
2137 	if (alias_mask & vm_memory_malloc_no_cow_mask) {
2138 		return TRUE;
2139 	}
2140 	return FALSE;
2141 }
2142 
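/*
 * Illustrative sketch (hypothetical values): vm_memory_malloc_no_cow_mask
 * is a 64-bit bitmap indexed by VM alias, so setting bit VM_MEMORY_MALLOC
 * makes that alias opt out of copy-on-write.
 */
#if 0
	vm_memory_malloc_no_cow_mask = 1ULL << VM_MEMORY_MALLOC;
	assert(vm_memory_malloc_no_cow(VM_MEMORY_MALLOC));
	assert(!vm_memory_malloc_no_cow(VM_MEMORY_STACK));
#endif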
2143 uint64_t vm_map_enter_RLIMIT_AS_count = 0;
2144 uint64_t vm_map_enter_RLIMIT_DATA_count = 0;
2145 /*
2146  *	Routine:	vm_map_enter
2147  *
2148  *	Description:
2149  *		Allocate a range in the specified virtual address map.
2150  *		The resulting range will refer to memory defined by
2151  *		the given memory object and offset into that object.
2152  *
2153  *		Arguments are as defined in the vm_map call.
2154  */
2155 static unsigned int vm_map_enter_restore_successes = 0;
2156 static unsigned int vm_map_enter_restore_failures = 0;
2157 kern_return_t
2158 vm_map_enter(
2159 	vm_map_t                map,
2160 	vm_map_offset_t         *address,       /* IN/OUT */
2161 	vm_map_size_t           size,
2162 	vm_map_offset_t         mask,
2163 	int                     flags,
2164 	vm_map_kernel_flags_t   vmk_flags,
2165 	vm_tag_t                alias,
2166 	vm_object_t             object,
2167 	vm_object_offset_t      offset,
2168 	boolean_t               needs_copy,
2169 	vm_prot_t               cur_protection,
2170 	vm_prot_t               max_protection,
2171 	vm_inherit_t            inheritance)
2172 {
2173 	vm_map_entry_t          entry, new_entry;
2174 	vm_map_offset_t         start, tmp_start, tmp_offset;
2175 	vm_map_offset_t         end, tmp_end;
2176 	vm_map_offset_t         tmp2_start, tmp2_end;
2177 	vm_map_offset_t         desired_empty_end;
2178 	vm_map_offset_t         step;
2179 	kern_return_t           result = KERN_SUCCESS;
2180 	vm_map_t                zap_old_map = VM_MAP_NULL;
2181 	vm_map_t                zap_new_map = VM_MAP_NULL;
2182 	boolean_t               map_locked = FALSE;
2183 	boolean_t               pmap_empty = TRUE;
2184 	boolean_t               new_mapping_established = FALSE;
2185 	boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2186 	boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2187 	boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2188 	boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2189 	boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2190 	boolean_t               is_submap = vmk_flags.vmkf_submap;
2191 	boolean_t               permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
2192 	boolean_t               no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2193 	boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
2194 	boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
2195 	boolean_t               translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
2196 	boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2197 	boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2198 	boolean_t               random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2199 	unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2200 	vm_tag_t                user_alias;
2201 	vm_map_offset_t         effective_min_offset, effective_max_offset;
2202 	kern_return_t           kr;
2203 	boolean_t               clear_map_aligned = FALSE;
2204 	vm_map_entry_t          hole_entry;
2205 	vm_map_size_t           chunk_size = 0;
2206 	vm_object_t             caller_object;
2207 
2208 	caller_object = object;
2209 
2210 	assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2211 
2212 	if (flags & VM_FLAGS_4GB_CHUNK) {
2213 #if defined(__LP64__)
2214 		chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2215 #else /* __LP64__ */
2216 		chunk_size = ANON_CHUNK_SIZE;
2217 #endif /* __LP64__ */
2218 	} else {
2219 		chunk_size = ANON_CHUNK_SIZE;
2220 	}
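	/*
	 * Worked example: a 10GB anonymous allocation with VM_FLAGS_4GB_CHUNK
	 * is carved below into successive entries of 4GB, 4GB and 2GB;
	 * without the flag, anonymous memory is chunked at ANON_CHUNK_SIZE.
	 */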
2221 
2222 	if (superpage_size) {
2223 		switch (superpage_size) {
2224 			/*
2225 			 * Note that the current implementation only supports
2226 			 * a single size for superpages, SUPERPAGE_SIZE, per
2227 			 * architecture. If more sizes are to be supported,
2228 			 * SUPERPAGE_SIZE has to be replaced with a lookup of
2229 			 * the size depending on superpage_size.
2230 			 */
2231 #ifdef __x86_64__
2232 		case SUPERPAGE_SIZE_ANY:
2233 			/* handle it like 2 MB and round up to page size */
2234 			size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2235 			OS_FALLTHROUGH;
2236 		case SUPERPAGE_SIZE_2MB:
2237 			break;
2238 #endif
2239 		default:
2240 			return KERN_INVALID_ARGUMENT;
2241 		}
2242 		mask = SUPERPAGE_SIZE - 1;
2243 		if (size & (SUPERPAGE_SIZE - 1)) {
2244 			return KERN_INVALID_ARGUMENT;
2245 		}
2246 		inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
2247 	}
2248 
2249 
2250 	if ((cur_protection & VM_PROT_WRITE) &&
2251 	    (cur_protection & VM_PROT_EXECUTE) &&
2252 #if XNU_TARGET_OS_OSX
2253 	    map->pmap != kernel_pmap &&
2254 	    (cs_process_global_enforcement() ||
2255 	    (vmk_flags.vmkf_cs_enforcement_override
2256 	    ? vmk_flags.vmkf_cs_enforcement
2257 	    : (vm_map_cs_enforcement(map)
2258 #if __arm64__
2259 	    || !VM_MAP_IS_EXOTIC(map)
2260 #endif /* __arm64__ */
2261 	    ))) &&
2262 #endif /* XNU_TARGET_OS_OSX */
2263 	    (VM_MAP_POLICY_WX_FAIL(map) ||
2264 	    VM_MAP_POLICY_WX_STRIP_X(map)) &&
2265 	    !entry_for_jit) {
2266 		boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2267 
2268 		DTRACE_VM3(cs_wx,
2269 		    uint64_t, 0,
2270 		    uint64_t, 0,
2271 		    vm_prot_t, cur_protection);
2272 		printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2273 		    proc_selfpid(),
2274 		    (current_task()->bsd_info
2275 		    ? proc_name_address(current_task()->bsd_info)
2276 		    : "?"),
2277 		    __FUNCTION__,
2278 		    (vm_protect_wx_fail ? "failing" : "turning off execute"));
2279 		cur_protection &= ~VM_PROT_EXECUTE;
2280 		if (vm_protect_wx_fail) {
2281 			return KERN_PROTECTION_FAILURE;
2282 		}
2283 	}
2284 
2285 	/*
2286 	 * If the task has requested executable lockdown,
2287 	 * deny any new executable mapping.
2288 	 */
2289 	if (map->map_disallow_new_exec == TRUE) {
2290 		if (cur_protection & VM_PROT_EXECUTE) {
2291 			return KERN_PROTECTION_FAILURE;
2292 		}
2293 	}
2294 
2295 	if (resilient_codesign) {
2296 		assert(!is_submap);
2297 		int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
2298 		if ((cur_protection | max_protection) & reject_prot) {
2299 			return KERN_PROTECTION_FAILURE;
2300 		}
2301 	}
2302 
2303 	if (resilient_media) {
2304 		assert(!is_submap);
2305 //		assert(!needs_copy);
2306 		if (object != VM_OBJECT_NULL &&
2307 		    !object->internal) {
2308 			/*
2309 			 * This mapping is directly backed by an external
2310 			 * memory manager (e.g. a vnode pager for a file):
2311 			 * we would not have any safe place to inject
2312 			 * a zero-filled page if an actual page is not
2313 			 * available, without possibly impacting the actual
2314 			 * contents of the mapped object (e.g. the file),
2315 			 * so we can't provide any media resiliency here.
2316 			 */
2317 			return KERN_INVALID_ARGUMENT;
2318 		}
2319 	}
2320 
2321 	if (is_submap) {
2322 		if (purgable) {
2323 			/* submaps can not be purgeable */
2324 			return KERN_INVALID_ARGUMENT;
2325 		}
2326 		if (object == VM_OBJECT_NULL) {
2327 			/* submaps can not be created lazily */
2328 			return KERN_INVALID_ARGUMENT;
2329 		}
2330 	}
2331 	if (vmk_flags.vmkf_already) {
2332 		/*
2333 		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2334 		 * is already present.  For it to be meaningful, the requested
2335 		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2336 		 * we shouldn't try to remove what was mapped there first
2337 		 * (!VM_FLAGS_OVERWRITE).
2338 		 */
2339 		if ((flags & VM_FLAGS_ANYWHERE) ||
2340 		    (flags & VM_FLAGS_OVERWRITE)) {
2341 			return KERN_INVALID_ARGUMENT;
2342 		}
2343 	}
2344 
2345 	effective_min_offset = map->min_offset;
2346 
2347 	if (vmk_flags.vmkf_beyond_max) {
2348 		/*
2349 		 * Allow an insertion beyond the map's max offset.
2350 		 */
2351 #if     !defined(__arm__)
2352 		if (vm_map_is_64bit(map)) {
2353 			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2354 		} else
2355 #endif  /* __arm__ */
2356 		effective_max_offset = 0x00000000FFFFF000ULL;
2357 	} else {
2358 #if XNU_TARGET_OS_OSX
2359 		if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2360 			effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2361 		} else {
2362 			effective_max_offset = map->max_offset;
2363 		}
2364 #else /* XNU_TARGET_OS_OSX */
2365 		effective_max_offset = map->max_offset;
2366 #endif /* XNU_TARGET_OS_OSX */
2367 	}
2368 
2369 	if (size == 0 ||
2370 	    (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2371 		*address = 0;
2372 		return KERN_INVALID_ARGUMENT;
2373 	}
2374 
2375 	if (map->pmap == kernel_pmap) {
2376 		user_alias = VM_KERN_MEMORY_NONE;
2377 	} else {
2378 		user_alias = alias;
2379 	}
2380 
2381 	if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2382 		chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2383 	}
2384 
2385 #define RETURN(value)   { result = value; goto BailOut; }
2386 
2387 	assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2388 	assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2389 	if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2390 		assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2391 		assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2392 	}
2393 
2394 	if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2395 	    !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2396 		/*
2397 		 * In most cases, the caller rounds the size up to the
2398 		 * map's page size.
2399 		 * If we get a size that is explicitly not map-aligned here,
2400 		 * we'll have to respect the caller's wish and mark the
2401 		 * mapping as "not map-aligned" to avoid tripping the
2402 		 * map alignment checks later.
2403 		 */
2404 		clear_map_aligned = TRUE;
2405 	}
2406 	if (!anywhere &&
2407 	    VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2408 	    !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2409 		/*
2410 		 * We've been asked to map at a fixed address and that
2411 		 * address is not aligned to the map's specific alignment.
2412 		 * The caller should know what it's doing (i.e. most likely
2413 		 * mapping some fragmented copy map, transferring memory from
2414 		 * a VM map with a different alignment), so clear map_aligned
2415 		 * for this new VM map entry and proceed.
2416 		 */
2417 		clear_map_aligned = TRUE;
2418 	}
2419 
2420 	/*
2421 	 * Only zero-fill objects are allowed to be purgable.
2422 	 * LP64todo - limit purgable objects to 32-bits for now
2423 	 */
2424 	if (purgable &&
2425 	    (offset != 0 ||
2426 	    (object != VM_OBJECT_NULL &&
2427 	    (object->vo_size != size ||
2428 	    object->purgable == VM_PURGABLE_DENY))
2429 	    || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2430 		return KERN_INVALID_ARGUMENT;
2431 	}
2432 
2433 	if (!anywhere && overwrite) {
2434 		/*
2435 		 * Create a temporary VM map to hold the old mappings in the
2436 		 * affected area while we create the new one.
2437 		 * This avoids releasing the VM map lock in
2438 		 * vm_map_entry_delete() and allows atomicity
2439 		 * when we want to replace some mappings with a new one.
2440 		 * It also allows us to restore the old VM mappings if the
2441 		 * new mapping fails.
2442 		 */
2443 		zap_old_map = vm_map_create_options(PMAP_NULL,
2444 		    *address,
2445 		    *address + size,
2446 		    VM_MAP_CREATE_ZAP_OPTIONS(map));
2447 		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2448 	}
2449 
2450 StartAgain:;
2451 
2452 	start = *address;
2453 
2454 	if (anywhere) {
2455 		vm_map_lock(map);
2456 		map_locked = TRUE;
2457 
2458 		if (entry_for_jit) {
2459 			if (map->jit_entry_exists &&
2460 			    !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2461 				result = KERN_INVALID_ARGUMENT;
2462 				goto BailOut;
2463 			}
2464 			if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2465 				random_address = TRUE;
2466 			}
2467 		}
2468 
2469 		if (random_address) {
2470 			/*
2471 			 * Get a random start address.
2472 			 */
2473 			result = vm_map_random_address_for_size(map, address, size);
2474 			if (result != KERN_SUCCESS) {
2475 				goto BailOut;
2476 			}
2477 			start = *address;
2478 		}
2479 #if XNU_TARGET_OS_OSX
2480 		else if ((start == 0 || start == vm_map_min(map)) &&
2481 		    !map->disable_vmentry_reuse &&
2482 		    map->vmmap_high_start != 0) {
2483 			start = map->vmmap_high_start;
2484 		}
2485 #endif /* XNU_TARGET_OS_OSX */
2486 
2487 
2488 		/*
2489 		 *	Calculate the first possible address.
2490 		 */
2491 
2492 		if (start < effective_min_offset) {
2493 			start = effective_min_offset;
2494 		}
2495 		if (start > effective_max_offset) {
2496 			RETURN(KERN_NO_SPACE);
2497 		}
2498 
2499 		/*
2500 		 *	Look for the first possible address;
2501 		 *	if there's already something at this
2502 		 *	address, we have to start after it.
2503 		 */
2504 
2505 		if (map->disable_vmentry_reuse == TRUE) {
2506 			VM_MAP_HIGHEST_ENTRY(map, entry, start);
2507 		} else {
2508 			if (map->holelistenabled) {
2509 				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2510 
2511 				if (hole_entry == NULL) {
2512 					/*
2513 					 * No more space in the map?
2514 					 */
2515 					result = KERN_NO_SPACE;
2516 					goto BailOut;
2517 				} else {
2518 					boolean_t found_hole = FALSE;
2519 
2520 					do {
2521 						if (hole_entry->vme_start >= start) {
2522 							start = hole_entry->vme_start;
2523 							found_hole = TRUE;
2524 							break;
2525 						}
2526 
2527 						if (hole_entry->vme_end > start) {
2528 							found_hole = TRUE;
2529 							break;
2530 						}
2531 						hole_entry = hole_entry->vme_next;
2532 					} while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2533 
2534 					if (found_hole == FALSE) {
2535 						result = KERN_NO_SPACE;
2536 						goto BailOut;
2537 					}
2538 
2539 					entry = hole_entry;
2540 
2541 					if (start == 0) {
2542 						start += PAGE_SIZE_64;
2543 					}
2544 				}
2545 			} else {
2546 				assert(first_free_is_valid(map));
2547 
2548 				entry = map->first_free;
2549 
2550 				if (entry == vm_map_to_entry(map)) {
2551 					entry = NULL;
2552 				} else {
2553 					if (entry->vme_next == vm_map_to_entry(map)) {
2554 						/*
2555 						 * Hole at the end of the map.
2556 						 */
2557 						entry = NULL;
2558 					} else {
2559 						if (start < (entry->vme_next)->vme_start) {
2560 							start = entry->vme_end;
2561 							start = vm_map_round_page(start,
2562 							    VM_MAP_PAGE_MASK(map));
2563 						} else {
2564 							/*
2565 							 * Need to do a lookup.
2566 							 */
2567 							entry = NULL;
2568 						}
2569 					}
2570 				}
2571 
2572 				if (entry == NULL) {
2573 					vm_map_entry_t  tmp_entry;
2574 					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2575 						assert(!entry_for_jit);
2576 						start = tmp_entry->vme_end;
2577 						start = vm_map_round_page(start,
2578 						    VM_MAP_PAGE_MASK(map));
2579 					}
2580 					entry = tmp_entry;
2581 				}
2582 			}
2583 		}
2584 
2585 		/*
2586 		 *	In any case, the "entry" always precedes
2587 		 *	the proposed new region throughout the
2588 		 *	loop:
2589 		 */
2590 
2591 		while (TRUE) {
2592 			vm_map_entry_t  next;
2593 
2594 			/*
2595 			 *	Find the end of the proposed new region.
2596 			 *	Be sure we didn't go beyond the end, or
2597 			 *	wrap around the address.
2598 			 */
2599 
2600 			end = ((start + mask) & ~mask);
2601 			end = vm_map_round_page(end,
2602 			    VM_MAP_PAGE_MASK(map));
2603 			if (end < start) {
2604 				RETURN(KERN_NO_SPACE);
2605 			}
2606 			start = end;
2607 			assert(VM_MAP_PAGE_ALIGNED(start,
2608 			    VM_MAP_PAGE_MASK(map)));
2609 			end += size;
2610 
2611 			/* We want an entire page of empty space, but don't increase the allocation size. */
2612 			desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2613 
2614 			if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2615 				if (map->wait_for_space) {
2616 					assert(!keep_map_locked);
2617 					if (size <= (effective_max_offset -
2618 					    effective_min_offset)) {
2619 						assert_wait((event_t)map,
2620 						    THREAD_ABORTSAFE);
2621 						vm_map_unlock(map);
2622 						map_locked = FALSE;
2623 						thread_block(THREAD_CONTINUE_NULL);
2624 						goto StartAgain;
2625 					}
2626 				}
2627 				RETURN(KERN_NO_SPACE);
2628 			}
2629 
2630 			next = entry->vme_next;
2631 
2632 			if (map->holelistenabled) {
2633 				if (entry->vme_end >= desired_empty_end) {
2634 					break;
2635 				}
2636 			} else {
2637 				/*
2638 				 *	If there are no more entries, we must win.
2639 				 *
2640 				 *	OR
2641 				 *
2642 				 *	If there is another entry, it must be
2643 				 *	after the end of the potential new region.
2644 				 */
2645 
2646 				if (next == vm_map_to_entry(map)) {
2647 					break;
2648 				}
2649 
2650 				if (next->vme_start >= desired_empty_end) {
2651 					break;
2652 				}
2653 			}
2654 
2655 			/*
2656 			 *	Didn't fit -- move to the next entry.
2657 			 */
2658 
2659 			entry = next;
2660 
2661 			if (map->holelistenabled) {
2662 				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2663 					/*
2664 					 * Wrapped around
2665 					 */
2666 					result = KERN_NO_SPACE;
2667 					goto BailOut;
2668 				}
2669 				start = entry->vme_start;
2670 			} else {
2671 				start = entry->vme_end;
2672 			}
2673 
2674 			start = vm_map_round_page(start,
2675 			    VM_MAP_PAGE_MASK(map));
2676 		}
2677 
2678 		if (map->holelistenabled) {
2679 			if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2680 				panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
2681 			}
2682 		}
2683 
2684 		*address = start;
2685 		assert(VM_MAP_PAGE_ALIGNED(*address,
2686 		    VM_MAP_PAGE_MASK(map)));
2687 	} else {
2688 		if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2689 		    !overwrite &&
2690 		    user_alias == VM_MEMORY_REALLOC) {
2691 			/*
2692 			 * Force realloc() to switch to a new allocation,
2693 			 * to prevent 4k-fragmented virtual ranges.
2694 			 */
2695 //			DEBUG4K_ERROR("no realloc in place");
2696 			return KERN_NO_SPACE;
2697 		}
2698 
2699 		/*
2700 		 *	Verify that:
2701 		 *		the address doesn't itself violate
2702 		 *		the mask requirement.
2703 		 */
2704 
2705 		vm_map_lock(map);
2706 		map_locked = TRUE;
2707 		if ((start & mask) != 0) {
2708 			RETURN(KERN_NO_SPACE);
2709 		}
2710 
2711 		/*
2712 		 *	...	the address is within bounds
2713 		 */
2714 
2715 		end = start + size;
2716 
2717 		if ((start < effective_min_offset) ||
2718 		    (end > effective_max_offset) ||
2719 		    (start >= end)) {
2720 			RETURN(KERN_INVALID_ADDRESS);
2721 		}
2722 
2723 		if (overwrite && zap_old_map != VM_MAP_NULL) {
2724 			int remove_flags;
2725 			/*
2726 			 * Fixed mapping and "overwrite" flag: attempt to
2727 			 * remove all existing mappings in the specified
2728 			 * address range, saving them in our "zap_old_map".
2729 			 */
2730 			remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2731 			remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2732 			if (vmk_flags.vmkf_overwrite_immutable) {
2733 				/* we can overwrite immutable mappings */
2734 				remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2735 			}
2736 			(void) vm_map_delete(map, start, end,
2737 			    remove_flags,
2738 			    zap_old_map);
2739 		}
2740 
2741 		/*
2742 		 *	...	the starting address isn't allocated
2743 		 */
2744 
2745 		if (vm_map_lookup_entry(map, start, &entry)) {
2746 			if (!(vmk_flags.vmkf_already)) {
2747 				RETURN(KERN_NO_SPACE);
2748 			}
2749 			/*
2750 			 * Check if what's already there is what we want.
2751 			 */
2752 			tmp_start = start;
2753 			tmp_offset = offset;
2754 			if (entry->vme_start < start) {
2755 				tmp_start -= start - entry->vme_start;
2756 				tmp_offset -= start - entry->vme_start;
2757 			}
2758 			for (; entry->vme_start < end;
2759 			    entry = entry->vme_next) {
2760 				/*
2761 				 * Check if the mapping's attributes
2762 				 * match the existing map entry.
2763 				 */
2764 				if (entry == vm_map_to_entry(map) ||
2765 				    entry->vme_start != tmp_start ||
2766 				    entry->is_sub_map != is_submap ||
2767 				    VME_OFFSET(entry) != tmp_offset ||
2768 				    entry->needs_copy != needs_copy ||
2769 				    entry->protection != cur_protection ||
2770 				    entry->max_protection != max_protection ||
2771 				    entry->inheritance != inheritance ||
2772 				    entry->iokit_acct != iokit_acct ||
2773 				    VME_ALIAS(entry) != alias) {
2774 					/* not the same mapping ! */
2775 					RETURN(KERN_NO_SPACE);
2776 				}
2777 				/*
2778 				 * Check if the same object is being mapped.
2779 				 */
2780 				if (is_submap) {
2781 					if (VME_SUBMAP(entry) !=
2782 					    (vm_map_t) object) {
2783 						/* not the same submap */
2784 						RETURN(KERN_NO_SPACE);
2785 					}
2786 				} else {
2787 					if (VME_OBJECT(entry) != object) {
2788 						/* not the same VM object... */
2789 						vm_object_t obj2;
2790 
2791 						obj2 = VME_OBJECT(entry);
2792 						if ((obj2 == VM_OBJECT_NULL ||
2793 						    obj2->internal) &&
2794 						    (object == VM_OBJECT_NULL ||
2795 						    object->internal)) {
2796 							/*
2797 							 * ... but both are
2798 							 * anonymous memory,
2799 							 * so equivalent.
2800 							 */
2801 						} else {
2802 							RETURN(KERN_NO_SPACE);
2803 						}
2804 					}
2805 				}
2806 
2807 				tmp_offset += entry->vme_end - entry->vme_start;
2808 				tmp_start += entry->vme_end - entry->vme_start;
2809 				if (entry->vme_end >= end) {
2810 					/* reached the end of our mapping */
2811 					break;
2812 				}
2813 			}
2814 			/* it all matches:  let's use what's already there ! */
2815 			RETURN(KERN_MEMORY_PRESENT);
2816 		}
2817 
2818 		/*
2819 		 *	...	the next region doesn't overlap the
2820 		 *		end point.
2821 		 */
2822 
2823 		if ((entry->vme_next != vm_map_to_entry(map)) &&
2824 		    (entry->vme_next->vme_start < end)) {
2825 			RETURN(KERN_NO_SPACE);
2826 		}
2827 	}
2828 
2829 	/*
2830 	 *	At this point,
2831 	 *		"start" and "end" should define the endpoints of the
2832 	 *			available new range, and
2833 	 *		"entry" should refer to the region before the new
2834 	 *			range, and
2835 	 *
2836 	 *		the map should be locked.
2837 	 */
2838 
2839 	/*
2840 	 *	See whether we can avoid creating a new entry (and object) by
2841 	 *	extending one of our neighbors.  [So far, we only attempt to
2842 	 *	extend from below.]  Note that we can never extend/join
2843 	 *	purgable objects because they need to remain distinct
2844 	 *	entities in order to implement their "volatile object"
2845 	 *	semantics.
2846 	 */
2847 
2848 	if (purgable ||
2849 	    entry_for_jit ||
2850 	    vm_memory_malloc_no_cow(user_alias)) {
2851 		if (object == VM_OBJECT_NULL) {
2852 			object = vm_object_allocate(size);
2853 			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2854 			object->true_share = FALSE;
2855 			if (purgable) {
2856 				task_t owner;
2857 				object->purgable = VM_PURGABLE_NONVOLATILE;
2858 				if (map->pmap == kernel_pmap) {
2859 					/*
2860 					 * Purgeable mappings made in a kernel
2861 					 * map are "owned" by the kernel itself
2862 					 * rather than the current user task
2863 					 * because they're likely to be used by
2864 					 * more than this user task (see
2865 					 * execargs_purgeable_allocate(), for
2866 					 * example).
2867 					 */
2868 					owner = kernel_task;
2869 				} else {
2870 					owner = current_task();
2871 				}
2872 				assert(object->vo_owner == NULL);
2873 				assert(object->resident_page_count == 0);
2874 				assert(object->wired_page_count == 0);
2875 				vm_object_lock(object);
2876 				vm_purgeable_nonvolatile_enqueue(object, owner);
2877 				vm_object_unlock(object);
2878 			}
2879 			offset = (vm_object_offset_t)0;
2880 		}
2881 	} else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
2882 		/* no coalescing if address space uses sub-pages */
2883 	} else if ((is_submap == FALSE) &&
2884 	    (object == VM_OBJECT_NULL) &&
2885 	    (entry != vm_map_to_entry(map)) &&
2886 	    (entry->vme_end == start) &&
2887 	    (!entry->is_shared) &&
2888 	    (!entry->is_sub_map) &&
2889 	    (!entry->in_transition) &&
2890 	    (!entry->needs_wakeup) &&
2891 	    (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2892 	    (entry->protection == cur_protection) &&
2893 	    (entry->max_protection == max_protection) &&
2894 	    (entry->inheritance == inheritance) &&
2895 	    ((user_alias == VM_MEMORY_REALLOC) ||
2896 	    (VME_ALIAS(entry) == alias)) &&
2897 	    (entry->no_cache == no_cache) &&
2898 	    (entry->permanent == permanent) &&
2899 	    /* no coalescing for immutable executable mappings */
2900 	    !((entry->protection & VM_PROT_EXECUTE) &&
2901 	    entry->permanent) &&
2902 	    (!entry->superpage_size && !superpage_size) &&
2903 	    /*
2904 	     * No coalescing if not map-aligned, to avoid propagating
2905 	     * that condition any further than needed:
2906 	     */
2907 	    (!entry->map_aligned || !clear_map_aligned) &&
2908 	    (!entry->zero_wired_pages) &&
2909 	    (!entry->used_for_jit && !entry_for_jit) &&
2910 	    (!entry->pmap_cs_associated) &&
2911 	    (entry->iokit_acct == iokit_acct) &&
2912 	    (!entry->vme_resilient_codesign) &&
2913 	    (!entry->vme_resilient_media) &&
2914 	    (!entry->vme_atomic) &&
2915 	    (entry->vme_no_copy_on_read == no_copy_on_read) &&
2916 
2917 	    ((entry->vme_end - entry->vme_start) + size <=
2918 	    (user_alias == VM_MEMORY_REALLOC ?
2919 	    ANON_CHUNK_SIZE :
2920 	    NO_COALESCE_LIMIT)) &&
2921 
2922 	    (entry->wired_count == 0)) {        /* implies user_wired_count == 0 */
2923 		if (vm_object_coalesce(VME_OBJECT(entry),
2924 		    VM_OBJECT_NULL,
2925 		    VME_OFFSET(entry),
2926 		    (vm_object_offset_t) 0,
2927 		    (vm_map_size_t)(entry->vme_end - entry->vme_start),
2928 		    (vm_map_size_t)(end - entry->vme_end))) {
2929 			/*
2930 			 *	Coalesced the two objects - can extend
2931 			 *	the previous map entry to include the
2932 			 *	new range.
2933 			 */
2934 			map->size += (end - entry->vme_end);
2935 			assert(entry->vme_start < end);
2936 			assert(VM_MAP_PAGE_ALIGNED(end,
2937 			    VM_MAP_PAGE_MASK(map)));
2938 			if (__improbable(vm_debug_events)) {
2939 				DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2940 			}
2941 			entry->vme_end = end;
2942 			if (map->holelistenabled) {
2943 				vm_map_store_update_first_free(map, entry, TRUE);
2944 			} else {
2945 				vm_map_store_update_first_free(map, map->first_free, TRUE);
2946 			}
2947 			new_mapping_established = TRUE;
2948 			RETURN(KERN_SUCCESS);
2949 		}
2950 	}
2951 
2952 	step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2953 	new_entry = NULL;
2954 
2955 	for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2956 		tmp2_end = tmp2_start + step;
2957 		/*
2958 		 *	Create a new entry
2959 		 *
2960 		 * XXX FBDP
2961 		 * The reserved "page zero" in each process's address space can
2962 		 * be arbitrarily large.  Splitting it into separate objects and
2963 		 * therefore different VM map entries serves no purpose and just
2964 		 * slows down operations on the VM map, so let's not split the
2965 		 * allocation into chunks if the max protection is NONE.  That
2966 		 * memory should never be accessible, so it will never get to the
2967 		 * default pager.
2968 		 */
2969 		tmp_start = tmp2_start;
2970 		if (object == VM_OBJECT_NULL &&
2971 		    size > chunk_size &&
2972 		    max_protection != VM_PROT_NONE &&
2973 		    superpage_size == 0) {
2974 			tmp_end = tmp_start + chunk_size;
2975 		} else {
2976 			tmp_end = tmp2_end;
2977 		}
2978 		do {
2979 			if (!is_submap &&
2980 			    object != VM_OBJECT_NULL &&
2981 			    object->internal &&
2982 			    offset + (tmp_end - tmp_start) > object->vo_size) {
2983 //				printf("FBDP object %p size 0x%llx overmapping offset 0x%llx size 0x%llx\n", object, object->vo_size, offset, (uint64_t)(tmp_end - tmp_start));
2984 				DTRACE_VM5(vm_map_enter_overmap,
2985 				    vm_map_t, map,
2986 				    vm_map_address_t, tmp_start,
2987 				    vm_map_address_t, tmp_end,
2988 				    vm_object_offset_t, offset,
2989 				    vm_object_size_t, object->vo_size);
2990 			}
2991 			new_entry = vm_map_entry_insert(map,
2992 			    entry, tmp_start, tmp_end,
2993 			    object, offset, vmk_flags,
2994 			    needs_copy, FALSE, FALSE,
2995 			    cur_protection, max_protection,
2996 			    VM_BEHAVIOR_DEFAULT,
2997 			    (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
2998 			    VM_INHERIT_NONE : inheritance),
2999 			    0,
3000 			    no_cache,
3001 			    permanent,
3002 			    no_copy_on_read,
3003 			    superpage_size,
3004 			    clear_map_aligned,
3005 			    is_submap,
3006 			    entry_for_jit,
3007 			    alias,
3008 			    translated_allow_execute);
3009 
3010 			assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3011 
3012 			if (resilient_codesign) {
3013 				int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
3014 				if (!((cur_protection | max_protection) & reject_prot)) {
3015 					new_entry->vme_resilient_codesign = TRUE;
3016 				}
3017 			}
3018 
3019 			if (resilient_media &&
3020 			    (object == VM_OBJECT_NULL ||
3021 			    object->internal)) {
3022 				new_entry->vme_resilient_media = TRUE;
3023 			}
3024 
3025 			assert(!new_entry->iokit_acct);
3026 			if (!is_submap &&
3027 			    object != VM_OBJECT_NULL &&
3028 			    (object->purgable != VM_PURGABLE_DENY ||
3029 			    object->vo_ledger_tag)) {
3030 				assert(new_entry->use_pmap);
3031 				assert(!new_entry->iokit_acct);
3032 				/*
3033 				 * Turn off pmap accounting since
3034 				 * purgeable (or tagged) objects have their
3035 				 * own ledgers.
3036 				 */
3037 				new_entry->use_pmap = FALSE;
3038 			} else if (!is_submap &&
3039 			    iokit_acct &&
3040 			    object != VM_OBJECT_NULL &&
3041 			    object->internal) {
3042 				/* alternate accounting */
3043 				assert(!new_entry->iokit_acct);
3044 				assert(new_entry->use_pmap);
3045 				new_entry->iokit_acct = TRUE;
3046 				new_entry->use_pmap = FALSE;
3047 				DTRACE_VM4(
3048 					vm_map_iokit_mapped_region,
3049 					vm_map_t, map,
3050 					vm_map_offset_t, new_entry->vme_start,
3051 					vm_map_offset_t, new_entry->vme_end,
3052 					int, VME_ALIAS(new_entry));
3053 				vm_map_iokit_mapped_region(
3054 					map,
3055 					(new_entry->vme_end -
3056 					new_entry->vme_start));
3057 			} else if (!is_submap) {
3058 				assert(!new_entry->iokit_acct);
3059 				assert(new_entry->use_pmap);
3060 			}
3061 
3062 			if (is_submap) {
3063 				vm_map_t        submap;
3064 				boolean_t       submap_is_64bit;
3065 				boolean_t       use_pmap;
3066 
3067 				assert(new_entry->is_sub_map);
3068 				assert(!new_entry->use_pmap);
3069 				assert(!new_entry->iokit_acct);
3070 				submap = (vm_map_t) object;
3071 				submap_is_64bit = vm_map_is_64bit(submap);
3072 				use_pmap = vmk_flags.vmkf_nested_pmap;
3073 #ifndef NO_NESTED_PMAP
3074 				if (use_pmap && submap->pmap == NULL) {
3075 					ledger_t ledger = map->pmap->ledger;
3076 					/* we need a sub pmap to nest... */
3077 					submap->pmap = pmap_create_options(ledger, 0,
3078 					    submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3079 					if (submap->pmap == NULL) {
3080 						/* let's proceed without nesting... */
3081 					}
3082 #if     defined(__arm__) || defined(__arm64__)
3083 					else {
3084 						pmap_set_nested(submap->pmap);
3085 					}
3086 #endif
3087 				}
3088 				if (use_pmap && submap->pmap != NULL) {
3089 					if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3090 						DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3091 						kr = KERN_FAILURE;
3092 					} else {
3093 						kr = pmap_nest(map->pmap,
3094 						    submap->pmap,
3095 						    tmp_start,
3096 						    tmp_end - tmp_start);
3097 					}
3098 					if (kr != KERN_SUCCESS) {
3099 						printf("vm_map_enter: "
3100 						    "pmap_nest(0x%llx,0x%llx) "
3101 						    "error 0x%x\n",
3102 						    (long long)tmp_start,
3103 						    (long long)tmp_end,
3104 						    kr);
3105 					} else {
3106 						/* we're now nested ! */
3107 						new_entry->use_pmap = TRUE;
3108 						pmap_empty = FALSE;
3109 					}
3110 				}
3111 #endif /* NO_NESTED_PMAP */
3112 			}
3113 			entry = new_entry;
3114 
3115 			if (superpage_size) {
3116 				vm_page_t pages, m;
3117 				vm_object_t sp_object;
3118 				vm_object_offset_t sp_offset;
3119 
3120 				VME_OFFSET_SET(entry, 0);
3121 
3122 				/* allocate one superpage */
3123 				kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3124 				if (kr != KERN_SUCCESS) {
3125 					/* deallocate whole range... */
3126 					new_mapping_established = TRUE;
3127 					/* ... but only up to "tmp_end" */
3128 					size -= end - tmp_end;
3129 					RETURN(kr);
3130 				}
3131 
3132 				/* create one vm_object per superpage */
3133 				sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3134 				sp_object->phys_contiguous = TRUE;
3135 				sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3136 				VME_OBJECT_SET(entry, sp_object);
3137 				assert(entry->use_pmap);
3138 
3139 				/* enter the base pages into the object */
3140 				vm_object_lock(sp_object);
3141 				for (sp_offset = 0;
3142 				    sp_offset < SUPERPAGE_SIZE;
3143 				    sp_offset += PAGE_SIZE) {
3144 					m = pages;
3145 					pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3146 					pages = NEXT_PAGE(m);
3147 					*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3148 					vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3149 				}
3150 				vm_object_unlock(sp_object);
3151 			}
3152 		} while (tmp_end != tmp2_end &&
3153 		    (tmp_start = tmp_end) &&
3154 		    (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3155 		    tmp_end + chunk_size : tmp2_end));
3156 	}
3157 
3158 	new_mapping_established = TRUE;
3159 
3160 BailOut:
3161 	assert(map_locked == TRUE);
3162 
3163 	/*
3164 	 * Address space limit enforcement (RLIMIT_AS and RLIMIT_DATA):
3165 	 * If we have identified and possibly established the new mapping(s),
3166 	 * make sure we did not go beyond the address space limit.
3167 	 */
3168 	if (result == KERN_SUCCESS) {
3169 		if (map->size_limit != RLIM_INFINITY &&
3170 		    map->size > map->size_limit) {
3171 			/*
3172 			 * Establishing the requested mappings would exceed
3173 			 * the process's RLIMIT_AS limit: fail with
3174 			 * KERN_NO_SPACE.
3175 			 */
3176 			result = KERN_NO_SPACE;
3177 			printf("%d[%s] %s: map size 0x%llx over RLIMIT_AS 0x%llx\n",
3178 			    proc_selfpid(),
3179 			    (current_task()->bsd_info
3180 			    ? proc_name_address(current_task()->bsd_info)
3181 			    : "?"),
3182 			    __FUNCTION__,
3183 			    (uint64_t) map->size,
3184 			    (uint64_t) map->size_limit);
3185 			DTRACE_VM2(vm_map_enter_RLIMIT_AS,
3186 			    vm_map_size_t, map->size,
3187 			    uint64_t, map->size_limit);
3188 			vm_map_enter_RLIMIT_AS_count++;
3189 		} else if (map->data_limit != RLIM_INFINITY &&
3190 		    map->size > map->data_limit) {
3191 			/*
3192 			 * Establishing the requested mappings would exceed
3193 			 * the process's RLIMIT_DATA limit: fail with
3194 			 * KERN_NO_SPACE.
3195 			 */
3196 			result = KERN_NO_SPACE;
3197 			printf("%d[%s] %s: map size 0x%llx over RLIMIT_DATA 0x%llx\n",
3198 			    proc_selfpid(),
3199 			    (current_task()->bsd_info
3200 			    ? proc_name_address(current_task()->bsd_info)
3201 			    : "?"),
3202 			    __FUNCTION__,
3203 			    (uint64_t) map->size,
3204 			    (uint64_t) map->data_limit);
3205 			DTRACE_VM2(vm_map_enter_RLIMIT_DATA,
3206 			    vm_map_size_t, map->size,
3207 			    uint64_t, map->data_limit);
3208 			vm_map_enter_RLIMIT_DATA_count++;
3209 		}
3210 	}
3211 
3212 	if (result == KERN_SUCCESS) {
3213 		vm_prot_t pager_prot;
3214 		memory_object_t pager;
3215 
3216 #if DEBUG
3217 		if (pmap_empty &&
3218 		    !(vmk_flags.vmkf_no_pmap_check)) {
3219 			assert(pmap_is_empty(map->pmap,
3220 			    *address,
3221 			    *address + size));
3222 		}
3223 #endif /* DEBUG */
3224 
3225 		/*
3226 		 * For "named" VM objects, let the pager know that the
3227 		 * memory object is being mapped.  Some pagers need to keep
3228 		 * track of this, to know when they can reclaim the memory
3229 		 * object, for example.
3230 		 * VM calls memory_object_map() for each mapping (specifying
3231 		 * the protection of each mapping) and calls
3232 		 * memory_object_last_unmap() when all the mappings are gone.
3233 		 */
3234 		pager_prot = max_protection;
3235 		if (needs_copy) {
3236 			/*
3237 			 * Copy-On-Write mapping: won't modify
3238 			 * the memory object.
3239 			 */
3240 			pager_prot &= ~VM_PROT_WRITE;
3241 		}
3242 		if (!is_submap &&
3243 		    object != VM_OBJECT_NULL &&
3244 		    object->named &&
3245 		    object->pager != MEMORY_OBJECT_NULL) {
3246 			vm_object_lock(object);
3247 			pager = object->pager;
3248 			if (object->named &&
3249 			    pager != MEMORY_OBJECT_NULL) {
3250 				assert(object->pager_ready);
3251 				vm_object_mapping_wait(object, THREAD_UNINT);
3252 				vm_object_mapping_begin(object);
3253 				vm_object_unlock(object);
3254 
3255 				kr = memory_object_map(pager, pager_prot);
3256 				assert(kr == KERN_SUCCESS);
3257 
3258 				vm_object_lock(object);
3259 				vm_object_mapping_end(object);
3260 			}
3261 			vm_object_unlock(object);
3262 		}
3263 	}
3264 
3265 	assert(map_locked == TRUE);
3266 
3267 	if (!keep_map_locked) {
3268 		vm_map_unlock(map);
3269 		map_locked = FALSE;
3270 	}
3271 
3272 	/*
3273 	 * We can't hold the map lock if we enter this block.
3274 	 */
3275 
3276 	if (result == KERN_SUCCESS) {
3277 		/*	Wire down the new entry if the user
3278 		 *	requested all new map entries be wired.
3279 		 */
3280 		if ((map->wiring_required) || (superpage_size)) {
3281 			assert(!keep_map_locked);
3282 			pmap_empty = FALSE; /* pmap won't be empty */
3283 			kr = vm_map_wire_kernel(map, start, end,
3284 			    new_entry->protection, VM_KERN_MEMORY_MLOCK,
3285 			    TRUE);
3286 			result = kr;
3287 		}
3288 
3289 	}
3290 
3291 	if (result != KERN_SUCCESS) {
3292 		if (new_mapping_established) {
3293 			/*
3294 			 * The caller had an extra reference on the VM object
3295 			 * it gave us.
3296 			 * We've transferred that reference to the mapping we
3297 			 * just established but we're about to undo that mapping
3298 			 * and release that reference.
3299 			 * The caller expects its reference to be consumed on
3300 			 * success only, so we have to get the extra reference
3301 			 * back for the caller.
3302 			 */
3303 			vm_object_reference(caller_object);
3304 
3305 			/*
3306 			 * We have to get rid of the new mappings since we
3307 			 * won't make them available to the user.
3308 			 * Try to do that atomically, to minimize the risk
3309 			 * that someone else creates new mappings in that range.
3310 			 */
3311 			zap_new_map = vm_map_create_options(PMAP_NULL,
3312 			    *address,
3313 			    *address + size,
3314 			    VM_MAP_CREATE_ZAP_OPTIONS(map));
3315 			vm_map_set_page_shift(zap_new_map,
3316 			    VM_MAP_PAGE_SHIFT(map));
3317 
3318 			if (!map_locked) {
3319 				vm_map_lock(map);
3320 				map_locked = TRUE;
3321 			}
3322 			(void) vm_map_delete(map, *address, *address + size,
3323 			    (VM_MAP_REMOVE_SAVE_ENTRIES |
3324 			    VM_MAP_REMOVE_NO_MAP_ALIGN),
3325 			    zap_new_map);
3326 		}
3327 		if (zap_old_map != VM_MAP_NULL &&
3328 		    zap_old_map->hdr.nentries != 0) {
3329 			vm_map_entry_t  entry1, entry2;
3330 
3331 			/*
3332 			 * The new mapping failed.  Attempt to restore
3333 			 * the old mappings, saved in the "zap_old_map".
3334 			 */
3335 			if (!map_locked) {
3336 				vm_map_lock(map);
3337 				map_locked = TRUE;
3338 			}
3339 
3340 			/* first check if the coast is still clear */
3341 			start = vm_map_first_entry(zap_old_map)->vme_start;
3342 			end = vm_map_last_entry(zap_old_map)->vme_end;
3343 			if (vm_map_lookup_entry(map, start, &entry1) ||
3344 			    vm_map_lookup_entry(map, end, &entry2) ||
3345 			    entry1 != entry2) {
3346 				/*
3347 				 * Part of that range has already been
3348 				 * re-mapped:  we can't restore the old
3349 				 * mappings...
3350 				 */
3351 				vm_map_enter_restore_failures++;
3352 			} else {
3353 				/*
3354 				 * Transfer the saved map entries from
3355 				 * "zap_old_map" to the original "map",
3356 				 * inserting them all after "entry1".
3357 				 */
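				/*
				 * Each pass unlinks the head entry of
				 * "zap_old_map" and links it into "map"
				 * right after the previous one, so the
				 * entries come back in their original order
				 * and both maps' "size" fields stay balanced.
				 */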
3358 				for (entry2 = vm_map_first_entry(zap_old_map);
3359 				    entry2 != vm_map_to_entry(zap_old_map);
3360 				    entry2 = vm_map_first_entry(zap_old_map)) {
3361 					vm_map_size_t entry_size;
3362 
3363 					entry_size = (entry2->vme_end -
3364 					    entry2->vme_start);
3365 					vm_map_store_entry_unlink(zap_old_map,
3366 					    entry2);
3367 					zap_old_map->size -= entry_size;
3368 					vm_map_store_entry_link(map, entry1, entry2,
3369 					    VM_MAP_KERNEL_FLAGS_NONE);
3370 					map->size += entry_size;
3371 					entry1 = entry2;
3372 				}
3373 				if (map->wiring_required) {
3374 					/*
3375 					 * XXX TODO: we should rewire the
3376 					 * old pages here...
3377 					 */
3378 				}
3379 				vm_map_enter_restore_successes++;
3380 			}
3381 		}
3382 	}
3383 
3384 	/*
3385 	 * The caller is responsible for releasing the lock if it requested to
3386 	 * keep the map locked.
3387 	 */
3388 	if (map_locked && !keep_map_locked) {
3389 		vm_map_unlock(map);
3390 	}
3391 
3392 	/*
3393 	 * Get rid of the "zap_maps" and all the map entries that
3394 	 * they may still contain.
3395 	 */
3396 	if (zap_old_map != VM_MAP_NULL) {
3397 		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3398 		zap_old_map = VM_MAP_NULL;
3399 	}
3400 	if (zap_new_map != VM_MAP_NULL) {
3401 		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3402 		zap_new_map = VM_MAP_NULL;
3403 	}
3404 
3405 	return result;
3406 
3407 #undef  RETURN
3408 }
3409 
3410 #if __arm64__
3411 extern const struct memory_object_pager_ops fourk_pager_ops;
3412 kern_return_t
3413 vm_map_enter_fourk(
3414 	vm_map_t                map,
3415 	vm_map_offset_t         *address,       /* IN/OUT */
3416 	vm_map_size_t           size,
3417 	vm_map_offset_t         mask,
3418 	int                     flags,
3419 	vm_map_kernel_flags_t   vmk_flags,
3420 	vm_tag_t                alias,
3421 	vm_object_t             object,
3422 	vm_object_offset_t      offset,
3423 	boolean_t               needs_copy,
3424 	vm_prot_t               cur_protection,
3425 	vm_prot_t               max_protection,
3426 	vm_inherit_t            inheritance)
3427 {
3428 	vm_map_entry_t          entry, new_entry;
3429 	vm_map_offset_t         start, fourk_start;
3430 	vm_map_offset_t         end, fourk_end;
3431 	vm_map_size_t           fourk_size;
3432 	kern_return_t           result = KERN_SUCCESS;
3433 	vm_map_t                zap_old_map = VM_MAP_NULL;
3434 	vm_map_t                zap_new_map = VM_MAP_NULL;
3435 	boolean_t               map_locked = FALSE;
3436 	boolean_t               pmap_empty = TRUE;
3437 	boolean_t               new_mapping_established = FALSE;
3438 	boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3439 	boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3440 	boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3441 	boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3442 	boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3443 	boolean_t               is_submap = vmk_flags.vmkf_submap;
3444 	boolean_t               permanent = vmk_flags.vmkf_permanent;
3445 	boolean_t               no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3446 	boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
3447 //	boolean_t		iokit_acct = vmk_flags.vmkf_iokit_acct;
3448 	boolean_t               translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
3449 	unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3450 	vm_map_offset_t         effective_min_offset, effective_max_offset;
3451 	kern_return_t           kr;
3452 	boolean_t               clear_map_aligned = FALSE;
3453 	memory_object_t         fourk_mem_obj;
3454 	vm_object_t             fourk_object;
3455 	vm_map_offset_t         fourk_pager_offset;
3456 	int                     fourk_pager_index_start, fourk_pager_index_num;
3457 	int                     cur_idx;
3458 	boolean_t               fourk_copy;
3459 	vm_object_t             copy_object;
3460 	vm_object_offset_t      copy_offset;
3461 
3462 	if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3463 		panic("%s:%d", __FUNCTION__, __LINE__);
3464 	}
3465 	fourk_mem_obj = MEMORY_OBJECT_NULL;
3466 	fourk_object = VM_OBJECT_NULL;
3467 
3468 	if (superpage_size) {
3469 		return KERN_NOT_SUPPORTED;
3470 	}
3471 
3472 	if ((cur_protection & VM_PROT_WRITE) &&
3473 	    (cur_protection & VM_PROT_EXECUTE) &&
3474 #if XNU_TARGET_OS_OSX
3475 	    map->pmap != kernel_pmap &&
3476 	    (vm_map_cs_enforcement(map)
3477 #if __arm64__
3478 	    || !VM_MAP_IS_EXOTIC(map)
3479 #endif /* __arm64__ */
3480 	    ) &&
3481 #endif /* XNU_TARGET_OS_OSX */
3482 	    !entry_for_jit) {
3483 		DTRACE_VM3(cs_wx,
3484 		    uint64_t, 0,
3485 		    uint64_t, 0,
3486 		    vm_prot_t, cur_protection);
3487 		printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3488 		    "turning off execute\n",
3489 		    proc_selfpid(),
3490 		    (current_task()->bsd_info
3491 		    ? proc_name_address(current_task()->bsd_info)
3492 		    : "?"),
3493 		    __FUNCTION__);
3494 		cur_protection &= ~VM_PROT_EXECUTE;
3495 	}
3496 
3497 	/*
3498 	 * If the task has requested executable lockdown,
3499 	 * deny any new executable mapping.
3500 	 */
3501 	if (map->map_disallow_new_exec == TRUE) {
3502 		if (cur_protection & VM_PROT_EXECUTE) {
3503 			return KERN_PROTECTION_FAILURE;
3504 		}
3505 	}
3506 
3507 	if (is_submap) {
3508 		return KERN_NOT_SUPPORTED;
3509 	}
3510 	if (vmk_flags.vmkf_already) {
3511 		return KERN_NOT_SUPPORTED;
3512 	}
3513 	if (purgable || entry_for_jit) {
3514 		return KERN_NOT_SUPPORTED;
3515 	}
3516 
3517 	effective_min_offset = map->min_offset;
3518 
3519 	if (vmk_flags.vmkf_beyond_max) {
3520 		return KERN_NOT_SUPPORTED;
3521 	} else {
3522 		effective_max_offset = map->max_offset;
3523 	}
3524 
3525 	if (size == 0 ||
3526 	    (offset & FOURK_PAGE_MASK) != 0) {
3527 		*address = 0;
3528 		return KERN_INVALID_ARGUMENT;
3529 	}
3530 
3531 #define RETURN(value)   { result = value; goto BailOut; }
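/*
 * Every failure path below funnels through "BailOut", so the map lock,
 * the zap maps and the extra "4K" pager references all get released in
 * one place.
 */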
3532 
3533 	assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3534 	assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3535 
3536 	if (!anywhere && overwrite) {
3537 		return KERN_NOT_SUPPORTED;
3538 	}
3539 	if (!anywhere && overwrite) { /* NB: unreachable; the identical check above already returned */
3540 		/*
3541 		 * Create a temporary VM map to hold the old mappings in the
3542 		 * affected area while we create the new one.
3543 		 * This avoids releasing the VM map lock in
3544 		 * vm_map_entry_delete() and allows atomicity
3545 		 * when we want to replace some mappings with a new one.
3546 		 * It also allows us to restore the old VM mappings if the
3547 		 * new mapping fails.
3548 		 */
3549 		zap_old_map = vm_map_create_options(PMAP_NULL,
3550 		    *address,
3551 		    *address + size,
3552 		    VM_MAP_CREATE_ZAP_OPTIONS(map));
3553 		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3554 	}
3555 
3556 	fourk_start = *address;
3557 	fourk_size = size;
3558 	fourk_end = fourk_start + fourk_size;
3559 
3560 	start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3561 	end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3562 	size = end - start;
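	/*
	 * Worked example (hypothetical numbers, 16K map): for
	 * *address = 0x7000 and size = 0x2000, the 4K range is
	 * [0x7000, 0x9000), which widens to the 16K-aligned range
	 * [0x4000, 0xC000): start = 0x4000, end = 0xC000 and
	 * size = 0x8000.
	 */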
3563 
3564 	if (anywhere) {
3565 		return KERN_NOT_SUPPORTED;
3566 	} else {
3567 		/*
3568 		 *	Verify that:
3569 		 *		the address doesn't itself violate
3570 		 *		the mask requirement.
3571 		 */
3572 
3573 		vm_map_lock(map);
3574 		map_locked = TRUE;
3575 		if ((start & mask) != 0) {
3576 			RETURN(KERN_NO_SPACE);
3577 		}
3578 
3579 		/*
3580 		 *	...	the address is within bounds
3581 		 */
3582 
3583 		end = start + size;
3584 
3585 		if ((start < effective_min_offset) ||
3586 		    (end > effective_max_offset) ||
3587 		    (start >= end)) {
3588 			RETURN(KERN_INVALID_ADDRESS);
3589 		}
3590 
3591 		if (overwrite && zap_old_map != VM_MAP_NULL) {
3592 			/*
3593 			 * Fixed mapping and "overwrite" flag: attempt to
3594 			 * remove all existing mappings in the specified
3595 			 * address range, saving them in our "zap_old_map".
3596 			 */
3597 			(void) vm_map_delete(map, start, end,
3598 			    (VM_MAP_REMOVE_SAVE_ENTRIES |
3599 			    VM_MAP_REMOVE_NO_MAP_ALIGN),
3600 			    zap_old_map);
3601 		}
3602 
3603 		/*
3604 		 *	...	the starting address isn't allocated
3605 		 */
3606 		if (vm_map_lookup_entry(map, start, &entry)) {
3607 			vm_object_t cur_object, shadow_object;
3608 
3609 			/*
3610 			 * We might already have some 4K mappings
3611 			 * in a 16K page here.
3612 			 */
3613 
3614 			if (entry->vme_end - entry->vme_start
3615 			    != SIXTEENK_PAGE_SIZE) {
3616 				RETURN(KERN_NO_SPACE);
3617 			}
3618 			if (entry->is_sub_map) {
3619 				RETURN(KERN_NO_SPACE);
3620 			}
3621 			if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3622 				RETURN(KERN_NO_SPACE);
3623 			}
3624 
3625 			/* go all the way down the shadow chain */
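			/*
			 * Hand-over-hand locking: take the shadow's lock
			 * before dropping the current object's, so the
			 * chain cannot be collapsed out from under us.
			 */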
3626 			cur_object = VME_OBJECT(entry);
3627 			vm_object_lock(cur_object);
3628 			while (cur_object->shadow != VM_OBJECT_NULL) {
3629 				shadow_object = cur_object->shadow;
3630 				vm_object_lock(shadow_object);
3631 				vm_object_unlock(cur_object);
3632 				cur_object = shadow_object;
3633 				shadow_object = VM_OBJECT_NULL;
3634 			}
3635 			if (cur_object->internal ||
3636 			    cur_object->pager == NULL) {
3637 				vm_object_unlock(cur_object);
3638 				RETURN(KERN_NO_SPACE);
3639 			}
3640 			if (cur_object->pager->mo_pager_ops
3641 			    != &fourk_pager_ops) {
3642 				vm_object_unlock(cur_object);
3643 				RETURN(KERN_NO_SPACE);
3644 			}
3645 			fourk_object = cur_object;
3646 			fourk_mem_obj = fourk_object->pager;
3647 
3648 			/* keep the "4K" object alive */
3649 			vm_object_reference_locked(fourk_object);
3650 			memory_object_reference(fourk_mem_obj);
3651 			vm_object_unlock(fourk_object);
3652 
3653 			/* merge permissions */
3654 			entry->protection |= cur_protection;
3655 			entry->max_protection |= max_protection;
3656 
3657 			if ((entry->protection & VM_PROT_WRITE) &&
3658 			    (entry->protection & VM_PROT_ALLEXEC) &&
3659 			    fourk_binary_compatibility_unsafe &&
3660 			    fourk_binary_compatibility_allow_wx) {
3661 				/* write+execute: need to be "jit" */
3662 				entry->used_for_jit = TRUE;
3663 			}
3664 			goto map_in_fourk_pager;
3665 		}
3666 
3667 		/*
3668 		 *	...	the next region doesn't overlap the
3669 		 *		end point.
3670 		 */
3671 
3672 		if ((entry->vme_next != vm_map_to_entry(map)) &&
3673 		    (entry->vme_next->vme_start < end)) {
3674 			RETURN(KERN_NO_SPACE);
3675 		}
3676 	}
3677 
3678 	/*
3679 	 *	At this point,
3680 	 *		"start" and "end" should define the endpoints of the
3681 	 *			available new range, and
3682 	 *		"entry" should refer to the region before the new
3683 	 *			range, and
3684 	 *
3685 	 *		the map should be locked.
3686 	 */
3687 
3688 	/* create a new "4K" pager */
3689 	fourk_mem_obj = fourk_pager_create();
3690 	fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3691 	assert(fourk_object);
3692 
3693 	/* keep the "4K" object alive */
3694 	vm_object_reference(fourk_object);
3695 
3696 	/* create a "copy" object, to map the "4K" object copy-on-write */
3697 	fourk_copy = TRUE;
3698 	result = vm_object_copy_strategically(fourk_object,
3699 	    0,
3700 	    end - start,
3701 	    &copy_object,
3702 	    &copy_offset,
3703 	    &fourk_copy);
3704 	assert(result == KERN_SUCCESS);
3705 	assert(copy_object != VM_OBJECT_NULL);
3706 	assert(copy_offset == 0);
3707 
3708 	/* map the "4K" pager's copy object */
3709 	new_entry =
3710 	    vm_map_entry_insert(map, entry,
3711 	    vm_map_trunc_page(start,
3712 	    VM_MAP_PAGE_MASK(map)),
3713 	    vm_map_round_page(end,
3714 	    VM_MAP_PAGE_MASK(map)),
3715 	    copy_object,
3716 	    0,                         /* offset */
3717 	    vmk_flags,
3718 	    FALSE,                         /* needs_copy */
3719 	    FALSE,
3720 	    FALSE,
3721 	    cur_protection, max_protection,
3722 	    VM_BEHAVIOR_DEFAULT,
3723 	    (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3724 	    VM_INHERIT_NONE : inheritance),
3725 	    0,
3726 	    no_cache,
3727 	    permanent,
3728 	    no_copy_on_read,
3729 	    superpage_size,
3730 	    clear_map_aligned,
3731 	    is_submap,
3732 	    FALSE,                         /* jit */
3733 	    alias,
3734 	    translated_allow_execute);
3735 	entry = new_entry;
3736 
3737 #if VM_MAP_DEBUG_FOURK
3738 	if (vm_map_debug_fourk) {
3739 		printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3740 		    map,
3741 		    (uint64_t) entry->vme_start,
3742 		    (uint64_t) entry->vme_end,
3743 		    fourk_mem_obj);
3744 	}
3745 #endif /* VM_MAP_DEBUG_FOURK */
3746 
3747 	new_mapping_established = TRUE;
3748 
3749 map_in_fourk_pager:
3750 	/* "map" the original "object" where it belongs in the "4K" pager */
3751 	fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3752 	fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3753 	if (fourk_size > SIXTEENK_PAGE_SIZE) {
3754 		fourk_pager_index_num = 4;
3755 	} else {
3756 		fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3757 	}
3758 	if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3759 		fourk_pager_index_num = 4 - fourk_pager_index_start;
3760 	}
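	/*
	 * Worked example (same hypothetical numbers as above): for
	 * fourk_start = 0x7000 and fourk_size = 0x2000 the pager
	 * offset is 0x3000, so fourk_pager_index_start = 3 and
	 * fourk_pager_index_num is first 2, then clamped to 1 to
	 * stay within the four 4K slots of one 16K page.
	 */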
3761 	for (cur_idx = 0;
3762 	    cur_idx < fourk_pager_index_num;
3763 	    cur_idx++) {
3764 		vm_object_t             old_object;
3765 		vm_object_offset_t      old_offset;
3766 
3767 		kr = fourk_pager_populate(fourk_mem_obj,
3768 		    TRUE,                       /* overwrite */
3769 		    fourk_pager_index_start + cur_idx,
3770 		    object,
3771 		    (object
3772 		    ? (offset +
3773 		    (cur_idx * FOURK_PAGE_SIZE))
3774 		    : 0),
3775 		    &old_object,
3776 		    &old_offset);
3777 #if VM_MAP_DEBUG_FOURK
3778 		if (vm_map_debug_fourk) {
3779 			if (old_object == (vm_object_t) -1 &&
3780 			    old_offset == (vm_object_offset_t) -1) {
3781 				printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3782 				    "pager [%p:0x%llx] "
3783 				    "populate[%d] "
3784 				    "[object:%p,offset:0x%llx]\n",
3785 				    map,
3786 				    (uint64_t) entry->vme_start,
3787 				    (uint64_t) entry->vme_end,
3788 				    fourk_mem_obj,
3789 				    VME_OFFSET(entry),
3790 				    fourk_pager_index_start + cur_idx,
3791 				    object,
3792 				    (object
3793 				    ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3794 				    : 0));
3795 			} else {
3796 				printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3797 				    "pager [%p:0x%llx] "
3798 				    "populate[%d] [object:%p,offset:0x%llx] "
3799 				    "old [%p:0x%llx]\n",
3800 				    map,
3801 				    (uint64_t) entry->vme_start,
3802 				    (uint64_t) entry->vme_end,
3803 				    fourk_mem_obj,
3804 				    VME_OFFSET(entry),
3805 				    fourk_pager_index_start + cur_idx,
3806 				    object,
3807 				    (object
3808 				    ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3809 				    : 0),
3810 				    old_object,
3811 				    old_offset);
3812 			}
3813 		}
3814 #endif /* VM_MAP_DEBUG_FOURK */
3815 
3816 		assert(kr == KERN_SUCCESS);
3817 		if (object != old_object &&
3818 		    object != VM_OBJECT_NULL &&
3819 		    object != (vm_object_t) -1) {
3820 			vm_object_reference(object);
3821 		}
3822 		if (object != old_object &&
3823 		    old_object != VM_OBJECT_NULL &&
3824 		    old_object != (vm_object_t) -1) {
3825 			vm_object_deallocate(old_object);
3826 		}
3827 	}
3828 
3829 BailOut:
3830 	assert(map_locked == TRUE);
3831 
3832 	if (result == KERN_SUCCESS) {
3833 		vm_prot_t pager_prot;
3834 		memory_object_t pager;
3835 
3836 #if DEBUG
3837 		if (pmap_empty &&
3838 		    !(vmk_flags.vmkf_no_pmap_check)) {
3839 			assert(pmap_is_empty(map->pmap,
3840 			    *address,
3841 			    *address + size));
3842 		}
3843 #endif /* DEBUG */
3844 
3845 		/*
3846 		 * For "named" VM objects, let the pager know that the
3847 		 * memory object is being mapped.  Some pagers need to keep
3848 		 * track of this, to know when they can reclaim the memory
3849 		 * object, for example.
3850 		 * VM calls memory_object_map() for each mapping (specifying
3851 		 * the protection of each mapping) and calls
3852 		 * memory_object_last_unmap() when all the mappings are gone.
3853 		 */
3854 		pager_prot = max_protection;
3855 		if (needs_copy) {
3856 			/*
3857 			 * Copy-On-Write mapping: won't modify
3858 			 * the memory object.
3859 			 */
3860 			pager_prot &= ~VM_PROT_WRITE;
3861 		}
3862 		if (!is_submap &&
3863 		    object != VM_OBJECT_NULL &&
3864 		    object->named &&
3865 		    object->pager != MEMORY_OBJECT_NULL) {
3866 			vm_object_lock(object);
3867 			pager = object->pager;
3868 			if (object->named &&
3869 			    pager != MEMORY_OBJECT_NULL) {
3870 				assert(object->pager_ready);
3871 				vm_object_mapping_wait(object, THREAD_UNINT);
3872 				vm_object_mapping_begin(object);
3873 				vm_object_unlock(object);
3874 
3875 				kr = memory_object_map(pager, pager_prot);
3876 				assert(kr == KERN_SUCCESS);
3877 
3878 				vm_object_lock(object);
3879 				vm_object_mapping_end(object);
3880 			}
3881 			vm_object_unlock(object);
3882 		}
3883 		if (!is_submap &&
3884 		    fourk_object != VM_OBJECT_NULL &&
3885 		    fourk_object->named &&
3886 		    fourk_object->pager != MEMORY_OBJECT_NULL) {
3887 			vm_object_lock(fourk_object);
3888 			pager = fourk_object->pager;
3889 			if (fourk_object->named &&
3890 			    pager != MEMORY_OBJECT_NULL) {
3891 				assert(fourk_object->pager_ready);
3892 				vm_object_mapping_wait(fourk_object,
3893 				    THREAD_UNINT);
3894 				vm_object_mapping_begin(fourk_object);
3895 				vm_object_unlock(fourk_object);
3896 
3897 				kr = memory_object_map(pager, VM_PROT_READ);
3898 				assert(kr == KERN_SUCCESS);
3899 
3900 				vm_object_lock(fourk_object);
3901 				vm_object_mapping_end(fourk_object);
3902 			}
3903 			vm_object_unlock(fourk_object);
3904 		}
3905 	}
3906 
3907 	if (fourk_object != VM_OBJECT_NULL) {
3908 		vm_object_deallocate(fourk_object);
3909 		fourk_object = VM_OBJECT_NULL;
3910 		memory_object_deallocate(fourk_mem_obj);
3911 		fourk_mem_obj = MEMORY_OBJECT_NULL;
3912 	}
3913 
3914 	assert(map_locked == TRUE);
3915 
3916 	if (!keep_map_locked) {
3917 		vm_map_unlock(map);
3918 		map_locked = FALSE;
3919 	}
3920 
3921 	/*
3922 	 * The map lock must not be held when we enter this block.
3923 	 */
3924 
3925 	if (result == KERN_SUCCESS) {
3926 		/*	Wire down the new entry if the user
3927 		 *	requested all new map entries be wired.
3928 		 */
3929 		if ((map->wiring_required) || (superpage_size)) {
3930 			assert(!keep_map_locked);
3931 			pmap_empty = FALSE; /* pmap won't be empty */
3932 			kr = vm_map_wire_kernel(map, start, end,
3933 			    new_entry->protection, VM_KERN_MEMORY_MLOCK,
3934 			    TRUE);
3935 			result = kr;
3936 		}
3937 
3938 	}
3939 
3940 	if (result != KERN_SUCCESS) {
3941 		if (new_mapping_established) {
3942 			/*
3943 			 * We have to get rid of the new mappings since we
3944 			 * won't make them available to the user.
3945 			 * Try to do that atomically, to minimize the risk
3946 			 * that someone else creates new mappings in that range.
3947 			 */
3948 			zap_new_map = vm_map_create_options(PMAP_NULL,
3949 			    *address,
3950 			    *address + size,
3951 			    VM_MAP_CREATE_ZAP_OPTIONS(map));
3952 			vm_map_set_page_shift(zap_new_map,
3953 			    VM_MAP_PAGE_SHIFT(map));
3954 
3955 			if (!map_locked) {
3956 				vm_map_lock(map);
3957 				map_locked = TRUE;
3958 			}
3959 			(void) vm_map_delete(map, *address, *address + size,
3960 			    (VM_MAP_REMOVE_SAVE_ENTRIES |
3961 			    VM_MAP_REMOVE_NO_MAP_ALIGN),
3962 			    zap_new_map);
3963 		}
3964 		if (zap_old_map != VM_MAP_NULL &&
3965 		    zap_old_map->hdr.nentries != 0) {
3966 			vm_map_entry_t  entry1, entry2;
3967 
3968 			/*
3969 			 * The new mapping failed.  Attempt to restore
3970 			 * the old mappings, saved in the "zap_old_map".
3971 			 */
3972 			if (!map_locked) {
3973 				vm_map_lock(map);
3974 				map_locked = TRUE;
3975 			}
3976 
3977 			/* first check if the coast is still clear */
3978 			start = vm_map_first_entry(zap_old_map)->vme_start;
3979 			end = vm_map_last_entry(zap_old_map)->vme_end;
3980 			if (vm_map_lookup_entry(map, start, &entry1) ||
3981 			    vm_map_lookup_entry(map, end, &entry2) ||
3982 			    entry1 != entry2) {
3983 				/*
3984 				 * Part of that range has already been
3985 				 * re-mapped:  we can't restore the old
3986 				 * mappings...
3987 				 */
3988 				vm_map_enter_restore_failures++;
3989 			} else {
3990 				/*
3991 				 * Transfer the saved map entries from
3992 				 * "zap_old_map" to the original "map",
3993 				 * inserting them all after "entry1".
3994 				 */
3995 				for (entry2 = vm_map_first_entry(zap_old_map);
3996 				    entry2 != vm_map_to_entry(zap_old_map);
3997 				    entry2 = vm_map_first_entry(zap_old_map)) {
3998 					vm_map_size_t entry_size;
3999 
4000 					entry_size = (entry2->vme_end -
4001 					    entry2->vme_start);
4002 					vm_map_store_entry_unlink(zap_old_map,
4003 					    entry2);
4004 					zap_old_map->size -= entry_size;
4005 					vm_map_store_entry_link(map, entry1, entry2,
4006 					    VM_MAP_KERNEL_FLAGS_NONE);
4007 					map->size += entry_size;
4008 					entry1 = entry2;
4009 				}
4010 				if (map->wiring_required) {
4011 					/*
4012 					 * XXX TODO: we should rewire the
4013 					 * old pages here...
4014 					 */
4015 				}
4016 				vm_map_enter_restore_successes++;
4017 			}
4018 		}
4019 	}
4020 
4021 	/*
4022 	 * The caller is responsible for releasing the lock if it requested to
4023 	 * keep the map locked.
4024 	 */
4025 	if (map_locked && !keep_map_locked) {
4026 		vm_map_unlock(map);
4027 	}
4028 
4029 	/*
4030 	 * Get rid of the "zap_maps" and all the map entries that
4031 	 * they may still contain.
4032 	 */
4033 	if (zap_old_map != VM_MAP_NULL) {
4034 		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4035 		zap_old_map = VM_MAP_NULL;
4036 	}
4037 	if (zap_new_map != VM_MAP_NULL) {
4038 		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4039 		zap_new_map = VM_MAP_NULL;
4040 	}
4041 
4042 	return result;
4043 
4044 #undef  RETURN
4045 }
4046 #endif /* __arm64__ */
4047 
4048 /*
4049  * Counters for the prefault optimization.
4050  */
4051 int64_t vm_prefault_nb_pages = 0;
4052 int64_t vm_prefault_nb_bailout = 0;
4053 
4054 static kern_return_t
4055 vm_map_enter_mem_object_helper(
4056 	vm_map_t                target_map,
4057 	vm_map_offset_t         *address,
4058 	vm_map_size_t           initial_size,
4059 	vm_map_offset_t         mask,
4060 	int                     flags,
4061 	vm_map_kernel_flags_t   vmk_flags,
4062 	vm_tag_t                tag,
4063 	ipc_port_t              port,
4064 	vm_object_offset_t      offset,
4065 	boolean_t               copy,
4066 	vm_prot_t               cur_protection,
4067 	vm_prot_t               max_protection,
4068 	vm_inherit_t            inheritance,
4069 	upl_page_list_ptr_t     page_list,
4070 	unsigned int            page_list_count)
4071 {
4072 	vm_map_address_t        map_addr;
4073 	vm_map_size_t           map_size;
4074 	vm_object_t             object;
4075 	vm_object_size_t        size;
4076 	kern_return_t           result;
4077 	boolean_t               mask_cur_protection, mask_max_protection;
4078 	boolean_t               kernel_prefault, try_prefault = (page_list_count != 0);
4079 	vm_map_offset_t         offset_in_mapping = 0;
4080 #if __arm64__
4081 	boolean_t               fourk = vmk_flags.vmkf_fourk;
4082 #endif /* __arm64__ */
4083 
4084 	if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4085 		/* XXX TODO4K prefaulting depends on page size... */
4086 		try_prefault = FALSE;
4087 	}
4088 
4089 	assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4090 
4091 	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4092 	mask_max_protection = max_protection & VM_PROT_IS_MASK;
4093 	cur_protection &= ~VM_PROT_IS_MASK;
4094 	max_protection &= ~VM_PROT_IS_MASK;
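	/*
	 * For example (hypothetical caller): passing
	 * cur_protection = VM_PROT_READ | VM_PROT_IS_MASK requests
	 * the intersection of VM_PROT_READ with the named entry's
	 * own protection rather than exactly VM_PROT_READ; see the
	 * masking against named_entry->protection below.
	 */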
4095 
4096 	/*
4097 	 * Check arguments for validity
4098 	 */
4099 	if ((target_map == VM_MAP_NULL) ||
4100 	    (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4101 	    (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4102 	    (inheritance > VM_INHERIT_LAST_VALID) ||
4103 	    (try_prefault && (copy || !page_list)) ||
4104 	    initial_size == 0) {
4105 		return KERN_INVALID_ARGUMENT;
4106 	}
4107 
4108 #if __arm64__
4109 	if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4110 		/* no "fourk" if map is using a sub-page page size */
4111 		fourk = FALSE;
4112 	}
4113 	if (fourk) {
4114 		map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4115 		map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4116 	} else
4117 #endif /* __arm64__ */
4118 	{
4119 		map_addr = vm_map_trunc_page(*address,
4120 		    VM_MAP_PAGE_MASK(target_map));
4121 		map_size = vm_map_round_page(initial_size,
4122 		    VM_MAP_PAGE_MASK(target_map));
4123 	}
4124 	size = vm_object_round_page(initial_size);
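	/*
	 * Note that "map_addr"/"map_size" are rounded to the target
	 * map's page size, while "size" is rounded to the kernel's
	 * VM object page size; for a 4K map on 16K hardware the two
	 * roundings can legitimately differ.
	 */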
4125 
4126 	/*
4127 	 * Find the vm object (if any) corresponding to this port.
4128 	 */
4129 	if (!IP_VALID(port)) {
4130 		object = VM_OBJECT_NULL;
4131 		offset = 0;
4132 		copy = FALSE;
4133 	} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4134 		vm_named_entry_t        named_entry;
4135 		vm_object_offset_t      data_offset;
4136 
4137 		named_entry = mach_memory_entry_from_port(port);
4138 
4139 		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4140 		    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4141 			data_offset = named_entry->data_offset;
4142 			offset += named_entry->data_offset;
4143 		} else {
4144 			data_offset = 0;
4145 		}
4146 
4147 		/* a few checks to make sure user is obeying rules */
4148 		if (size == 0) {
4149 			if (offset >= named_entry->size) {
4150 				return KERN_INVALID_RIGHT;
4151 			}
4152 			size = named_entry->size - offset;
4153 		}
4154 		if (mask_max_protection) {
4155 			max_protection &= named_entry->protection;
4156 		}
4157 		if (mask_cur_protection) {
4158 			cur_protection &= named_entry->protection;
4159 		}
4160 		if ((named_entry->protection & max_protection) !=
4161 		    max_protection) {
4162 			return KERN_INVALID_RIGHT;
4163 		}
4164 		if ((named_entry->protection & cur_protection) !=
4165 		    cur_protection) {
4166 			return KERN_INVALID_RIGHT;
4167 		}
4168 		if (offset + size < offset) {
4169 			/* overflow */
4170 			return KERN_INVALID_ARGUMENT;
4171 		}
4172 		if (named_entry->size < (offset + initial_size)) {
4173 			return KERN_INVALID_ARGUMENT;
4174 		}
4175 
4176 		if (named_entry->is_copy) {
4177 			/* for a vm_map_copy, we can only map it whole */
4178 			if ((size != named_entry->size) &&
4179 			    (vm_map_round_page(size,
4180 			    VM_MAP_PAGE_MASK(target_map)) ==
4181 			    named_entry->size)) {
4182 				/* XXX FBDP use the rounded size... */
4183 				size = vm_map_round_page(
4184 					size,
4185 					VM_MAP_PAGE_MASK(target_map));
4186 			}
4187 		}
4188 
4189 		/* The caller's parameter "offset" is relative to the named */
4190 		/* entry; convert it to an offset within the underlying VM object. */
4191 		offset = offset + named_entry->offset;
4192 
4193 		if (!VM_MAP_PAGE_ALIGNED(size,
4194 		    VM_MAP_PAGE_MASK(target_map))) {
4195 			/*
4196 			 * Let's not map more than requested;
4197 			 * vm_map_enter() will handle this "not map-aligned"
4198 			 * case.
4199 			 */
4200 			map_size = size;
4201 		}
4202 
4203 		if (named_entry->is_sub_map) {
4204 			vm_map_t                submap;
4205 
4206 			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4207 			    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4208 				panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4209 			}
4210 
4211 			submap = named_entry->backing.map;
4212 			vm_map_reference(submap);
4213 
4214 			vmk_flags.vmkf_submap = TRUE;
4215 
4216 			result = vm_map_enter(target_map,
4217 			    &map_addr,
4218 			    map_size,
4219 			    mask,
4220 			    flags,
4221 			    vmk_flags,
4222 			    tag,
4223 			    (vm_object_t)(uintptr_t) submap,
4224 			    offset,
4225 			    copy,
4226 			    cur_protection,
4227 			    max_protection,
4228 			    inheritance);
4229 			if (result != KERN_SUCCESS) {
4230 				vm_map_deallocate(submap);
4231 			} else {
4232 				/*
4233 				 * No need to lock "submap" just to check its
4234 				 * "mapped" flag: that flag is never reset
4235 				 * once it's been set and if we race, we'll
4236 				 * just end up setting it twice, which is OK.
4237 				 */
4238 				if (submap->mapped_in_other_pmaps == FALSE &&
4239 				    vm_map_pmap(submap) != PMAP_NULL &&
4240 				    vm_map_pmap(submap) !=
4241 				    vm_map_pmap(target_map)) {
4242 					/*
4243 					 * This submap is being mapped in a map
4244 					 * that uses a different pmap.
4245 					 * Set its "mapped_in_other_pmaps" flag
4246 					 * to indicate that we now need to
4247 					 * remove mappings from all pmaps rather
4248 					 * than just the submap's pmap.
4249 					 */
4250 					vm_map_lock(submap);
4251 					submap->mapped_in_other_pmaps = TRUE;
4252 					vm_map_unlock(submap);
4253 				}
4254 				*address = map_addr;
4255 			}
4256 			return result;
4257 		} else if (named_entry->is_copy) {
4258 			kern_return_t   kr;
4259 			vm_map_copy_t   copy_map;
4260 			vm_map_entry_t  copy_entry;
4261 			vm_map_offset_t copy_addr;
4262 			vm_map_copy_t   target_copy_map;
4263 			vm_map_offset_t overmap_start, overmap_end;
4264 			vm_map_offset_t trimmed_start;
4265 			vm_map_size_t   target_size;
4266 
4267 			if (flags & ~(VM_FLAGS_FIXED |
4268 			    VM_FLAGS_ANYWHERE |
4269 			    VM_FLAGS_OVERWRITE |
4270 			    VM_FLAGS_RETURN_4K_DATA_ADDR |
4271 			    VM_FLAGS_RETURN_DATA_ADDR |
4272 			    VM_FLAGS_ALIAS_MASK)) {
4273 				return KERN_INVALID_ARGUMENT;
4274 			}
4275 
4276 			copy_map = named_entry->backing.copy;
4277 			assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4278 			if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4279 				/* unsupported type; should not happen */
4280 				printf("vm_map_enter_mem_object: "
4281 				    "memory_entry->backing.copy "
4282 				    "unsupported type 0x%x\n",
4283 				    copy_map->type);
4284 				return KERN_INVALID_ARGUMENT;
4285 			}
4286 
4287 			if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4288 				DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4289 			}
4290 
4291 			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4292 			    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4293 				offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4294 				if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4295 					offset_in_mapping &= ~((signed)(0xFFF));
4296 				}
4297 			}
4298 
4299 			target_copy_map = VM_MAP_COPY_NULL;
4300 			target_size = copy_map->size;
4301 			overmap_start = 0;
4302 			overmap_end = 0;
4303 			trimmed_start = 0;
4304 			if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4305 				DEBUG4K_ADJUST("adjusting...\n");
4306 				kr = vm_map_copy_adjust_to_target(
4307 					copy_map,
4308 					offset /* includes data_offset */,
4309 					initial_size,
4310 					target_map,
4311 					copy,
4312 					&target_copy_map,
4313 					&overmap_start,
4314 					&overmap_end,
4315 					&trimmed_start);
4316 				if (kr != KERN_SUCCESS) {
4317 					return kr;
4318 				}
4319 				target_size = target_copy_map->size;
4320 				if (trimmed_start >= data_offset) {
4321 					data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4322 				} else {
4323 					data_offset -= trimmed_start;
4324 				}
4325 			} else {
4326 				target_copy_map = copy_map;
4327 			}
4328 
4329 			/* reserve a contiguous range */
4330 			kr = vm_map_enter(target_map,
4331 			    &map_addr,
4332 			    vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4333 			    mask,
4334 			    flags & (VM_FLAGS_ANYWHERE |
4335 			    VM_FLAGS_OVERWRITE |
4336 			    VM_FLAGS_RETURN_4K_DATA_ADDR |
4337 			    VM_FLAGS_RETURN_DATA_ADDR),
4338 			    vmk_flags,
4339 			    tag,
4340 			    VM_OBJECT_NULL,
4341 			    0,
4342 			    FALSE,               /* copy */
4343 			    cur_protection,
4344 			    max_protection,
4345 			    inheritance);
4346 			if (kr != KERN_SUCCESS) {
4347 				DEBUG4K_ERROR("kr 0x%x\n", kr);
4348 				if (target_copy_map != copy_map) {
4349 					vm_map_copy_discard(target_copy_map);
4350 					target_copy_map = VM_MAP_COPY_NULL;
4351 				}
4352 				return kr;
4353 			}
4354 
4355 			copy_addr = map_addr;
4356 
4357 			for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4358 			    copy_entry != vm_map_copy_to_entry(target_copy_map);
4359 			    copy_entry = copy_entry->vme_next) {
4360 				int                     remap_flags;
4361 				vm_map_kernel_flags_t   vmk_remap_flags;
4362 				vm_map_t                copy_submap;
4363 				vm_object_t             copy_object;
4364 				vm_map_size_t           copy_size;
4365 				vm_object_offset_t      copy_offset;
4366 				int                     copy_vm_alias;
4367 
4368 				remap_flags = 0;
4369 				vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4370 
4371 				copy_object = VME_OBJECT(copy_entry);
4372 				copy_offset = VME_OFFSET(copy_entry);
4373 				copy_size = (copy_entry->vme_end -
4374 				    copy_entry->vme_start);
4375 				VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4376 				if (copy_vm_alias == 0) {
4377 					/*
4378 					 * Caller does not want a specific
4379 					 * alias for this new mapping:  use
4380 					 * the alias of the original mapping.
4381 					 */
4382 					copy_vm_alias = VME_ALIAS(copy_entry);
4383 				}
4384 
4385 				/* sanity check */
4386 				if ((copy_addr + copy_size) >
4387 				    (map_addr +
4388 				    overmap_start + overmap_end +
4389 				    named_entry->size /* XXX full size */)) {
4390 					/* over-mapping too much !? */
4391 					kr = KERN_INVALID_ARGUMENT;
4392 					DEBUG4K_ERROR("kr 0x%x\n", kr);
4393 					/* abort */
4394 					break;
4395 				}
4396 
4397 				/* take a reference on the object */
4398 				if (copy_entry->is_sub_map) {
4399 					vmk_remap_flags.vmkf_submap = TRUE;
4400 					copy_submap = VME_SUBMAP(copy_entry);
4401 					vm_map_lock(copy_submap);
4402 					vm_map_reference(copy_submap);
4403 					vm_map_unlock(copy_submap);
4404 					copy_object = (vm_object_t)(uintptr_t) copy_submap;
4405 				} else if (!copy &&
4406 				    copy_object != VM_OBJECT_NULL &&
4407 				    (copy_entry->needs_copy ||
4408 				    copy_object->shadowed ||
4409 				    (!copy_object->true_share &&
4410 				    !copy_entry->is_shared &&
4411 				    copy_object->vo_size > copy_size))) {
4412 					/*
4413 					 * We need to resolve our side of this
4414 					 * "symmetric" copy-on-write now; we
4415 					 * need a new object to map and share,
4416 					 * instead of the current one which
4417 					 * might still be shared with the
4418 					 * original mapping.
4419 					 *
4420 					 * Note: A "vm_map_copy_t" does not
4421 					 * have a lock but we're protected by
4422 					 * the named entry's lock here.
4423 					 */
4424 					// assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4425 					VME_OBJECT_SHADOW(copy_entry, copy_size);
4426 					if (!copy_entry->needs_copy &&
4427 					    copy_entry->protection & VM_PROT_WRITE) {
4428 						vm_prot_t prot;
4429 
4430 						prot = copy_entry->protection & ~VM_PROT_WRITE;
4431 						vm_object_pmap_protect(copy_object,
4432 						    copy_offset,
4433 						    copy_size,
4434 						    PMAP_NULL,
4435 						    PAGE_SIZE,
4436 						    0,
4437 						    prot);
4438 					}
4439 
4440 					copy_entry->needs_copy = FALSE;
4441 					copy_entry->is_shared = TRUE;
4442 					copy_object = VME_OBJECT(copy_entry);
4443 					copy_offset = VME_OFFSET(copy_entry);
4444 					vm_object_lock(copy_object);
4445 					vm_object_reference_locked(copy_object);
4446 					if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4447 						/* we're about to make a shared mapping of this object */
4448 						copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4449 						copy_object->true_share = TRUE;
4450 					}
4451 					vm_object_unlock(copy_object);
4452 				} else {
4453 					/*
4454 					 * We already have the right object
4455 					 * to map.
4456 					 */
4457 					copy_object = VME_OBJECT(copy_entry);
4458 					vm_object_reference(copy_object);
4459 				}
4460 
4461 				/* over-map the object into destination */
4462 				remap_flags |= flags;
4463 				remap_flags |= VM_FLAGS_FIXED;
4464 				remap_flags |= VM_FLAGS_OVERWRITE;
4465 				remap_flags &= ~VM_FLAGS_ANYWHERE;
4466 				if (!copy && !copy_entry->is_sub_map) {
4467 					/*
4468 					 * copy-on-write should have been
4469 					 * resolved at this point, or we would
4470 					 * end up sharing instead of copying.
4471 					 */
4472 					assert(!copy_entry->needs_copy);
4473 				}
4474 #if XNU_TARGET_OS_OSX
4475 				if (copy_entry->used_for_jit) {
4476 					vmk_remap_flags.vmkf_map_jit = TRUE;
4477 				}
4478 #endif /* XNU_TARGET_OS_OSX */
4479 
4480 				assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4481 				    "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4482 				kr = vm_map_enter(target_map,
4483 				    &copy_addr,
4484 				    copy_size,
4485 				    (vm_map_offset_t) 0,
4486 				    remap_flags,
4487 				    vmk_remap_flags,
4488 				    (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4489 				    copy_object,
4490 				    copy_offset,
4491 				    ((copy_object == NULL)
4492 				    ? FALSE
4493 				    : (copy || copy_entry->needs_copy)),
4494 				    cur_protection,
4495 				    max_protection,
4496 				    inheritance);
4497 				if (kr != KERN_SUCCESS) {
4498 					DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4499 					if (copy_entry->is_sub_map) {
4500 						vm_map_deallocate(copy_submap);
4501 					} else {
4502 						vm_object_deallocate(copy_object);
4503 					}
4504 					/* abort */
4505 					break;
4506 				}
4507 
4508 				/* next mapping */
4509 				copy_addr += copy_size;
4510 			}
4511 
4512 			if (kr == KERN_SUCCESS) {
4513 				if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4514 				    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4515 					*address = map_addr + offset_in_mapping;
4516 				} else {
4517 					*address = map_addr;
4518 				}
4519 				if (overmap_start) {
4520 					*address += overmap_start;
4521 					DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4522 				}
4523 			}
4524 			if (target_copy_map != copy_map) {
4525 				vm_map_copy_discard(target_copy_map);
4526 				target_copy_map = VM_MAP_COPY_NULL;
4527 			}
4528 
4529 			if (kr != KERN_SUCCESS) {
4530 				if (!(flags & VM_FLAGS_OVERWRITE)) {
4531 					/* deallocate the contiguous range */
4532 					(void) vm_deallocate(target_map,
4533 					    map_addr,
4534 					    map_size);
4535 				}
4536 			}
4537 
4538 			return kr;
4539 		}
4540 
4541 		if (named_entry->is_object) {
4542 			unsigned int    access;
4543 			vm_prot_t       protections;
4544 			unsigned int    wimg_mode;
4545 
4546 			/* we are mapping a VM object */
4547 
4548 			protections = named_entry->protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
4549 			access = GET_MAP_MEM(named_entry->protection);
4550 
4551 			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4552 			    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4553 				offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4554 				if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4555 					offset_in_mapping &= ~((signed)(0xFFF));
4556 				}
4557 				offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4558 				map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4559 			}
4560 
4561 			object = vm_named_entry_to_vm_object(named_entry);
4562 			assert(object != VM_OBJECT_NULL);
4563 			vm_object_lock(object);
4564 
4565 			vm_object_reference_locked(object);
4566 
4567 			wimg_mode = object->wimg_bits;
4568 			vm_prot_to_wimg(access, &wimg_mode);
4569 			if (object->wimg_bits != wimg_mode) {
4570 				vm_object_change_wimg_mode(object, wimg_mode);
4571 			}
4572 
4573 			vm_object_unlock(object);
4574 		} else {
4575 			panic("invalid VM named entry %p", named_entry);
4576 		}
4577 	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4578 		/*
4579 		 * JMM - This is temporary until we unify named entries
4580 		 * and raw memory objects.
4581 		 *
4582 		 * Detected fake ip_kotype for a memory object.  In
4583 		 * this case, the port isn't really a port at all, but
4584 		 * instead is just a raw memory object.
4585 		 */
4586 		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4587 		    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4588 			panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4589 		}
4590 
4591 		object = memory_object_to_vm_object((memory_object_t)port);
4592 		if (object == VM_OBJECT_NULL) {
4593 			return KERN_INVALID_OBJECT;
4594 		}
4595 		vm_object_reference(object);
4596 
4597 		/* wait for object (if any) to be ready */
4598 		if (object != VM_OBJECT_NULL) {
4599 			if (object == kernel_object) {
4600 				printf("Warning: Attempt to map kernel object"
4601 				    " by a non-private kernel entity\n");
4602 				return KERN_INVALID_OBJECT;
4603 			}
4604 			if (!object->pager_ready) {
4605 				vm_object_lock(object);
4606 
4607 				while (!object->pager_ready) {
4608 					vm_object_wait(object,
4609 					    VM_OBJECT_EVENT_PAGER_READY,
4610 					    THREAD_UNINT);
4611 					vm_object_lock(object);
4612 				}
4613 				vm_object_unlock(object);
4614 			}
4615 		}
4616 	} else {
4617 		return KERN_INVALID_OBJECT;
4618 	}
4619 
4620 	if (object != VM_OBJECT_NULL &&
4621 	    object->named &&
4622 	    object->pager != MEMORY_OBJECT_NULL &&
4623 	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4624 		memory_object_t pager;
4625 		vm_prot_t       pager_prot;
4626 		kern_return_t   kr;
4627 
4628 		/*
4629 		 * For "named" VM objects, let the pager know that the
4630 		 * memory object is being mapped.  Some pagers need to keep
4631 		 * track of this, to know when they can reclaim the memory
4632 		 * object, for example.
4633 		 * VM calls memory_object_map() for each mapping (specifying
4634 		 * the protection of each mapping) and calls
4635 		 * memory_object_last_unmap() when all the mappings are gone.
4636 		 */
4637 		pager_prot = max_protection;
4638 		if (copy) {
4639 			/*
4640 			 * Copy-On-Write mapping: won't modify the
4641 			 * memory object.
4642 			 */
4643 			pager_prot &= ~VM_PROT_WRITE;
4644 		}
4645 		vm_object_lock(object);
4646 		pager = object->pager;
4647 		if (object->named &&
4648 		    pager != MEMORY_OBJECT_NULL &&
4649 		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4650 			assert(object->pager_ready);
4651 			vm_object_mapping_wait(object, THREAD_UNINT);
4652 			vm_object_mapping_begin(object);
4653 			vm_object_unlock(object);
4654 
4655 			kr = memory_object_map(pager, pager_prot);
4656 			assert(kr == KERN_SUCCESS);
4657 
4658 			vm_object_lock(object);
4659 			vm_object_mapping_end(object);
4660 		}
4661 		vm_object_unlock(object);
4662 	}
4663 
4664 	/*
4665 	 *	Perform the copy if requested
4666 	 */
4667 
4668 	if (copy) {
4669 		vm_object_t             new_object;
4670 		vm_object_offset_t      new_offset;
4671 
4672 		result = vm_object_copy_strategically(object, offset,
4673 		    map_size,
4674 		    &new_object, &new_offset,
4675 		    &copy);
4676 
4677 
4678 		if (result == KERN_MEMORY_RESTART_COPY) {
4679 			boolean_t success;
4680 			boolean_t src_needs_copy;
4681 
4682 			/*
4683 			 * XXX
4684 			 * We currently ignore src_needs_copy.
4685 			 * This really is the issue of how to make
4686 			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4687 			 * non-kernel users to use. Solution forthcoming.
4688 			 * In the meantime, since we don't allow non-kernel
4689 			 * memory managers to specify symmetric copy,
4690 			 * we won't run into problems here.
4691 			 */
4692 			new_object = object;
4693 			new_offset = offset;
4694 			success = vm_object_copy_quickly(new_object,
4695 			    new_offset,
4696 			    map_size,
4697 			    &src_needs_copy,
4698 			    &copy);
4699 			assert(success);
4700 			result = KERN_SUCCESS;
4701 		}
4702 		/*
4703 		 *	Throw away the reference to the
4704 		 *	original object, as it won't be mapped.
4705 		 */
4706 
4707 		vm_object_deallocate(object);
4708 
4709 		if (result != KERN_SUCCESS) {
4710 			return result;
4711 		}
4712 
4713 		object = new_object;
4714 		offset = new_offset;
4715 	}
4716 
4717 	/*
4718 	 * If non-kernel users want to try to prefault pages, the mapping and prefault
4719 	 * needs to be atomic.
4720 	 */
4721 	kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4722 	vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
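	/*
	 * For a non-kernel prefault, vm_map_enter() below returns
	 * with the map still locked, so nothing can touch the new
	 * range before the pmap_enter_options() loop has run; the
	 * lock is dropped at the end of that loop.
	 */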
4723 
4724 #if __arm64__
4725 	if (fourk) {
4726 		/* map this object in a "4K" pager */
4727 		result = vm_map_enter_fourk(target_map,
4728 		    &map_addr,
4729 		    map_size,
4730 		    (vm_map_offset_t) mask,
4731 		    flags,
4732 		    vmk_flags,
4733 		    tag,
4734 		    object,
4735 		    offset,
4736 		    copy,
4737 		    cur_protection,
4738 		    max_protection,
4739 		    inheritance);
4740 	} else
4741 #endif /* __arm64__ */
4742 	{
4743 		result = vm_map_enter(target_map,
4744 		    &map_addr, map_size,
4745 		    (vm_map_offset_t)mask,
4746 		    flags,
4747 		    vmk_flags,
4748 		    tag,
4749 		    object, offset,
4750 		    copy,
4751 		    cur_protection, max_protection,
4752 		    inheritance);
4753 	}
4754 	if (result != KERN_SUCCESS) {
4755 		vm_object_deallocate(object);
4756 	}
4757 
4758 	/*
4759 	 * Try to prefault, and do not forget to release the vm map lock.
4760 	 */
4761 	if (result == KERN_SUCCESS && try_prefault) {
4762 		mach_vm_address_t va = map_addr;
4763 		kern_return_t kr = KERN_SUCCESS;
4764 		unsigned int i = 0;
4765 		int pmap_options;
4766 
4767 		pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4768 		if (object->internal) {
4769 			pmap_options |= PMAP_OPTIONS_INTERNAL;
4770 		}
4771 
4772 		for (i = 0; i < page_list_count; ++i) {
4773 			if (!UPL_VALID_PAGE(page_list, i)) {
4774 				if (kernel_prefault) {
4775 					assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4776 					result = KERN_MEMORY_ERROR;
4777 					break;
4778 				}
4779 			} else {
4780 				/*
4781 				 * If this function call failed, we should stop
4782 				 * trying to optimize, other calls are likely
4783 				 * going to fail too.
4784 				 *
4785 				 * We are not going to report an error for such
4786 				 * a failure, though. That's an optimization, not
4787 				 * something critical.
4788 				 */
4789 				kr = pmap_enter_options(target_map->pmap,
4790 				    va, UPL_PHYS_PAGE(page_list, i),
4791 				    cur_protection, VM_PROT_NONE,
4792 				    0, TRUE, pmap_options, NULL);
4793 				if (kr != KERN_SUCCESS) {
4794 					OSIncrementAtomic64(&vm_prefault_nb_bailout);
4795 					if (kernel_prefault) {
4796 						result = kr;
4797 					}
4798 					break;
4799 				}
4800 				OSIncrementAtomic64(&vm_prefault_nb_pages);
4801 			}
4802 
4803 			/* Next virtual address */
4804 			va += PAGE_SIZE;
4805 		}
4806 		if (vmk_flags.vmkf_keep_map_locked) {
4807 			vm_map_unlock(target_map);
4808 		}
4809 	}
4810 
4811 	if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4812 	    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4813 		*address = map_addr + offset_in_mapping;
4814 	} else {
4815 		*address = map_addr;
4816 	}
4817 	return result;
4818 }
4819 
4820 kern_return_t
4821 vm_map_enter_mem_object(
4822 	vm_map_t                target_map,
4823 	vm_map_offset_t         *address,
4824 	vm_map_size_t           initial_size,
4825 	vm_map_offset_t         mask,
4826 	int                     flags,
4827 	vm_map_kernel_flags_t   vmk_flags,
4828 	vm_tag_t                tag,
4829 	ipc_port_t              port,
4830 	vm_object_offset_t      offset,
4831 	boolean_t               copy,
4832 	vm_prot_t               cur_protection,
4833 	vm_prot_t               max_protection,
4834 	vm_inherit_t            inheritance)
4835 {
4836 	kern_return_t ret;
4837 
4838 	ret = vm_map_enter_mem_object_helper(target_map,
4839 	    address,
4840 	    initial_size,
4841 	    mask,
4842 	    flags,
4843 	    vmk_flags,
4844 	    tag,
4845 	    port,
4846 	    offset,
4847 	    copy,
4848 	    cur_protection,
4849 	    max_protection,
4850 	    inheritance,
4851 	    NULL,
4852 	    0);
4853 
4854 #if KASAN
4855 	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4856 		kasan_notify_address(*address, initial_size);
4857 	}
4858 #endif
4859 
4860 	return ret;
4861 }
4862 
4863 kern_return_t
4864 vm_map_enter_mem_object_prefault(
4865 	vm_map_t                target_map,
4866 	vm_map_offset_t         *address,
4867 	vm_map_size_t           initial_size,
4868 	vm_map_offset_t         mask,
4869 	int                     flags,
4870 	vm_map_kernel_flags_t   vmk_flags,
4871 	vm_tag_t                tag,
4872 	ipc_port_t              port,
4873 	vm_object_offset_t      offset,
4874 	vm_prot_t               cur_protection,
4875 	vm_prot_t               max_protection,
4876 	upl_page_list_ptr_t     page_list,
4877 	unsigned int            page_list_count)
4878 {
4879 	kern_return_t ret;
4880 
4881 	ret = vm_map_enter_mem_object_helper(target_map,
4882 	    address,
4883 	    initial_size,
4884 	    mask,
4885 	    flags,
4886 	    vmk_flags,
4887 	    tag,
4888 	    port,
4889 	    offset,
4890 	    FALSE,
4891 	    cur_protection,
4892 	    max_protection,
4893 	    VM_INHERIT_DEFAULT,
4894 	    page_list,
4895 	    page_list_count);
4896 
4897 #if KASAN
4898 	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4899 		kasan_notify_address(*address, initial_size);
4900 	}
4901 #endif
4902 
4903 	return ret;
4904 }
4905 
4906 
4907 kern_return_t
4908 vm_map_enter_mem_object_control(
4909 	vm_map_t                target_map,
4910 	vm_map_offset_t         *address,
4911 	vm_map_size_t           initial_size,
4912 	vm_map_offset_t         mask,
4913 	int                     flags,
4914 	vm_map_kernel_flags_t   vmk_flags,
4915 	vm_tag_t                tag,
4916 	memory_object_control_t control,
4917 	vm_object_offset_t      offset,
4918 	boolean_t               copy,
4919 	vm_prot_t               cur_protection,
4920 	vm_prot_t               max_protection,
4921 	vm_inherit_t            inheritance)
4922 {
4923 	vm_map_address_t        map_addr;
4924 	vm_map_size_t           map_size;
4925 	vm_object_t             object;
4926 	vm_object_size_t        size;
4927 	kern_return_t           result;
4928 	memory_object_t         pager;
4929 	vm_prot_t               pager_prot;
4930 	kern_return_t           kr;
4931 #if __arm64__
4932 	boolean_t               fourk = vmk_flags.vmkf_fourk;
4933 #endif /* __arm64__ */
4934 
4935 	/*
4936 	 * Check arguments for validity
4937 	 */
4938 	if ((target_map == VM_MAP_NULL) ||
4939 	    (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4940 	    (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4941 	    (inheritance > VM_INHERIT_LAST_VALID) ||
4942 	    initial_size == 0) {
4943 		return KERN_INVALID_ARGUMENT;
4944 	}
4945 
4946 #if __arm64__
4947 	if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
4948 		fourk = FALSE;
4949 	}
4950 
4951 	if (fourk) {
4952 		map_addr = vm_map_trunc_page(*address,
4953 		    FOURK_PAGE_MASK);
4954 		map_size = vm_map_round_page(initial_size,
4955 		    FOURK_PAGE_MASK);
4956 	} else
4957 #endif /* __arm64__ */
4958 	{
4959 		map_addr = vm_map_trunc_page(*address,
4960 		    VM_MAP_PAGE_MASK(target_map));
4961 		map_size = vm_map_round_page(initial_size,
4962 		    VM_MAP_PAGE_MASK(target_map));
4963 	}
4964 	size = vm_object_round_page(initial_size);
4965 
4966 	object = memory_object_control_to_vm_object(control);
4967 
4968 	if (object == VM_OBJECT_NULL) {
4969 		return KERN_INVALID_OBJECT;
4970 	}
4971 
4972 	if (object == kernel_object) {
4973 		printf("Warning: Attempt to map kernel object"
4974 		    " by a non-private kernel entity\n");
4975 		return KERN_INVALID_OBJECT;
4976 	}
4977 
4978 	vm_object_lock(object);
4979 	object->ref_count++;
4980 
4981 	/*
4982 	 * For "named" VM objects, let the pager know that the
4983 	 * memory object is being mapped.  Some pagers need to keep
4984 	 * track of this, to know when they can reclaim the memory
4985 	 * object, for example.
4986 	 * VM calls memory_object_map() for each mapping (specifying
4987 	 * the protection of each mapping) and calls
4988 	 * memory_object_last_unmap() when all the mappings are gone.
4989 	 */
4990 	pager_prot = max_protection;
4991 	if (copy) {
4992 		pager_prot &= ~VM_PROT_WRITE;
4993 	}
4994 	pager = object->pager;
4995 	if (object->named &&
4996 	    pager != MEMORY_OBJECT_NULL &&
4997 	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4998 		assert(object->pager_ready);
4999 		vm_object_mapping_wait(object, THREAD_UNINT);
5000 		vm_object_mapping_begin(object);
5001 		vm_object_unlock(object);
5002 
5003 		kr = memory_object_map(pager, pager_prot);
5004 		assert(kr == KERN_SUCCESS);
5005 
5006 		vm_object_lock(object);
5007 		vm_object_mapping_end(object);
5008 	}
5009 	vm_object_unlock(object);
5010 
5011 	/*
5012 	 *	Perform the copy if requested
5013 	 */
5014 
5015 	if (copy) {
5016 		vm_object_t             new_object;
5017 		vm_object_offset_t      new_offset;
5018 
5019 		result = vm_object_copy_strategically(object, offset, size,
5020 		    &new_object, &new_offset,
5021 		    &copy);
5022 
5023 
5024 		if (result == KERN_MEMORY_RESTART_COPY) {
5025 			boolean_t success;
5026 			boolean_t src_needs_copy;
5027 
5028 			/*
5029 			 * XXX
5030 			 * We currently ignore src_needs_copy.
5031 			 * This really is the issue of how to make
5032 			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5033 			 * non-kernel users to use. Solution forthcoming.
5034 			 * In the meantime, since we don't allow non-kernel
5035 			 * memory managers to specify symmetric copy,
5036 			 * we won't run into problems here.
5037 			 */
5038 			new_object = object;
5039 			new_offset = offset;
5040 			success = vm_object_copy_quickly(new_object,
5041 			    new_offset, size,
5042 			    &src_needs_copy,
5043 			    &copy);
5044 			assert(success);
5045 			result = KERN_SUCCESS;
5046 		}
5047 		/*
5048 		 *	Throw away the reference to the
5049 		 *	original object, as it won't be mapped.
5050 		 */
5051 
5052 		vm_object_deallocate(object);
5053 
5054 		if (result != KERN_SUCCESS) {
5055 			return result;
5056 		}
5057 
5058 		object = new_object;
5059 		offset = new_offset;
5060 	}
5061 
5062 #if __arm64__
5063 	if (fourk) {
5064 		result = vm_map_enter_fourk(target_map,
5065 		    &map_addr,
5066 		    map_size,
5067 		    (vm_map_offset_t)mask,
5068 		    flags,
5069 		    vmk_flags,
5070 		    tag,
5071 		    object, offset,
5072 		    copy,
5073 		    cur_protection, max_protection,
5074 		    inheritance);
5075 	} else
5076 #endif /* __arm64__ */
5077 	{
5078 		result = vm_map_enter(target_map,
5079 		    &map_addr, map_size,
5080 		    (vm_map_offset_t)mask,
5081 		    flags,
5082 		    vmk_flags,
5083 		    tag,
5084 		    object, offset,
5085 		    copy,
5086 		    cur_protection, max_protection,
5087 		    inheritance);
5088 	}
5089 	if (result != KERN_SUCCESS) {
5090 		vm_object_deallocate(object);
5091 	}
5092 	*address = map_addr;
5093 
5094 	return result;
5095 }
5096 
5097 
5098 #if     VM_CPM
5099 
5100 #ifdef MACH_ASSERT
5101 extern pmap_paddr_t     avail_start, avail_end;
5102 #endif
5103 
5104 /*
5105  *	Allocate memory in the specified map, with the caveat that
5106  *	the memory is physically contiguous.  This call may fail
5107  *	if the system can't find sufficient contiguous memory.
5108  *	This call may cause or lead to heart-stopping amounts of
5109  *	paging activity.
5110  *
5111  *	Memory obtained from this call should be freed in the
5112  *	normal way, viz., via vm_deallocate.
5113  */
5114 kern_return_t
5115 vm_map_enter_cpm(
5116 	vm_map_t                map,
5117 	vm_map_offset_t *addr,
5118 	vm_map_size_t           size,
5119 	int                     flags)
5120 {
5121 	vm_object_t             cpm_obj;
5122 	pmap_t                  pmap;
5123 	vm_page_t               m, pages;
5124 	kern_return_t           kr;
5125 	vm_map_offset_t         va, start, end, offset;
5126 #if     MACH_ASSERT
5127 	vm_map_offset_t         prev_addr = 0;
5128 #endif  /* MACH_ASSERT */
5129 
5130 	boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5131 	vm_tag_t tag;
5132 
5133 	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5134 		/* XXX TODO4K do we need to support this? */
5135 		*addr = 0;
5136 		return KERN_NOT_SUPPORTED;
5137 	}
5138 
5139 	VM_GET_FLAGS_ALIAS(flags, tag);
5140 
5141 	if (size == 0) {
5142 		*addr = 0;
5143 		return KERN_SUCCESS;
5144 	}
5145 	if (anywhere) {
5146 		*addr = vm_map_min(map);
5147 	} else {
5148 		*addr = vm_map_trunc_page(*addr,
5149 		    VM_MAP_PAGE_MASK(map));
5150 	}
5151 	size = vm_map_round_page(size,
5152 	    VM_MAP_PAGE_MASK(map));
5153 
5154 	/*
5155 	 * LP64todo - cpm_allocate should probably allow
5156 	 * allocations of >4GB, but not with the current
5157 	 * algorithm, so just cast down the size for now.
5158 	 */
5159 	if (size > VM_MAX_ADDRESS) {
5160 		return KERN_RESOURCE_SHORTAGE;
5161 	}
5162 	if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5163 	    &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5164 		return kr;
5165 	}
5166 
5167 	cpm_obj = vm_object_allocate((vm_object_size_t)size);
5168 	assert(cpm_obj != VM_OBJECT_NULL);
5169 	assert(cpm_obj->internal);
5170 	assert(cpm_obj->vo_size == (vm_object_size_t)size);
5171 	assert(cpm_obj->can_persist == FALSE);
5172 	assert(cpm_obj->pager_created == FALSE);
5173 	assert(cpm_obj->pageout == FALSE);
5174 	assert(cpm_obj->shadow == VM_OBJECT_NULL);
5175 
5176 	/*
5177 	 *	Insert pages into object.
5178 	 */
5179 
5180 	vm_object_lock(cpm_obj);
5181 	for (offset = 0; offset < size; offset += PAGE_SIZE) {
5182 		m = pages;
5183 		pages = NEXT_PAGE(m);
5184 		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5185 
5186 		assert(!m->vmp_gobbled);
5187 		assert(!m->vmp_wanted);
5188 		assert(!m->vmp_pageout);
5189 		assert(!m->vmp_tabled);
5190 		assert(VM_PAGE_WIRED(m));
5191 		assert(m->vmp_busy);
5192 		assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5193 
5194 		m->vmp_busy = FALSE;
5195 		vm_page_insert(m, cpm_obj, offset);
5196 	}
5197 	assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5198 	vm_object_unlock(cpm_obj);
5199 
5200 	/*
5201 	 *	Hang onto a reference on the object in case a
5202 	 *	multi-threaded application for some reason decides
5203 	 *	to deallocate the portion of the address space into
5204 	 *	which we will insert this object.
5205 	 *
5206 	 *	Unfortunately, we must insert the object now before
5207 	 *	we can talk to the pmap module about which addresses
5208 	 *	must be wired down.  Hence, the race with a multi-
5209 	 *	threaded app.
5210 	 */
5211 	vm_object_reference(cpm_obj);
5212 
5213 	/*
5214 	 *	Insert object into map.
5215 	 */
5216 
5217 	kr = vm_map_enter(
5218 		map,
5219 		addr,
5220 		size,
5221 		(vm_map_offset_t)0,
5222 		flags,
5223 		VM_MAP_KERNEL_FLAGS_NONE,
5224 		cpm_obj,
5225 		(vm_object_offset_t)0,
5226 		FALSE,
5227 		VM_PROT_ALL,
5228 		VM_PROT_ALL,
5229 		VM_INHERIT_DEFAULT);
5230 
5231 	if (kr != KERN_SUCCESS) {
5232 		/*
5233 		 *	A CPM object doesn't have can_persist set,
5234 		 *	so all we have to do is deallocate it to
5235 		 *	free up these pages.
5236 		 */
5237 		assert(cpm_obj->pager_created == FALSE);
5238 		assert(cpm_obj->can_persist == FALSE);
5239 		assert(cpm_obj->pageout == FALSE);
5240 		assert(cpm_obj->shadow == VM_OBJECT_NULL);
5241 		vm_object_deallocate(cpm_obj); /* kill acquired ref */
5242 		vm_object_deallocate(cpm_obj); /* kill creation ref */
5243 	}
5244 
5245 	/*
5246 	 *	Inform the physical mapping system that the
5247 	 *	range of addresses may not fault, so that
5248 	 *	page tables and such can be locked down as well.
5249 	 */
5250 	start = *addr;
5251 	end = start + size;
5252 	pmap = vm_map_pmap(map);
5253 	pmap_pageable(pmap, start, end, FALSE);
5254 
5255 	/*
5256 	 *	Enter each page into the pmap, to avoid faults.
5257 	 *	Note that this loop could be coded more efficiently,
5258 	 *	if the need arose, rather than looking up each page
5259 	 *	again.
5260 	 */
5261 	for (offset = 0, va = start; offset < size;
5262 	    va += PAGE_SIZE, offset += PAGE_SIZE) {
5263 		int type_of_fault;
5264 
5265 		vm_object_lock(cpm_obj);
5266 		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5267 		assert(m != VM_PAGE_NULL);
5268 
5269 		vm_page_zero_fill(m);
5270 
5271 		type_of_fault = DBG_ZERO_FILL_FAULT;
5272 
5273 		vm_fault_enter(m, pmap, va,
5274 		    PAGE_SIZE, 0,
5275 		    VM_PROT_ALL, VM_PROT_WRITE,
5276 		    VM_PAGE_WIRED(m),
5277 		    FALSE,                             /* change_wiring */
5278 		    VM_KERN_MEMORY_NONE,                             /* tag - not wiring */
5279 		    FALSE,                             /* no_cache */
5280 		    FALSE,                             /* cs_bypass */
5281 		    0,                                 /* user_tag */
5282 		    0,                             /* pmap_options */
5283 		    NULL,                              /* need_retry */
5284 		    &type_of_fault);
5285 
5286 		vm_object_unlock(cpm_obj);
5287 	}
5288 
5289 #if     MACH_ASSERT
5290 	/*
5291 	 *	Verify ordering in address space.
5292 	 */
5293 	for (offset = 0; offset < size; offset += PAGE_SIZE) {
5294 		vm_object_lock(cpm_obj);
5295 		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5296 		vm_object_unlock(cpm_obj);
5297 		if (m == VM_PAGE_NULL) {
5298 			panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
5299 			    cpm_obj, (uint64_t)offset);
5300 		}
5301 		assert(m->vmp_tabled);
5302 		assert(!m->vmp_busy);
5303 		assert(!m->vmp_wanted);
5304 		assert(!m->vmp_fictitious);
5305 		assert(!m->vmp_private);
5306 		assert(!m->vmp_absent);
5307 		assert(!m->vmp_error);
5308 		assert(!m->vmp_cleaning);
5309 		assert(!m->vmp_laundry);
5310 		assert(!m->vmp_precious);
5311 		assert(!m->vmp_clustered);
5312 		if (offset != 0) {
5313 			if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5314 				printf("start 0x%llx end 0x%llx va 0x%llx\n",
5315 				    (uint64_t)start, (uint64_t)end, (uint64_t)va);
5316 				printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5317 				printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5318 				panic("vm_allocate_cpm:  pages not contig!");
5319 			}
5320 		}
5321 		prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5322 	}
5323 #endif  /* MACH_ASSERT */
5324 
5325 	vm_object_deallocate(cpm_obj); /* kill extra ref */
5326 
5327 	return kr;
5328 }
5329 
5330 
5331 #else   /* VM_CPM */
5332 
5333 /*
5334  *	Interface is defined in all cases, but unless the kernel
5335  *	is built explicitly for this option, the interface does
5336  *	nothing.
5337  */
5338 
5339 kern_return_t
5340 vm_map_enter_cpm(
5341 	__unused vm_map_t       map,
5342 	__unused vm_map_offset_t        *addr,
5343 	__unused vm_map_size_t  size,
5344 	__unused int            flags)
5345 {
5346 	return KERN_FAILURE;
5347 }
5348 #endif /* VM_CPM */
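
/*
 * Editorial sketch (not part of the original source): how a kernel
 * client might obtain physically contiguous memory through
 * vm_map_enter_cpm() and release it, as the block comment above says,
 * via vm_deallocate().  The function name and size are hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_contig_alloc(vm_map_t map)
{
	vm_map_offset_t addr = 0;
	vm_map_size_t   size = 4 * PAGE_SIZE;
	kern_return_t   kr;

	/* Ask for any address; fails if contiguous pages can't be found. */
	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* ... use the wired, physically contiguous range [addr, addr + size) ... */
	return vm_deallocate(map, addr, size);
}
#endif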
5349 
5350 /* Not used without nested pmaps */
5351 #ifndef NO_NESTED_PMAP
5352 /*
5353  * Clip and unnest a portion of a nested submap mapping.
5354  */
5355 
5356 
5357 static void
5358 vm_map_clip_unnest(
5359 	vm_map_t        map,
5360 	vm_map_entry_t  entry,
5361 	vm_map_offset_t start_unnest,
5362 	vm_map_offset_t end_unnest)
5363 {
5364 	vm_map_offset_t old_start_unnest = start_unnest;
5365 	vm_map_offset_t old_end_unnest = end_unnest;
5366 
5367 	assert(entry->is_sub_map);
5368 	assert(VME_SUBMAP(entry) != NULL);
5369 	assert(entry->use_pmap);
5370 
5371 	/*
5372 	 * Query the platform for the optimal unnest range.
5373 	 * DRK: There's some duplication of effort here, since
5374 	 * callers may have adjusted the range to some extent. This
5375 	 * routine was introduced to support 1GiB subtree nesting
5376 	 * for x86 platforms, which can also nest on 2MiB boundaries
5377 	 * depending on size/alignment.
5378 	 */
5379 	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5380 		assert(VME_SUBMAP(entry)->is_nested_map);
5381 		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5382 		log_unnest_badness(map,
5383 		    old_start_unnest,
5384 		    old_end_unnest,
5385 		    VME_SUBMAP(entry)->is_nested_map,
5386 		    (entry->vme_start +
5387 		    VME_SUBMAP(entry)->lowest_unnestable_start -
5388 		    VME_OFFSET(entry)));
5389 	}
5390 
5391 	if (entry->vme_start > start_unnest ||
5392 	    entry->vme_end < end_unnest) {
5393 		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5394 		    "bad nested entry: start=0x%llx end=0x%llx\n",
5395 		    (long long)start_unnest, (long long)end_unnest,
5396 		    (long long)entry->vme_start, (long long)entry->vme_end);
5397 	}
5398 
5399 	if (start_unnest > entry->vme_start) {
5400 		_vm_map_clip_start(&map->hdr,
5401 		    entry,
5402 		    start_unnest);
5403 		if (map->holelistenabled) {
5404 			vm_map_store_update_first_free(map, NULL, FALSE);
5405 		} else {
5406 			vm_map_store_update_first_free(map, map->first_free, FALSE);
5407 		}
5408 	}
5409 	if (entry->vme_end > end_unnest) {
5410 		_vm_map_clip_end(&map->hdr,
5411 		    entry,
5412 		    end_unnest);
5413 		if (map->holelistenabled) {
5414 			vm_map_store_update_first_free(map, NULL, FALSE);
5415 		} else {
5416 			vm_map_store_update_first_free(map, map->first_free, FALSE);
5417 		}
5418 	}
5419 
5420 	pmap_unnest(map->pmap,
5421 	    entry->vme_start,
5422 	    entry->vme_end - entry->vme_start);
5423 	if ((map->mapped_in_other_pmaps) && os_ref_get_count_raw(&map->map_refcnt) != 0) {
5424 		/* clean up parent map/maps */
5425 		vm_map_submap_pmap_clean(
5426 			map, entry->vme_start,
5427 			entry->vme_end,
5428 			VME_SUBMAP(entry),
5429 			VME_OFFSET(entry));
5430 	}
5431 	entry->use_pmap = FALSE;
5432 	if ((map->pmap != kernel_pmap) &&
5433 	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5434 		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5435 	}
5436 }
5437 #endif  /* NO_NESTED_PMAP */
5438 
5439 /*
5440  *	vm_map_clip_start:	[ internal use only ]
5441  *
5442  *	Asserts that the given entry begins at or after
5443  *	the specified address; if necessary,
5444  *	it splits the entry into two.
5445  */
5446 void
5447 vm_map_clip_start(
5448 	vm_map_t        map,
5449 	vm_map_entry_t  entry,
5450 	vm_map_offset_t startaddr)
5451 {
5452 #ifndef NO_NESTED_PMAP
5453 	if (entry->is_sub_map &&
5454 	    entry->use_pmap &&
5455 	    startaddr >= entry->vme_start) {
5456 		vm_map_offset_t start_unnest, end_unnest;
5457 
5458 		/*
5459 		 * Make sure "startaddr" is no longer in a nested range
5460 		 * before we clip.  Unnest only the minimum range the platform
5461 		 * can handle.
5462 		 * vm_map_clip_unnest may perform additional adjustments to
5463 		 * the unnest range.
5464 		 */
5465 		start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
5466 		end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
5467 		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5468 	}
5469 #endif /* NO_NESTED_PMAP */
5470 	if (startaddr > entry->vme_start) {
5471 		if (VME_OBJECT(entry) &&
5472 		    !entry->is_sub_map &&
5473 		    VME_OBJECT(entry)->phys_contiguous) {
5474 			pmap_remove(map->pmap,
5475 			    (addr64_t)(entry->vme_start),
5476 			    (addr64_t)(entry->vme_end));
5477 		}
5478 		if (entry->vme_atomic) {
5479 			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)", map, entry);
5480 		}
5481 
5482 		DTRACE_VM5(
5483 			vm_map_clip_start,
5484 			vm_map_t, map,
5485 			vm_map_offset_t, entry->vme_start,
5486 			vm_map_offset_t, entry->vme_end,
5487 			vm_map_offset_t, startaddr,
5488 			int, VME_ALIAS(entry));
5489 
5490 		_vm_map_clip_start(&map->hdr, entry, startaddr);
5491 		if (map->holelistenabled) {
5492 			vm_map_store_update_first_free(map, NULL, FALSE);
5493 		} else {
5494 			vm_map_store_update_first_free(map, map->first_free, FALSE);
5495 		}
5496 	}
5497 }
5498 
5499 
5500 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5501 	MACRO_BEGIN \
5502 	if ((startaddr) > (entry)->vme_start) \
5503 	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5504 	MACRO_END
5505 
5506 /*
5507  *	This routine is called only when it is known that
5508  *	the entry must be split.
5509  */
5510 static void
5511 _vm_map_clip_start(
5512 	struct vm_map_header    *map_header,
5513 	vm_map_entry_t          entry,
5514 	vm_map_offset_t         start)
5515 {
5516 	vm_map_entry_t  new_entry;
5517 
5518 	/*
5519 	 *	Split off the front portion --
5520 	 *	note that we must insert the new
5521 	 *	entry BEFORE this one, so that
5522 	 *	this entry has the specified starting
5523 	 *	address.
5524 	 */
5525 
5526 	if (entry->map_aligned) {
5527 		assert(VM_MAP_PAGE_ALIGNED(start,
5528 		    VM_MAP_HDR_PAGE_MASK(map_header)));
5529 	}
5530 
5531 	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5532 	vm_map_entry_copy_full(new_entry, entry);
5533 
5534 	new_entry->vme_end = start;
5535 	assert(new_entry->vme_start < new_entry->vme_end);
5536 	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5537 	assert(start < entry->vme_end);
5538 	entry->vme_start = start;
5539 
5540 	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5541 
5542 	if (entry->is_sub_map) {
5543 		vm_map_reference(VME_SUBMAP(new_entry));
5544 	} else {
5545 		vm_object_reference(VME_OBJECT(new_entry));
5546 	}
5547 }
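
/*
 * Editorial illustration (hypothetical addresses): clipping an entry
 * spanning [0x1000, 0x5000) at start = 0x3000 links a new entry for
 * [0x1000, 0x3000) BEFORE the original, and the original entry becomes
 * [0x3000, 0x5000) with its VME_OFFSET advanced by 0x2000, so both
 * halves keep mapping the same portions of the backing object.
 */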
5548 
5549 
5550 /*
5551  *	vm_map_clip_end:	[ internal use only ]
5552  *
5553  *	Asserts that the given entry ends at or before
5554  *	the specified address; if necessary,
5555  *	it splits the entry into two.
5556  */
5557 void
5558 vm_map_clip_end(
5559 	vm_map_t        map,
5560 	vm_map_entry_t  entry,
5561 	vm_map_offset_t endaddr)
5562 {
5563 	if (endaddr > entry->vme_end) {
5564 		/*
5565 		 * Within the scope of this clipping, limit "endaddr" to
5566 		 * the end of this map entry...
5567 		 */
5568 		endaddr = entry->vme_end;
5569 	}
5570 #ifndef NO_NESTED_PMAP
5571 	if (entry->is_sub_map && entry->use_pmap) {
5572 		vm_map_offset_t start_unnest, end_unnest;
5573 
5574 		/*
5575 		 * Make sure the range between the start of this entry and
5576 		 * the new "endaddr" is no longer nested before we clip.
5577 		 * Unnest only the minimum range the platform can handle.
5578 		 * vm_map_clip_unnest may perform additional adjustments to
5579 		 * the unnest range.
5580 		 */
5581 		start_unnest = entry->vme_start;
5582 		end_unnest =
5583 		    (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
5584 		    ~(pmap_shared_region_size_min(map->pmap) - 1);
5585 		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5586 	}
5587 #endif /* NO_NESTED_PMAP */
5588 	if (endaddr < entry->vme_end) {
5589 		if (VME_OBJECT(entry) &&
5590 		    !entry->is_sub_map &&
5591 		    VME_OBJECT(entry)->phys_contiguous) {
5592 			pmap_remove(map->pmap,
5593 			    (addr64_t)(entry->vme_start),
5594 			    (addr64_t)(entry->vme_end));
5595 		}
5596 		if (entry->vme_atomic) {
5597 			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)", map, entry);
5598 		}
5599 		DTRACE_VM5(
5600 			vm_map_clip_end,
5601 			vm_map_t, map,
5602 			vm_map_offset_t, entry->vme_start,
5603 			vm_map_offset_t, entry->vme_end,
5604 			vm_map_offset_t, endaddr,
5605 			int, VME_ALIAS(entry));
5606 
5607 		_vm_map_clip_end(&map->hdr, entry, endaddr);
5608 		if (map->holelistenabled) {
5609 			vm_map_store_update_first_free(map, NULL, FALSE);
5610 		} else {
5611 			vm_map_store_update_first_free(map, map->first_free, FALSE);
5612 		}
5613 	}
5614 }
5615 
5616 
5617 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5618 	MACRO_BEGIN \
5619 	if ((endaddr) < (entry)->vme_end) \
5620 	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5621 	MACRO_END
5622 
5623 /*
5624  *	This routine is called only when it is known that
5625  *	the entry must be split.
5626  */
5627 static void
5628 _vm_map_clip_end(
5629 	struct vm_map_header    *map_header,
5630 	vm_map_entry_t          entry,
5631 	vm_map_offset_t         end)
5632 {
5633 	vm_map_entry_t  new_entry;
5634 
5635 	/*
5636 	 *	Create a new entry and insert it
5637 	 *	AFTER the specified entry
5638 	 */
5639 
5640 	if (entry->map_aligned) {
5641 		assert(VM_MAP_PAGE_ALIGNED(end,
5642 		    VM_MAP_HDR_PAGE_MASK(map_header)));
5643 	}
5644 
5645 	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5646 	vm_map_entry_copy_full(new_entry, entry);
5647 
5648 	assert(entry->vme_start < end);
5649 	new_entry->vme_start = entry->vme_end = end;
5650 	VME_OFFSET_SET(new_entry,
5651 	    VME_OFFSET(new_entry) + (end - entry->vme_start));
5652 	assert(new_entry->vme_start < new_entry->vme_end);
5653 
5654 	_vm_map_store_entry_link(map_header, entry, new_entry);
5655 
5656 	if (entry->is_sub_map) {
5657 		vm_map_reference(VME_SUBMAP(new_entry));
5658 	} else {
5659 		vm_object_reference(VME_OBJECT(new_entry));
5660 	}
5661 }
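
/*
 * Editorial illustration (hypothetical addresses): clipping an entry
 * spanning [0x1000, 0x5000) at end = 0x3000 links a new entry for
 * [0x3000, 0x5000) AFTER the original, with the new entry's VME_OFFSET
 * advanced by 0x2000, while the original entry shrinks to
 * [0x1000, 0x3000).
 */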
5662 
5663 
5664 /*
5665  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
5666  *
5667  *	Asserts that the starting and ending region
5668  *	addresses fall within the valid range of the map.
5669  */
5670 #define VM_MAP_RANGE_CHECK(map, start, end)     \
5671 	MACRO_BEGIN                             \
5672 	if (start < vm_map_min(map))            \
5673 	        start = vm_map_min(map);        \
5674 	if (end > vm_map_max(map))              \
5675 	        end = vm_map_max(map);          \
5676 	if (start > end)                        \
5677 	        start = end;                    \
5678 	MACRO_END
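
/*
 * Editorial worked example of the clamping above (hypothetical bounds):
 * for a map with [vm_map_min, vm_map_max) = [0x1000, 0x9000), the pair
 * (start, end) = (0x0, 0xA000) is clamped to (0x1000, 0x9000), and an
 * inverted pair such as (0x8000, 0x2000) collapses to the empty range
 * (0x2000, 0x2000) rather than being rejected.
 */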
5679 
5680 /*
5681  *	vm_map_range_check:	[ internal use only ]
5682  *
5683  *	Check that the region defined by the specified start and
5684  *	end addresses is wholly contained within a single map
5685  *	entry or set of adjacent map entries of the specified map,
5686  *	i.e. the specified region contains no unmapped space.
5687  *	If any or all of the region is unmapped, FALSE is returned.
5688  *	Otherwise, TRUE is returned and if the output argument 'entry'
5689  *	is not NULL it points to the map entry containing the start
5690  *	of the region.
5691  *
5692  *	The map is locked for reading on entry and is left locked.
5693  */
5694 static boolean_t
5695 vm_map_range_check(
5696 	vm_map_t                map,
5697 	vm_map_offset_t         start,
5698 	vm_map_offset_t         end,
5699 	vm_map_entry_t          *entry)
5700 {
5701 	vm_map_entry_t          cur;
5702 	vm_map_offset_t         prev;
5703 
5704 	/*
5705 	 *      Basic sanity checks first
5706 	 */
5707 	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5708 		return FALSE;
5709 	}
5710 
5711 	/*
5712 	 *      Check first if the region starts within a valid
5713 	 *	mapping for the map.
5714 	 */
5715 	if (!vm_map_lookup_entry(map, start, &cur)) {
5716 		return FALSE;
5717 	}
5718 
5719 	/*
5720 	 *	Optimize for the case that the region is contained
5721 	 *	in a single map entry.
5722 	 */
5723 	if (entry != (vm_map_entry_t *) NULL) {
5724 		*entry = cur;
5725 	}
5726 	if (end <= cur->vme_end) {
5727 		return TRUE;
5728 	}
5729 
5730 	/*
5731 	 *      If the region is not wholly contained within a
5732 	 *      single entry, walk the entries looking for holes.
5733 	 */
5734 	prev = cur->vme_end;
5735 	cur = cur->vme_next;
5736 	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5737 		if (end <= cur->vme_end) {
5738 			return TRUE;
5739 		}
5740 		prev = cur->vme_end;
5741 		cur = cur->vme_next;
5742 	}
5743 	return FALSE;
5744 }
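
/*
 * Editorial example (hypothetical layout): with adjacent entries
 * [0x1000, 0x3000) and [0x3000, 0x6000) followed by a hole, the check
 * above returns TRUE for (start, end) = (0x2000, 0x5000), walking into
 * the second entry, and FALSE for (0x2000, 0x7000), which runs past
 * the last mapped byte.
 */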
5745 
5746 /*
5747  *	vm_map_submap:		[ kernel use only ]
5748  *
5749  *	Mark the given range as handled by a subordinate map.
5750  *
5751  *	This range must have been created with vm_map_find using
5752  *	the vm_submap_object, and no other operations may have been
5753  *	performed on this range prior to calling vm_map_submap.
5754  *
5755  *	Only a limited number of operations can be performed
5756  *	within this range after calling vm_map_submap:
5757  *		vm_fault
5758  *	[Don't try vm_map_copyin!]
5759  *
5760  *	To remove a submapping, one must first remove the
5761  *	range from the superior map, and then destroy the
5762  *	submap (if desired).  [Better yet, don't try it.]
5763  */
5764 kern_return_t
5765 vm_map_submap(
5766 	vm_map_t        map,
5767 	vm_map_offset_t start,
5768 	vm_map_offset_t end,
5769 	vm_map_t        submap,
5770 	vm_map_offset_t offset,
5771 #ifdef NO_NESTED_PMAP
5772 	__unused
5773 #endif  /* NO_NESTED_PMAP */
5774 	boolean_t       use_pmap)
5775 {
5776 	vm_map_entry_t          entry;
5777 	kern_return_t           result = KERN_INVALID_ARGUMENT;
5778 	vm_object_t             object;
5779 
5780 	vm_map_lock(map);
5781 
5782 	if (!vm_map_lookup_entry(map, start, &entry)) {
5783 		entry = entry->vme_next;
5784 	}
5785 
5786 	if (entry == vm_map_to_entry(map) ||
5787 	    entry->is_sub_map) {
5788 		vm_map_unlock(map);
5789 		return KERN_INVALID_ARGUMENT;
5790 	}
5791 
5792 	vm_map_clip_start(map, entry, start);
5793 	vm_map_clip_end(map, entry, end);
5794 
5795 	if ((entry->vme_start == start) && (entry->vme_end == end) &&
5796 	    (!entry->is_sub_map) &&
5797 	    ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5798 	    (object->resident_page_count == 0) &&
5799 	    (object->copy == VM_OBJECT_NULL) &&
5800 	    (object->shadow == VM_OBJECT_NULL) &&
5801 	    (!object->pager_created)) {
5802 		VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5803 		VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5804 		vm_object_deallocate(object);
5805 		entry->is_sub_map = TRUE;
5806 		entry->use_pmap = FALSE;
5807 		VME_SUBMAP_SET(entry, submap);
5808 		vm_map_reference(submap);
5809 		if (submap->mapped_in_other_pmaps == FALSE &&
5810 		    vm_map_pmap(submap) != PMAP_NULL &&
5811 		    vm_map_pmap(submap) != vm_map_pmap(map)) {
5812 			/*
5813 			 * This submap is being mapped in a map
5814 			 * that uses a different pmap.
5815 			 * Set its "mapped_in_other_pmaps" flag
5816 			 * to indicate that we now need to
5817 			 * remove mappings from all pmaps rather
5818 			 * than just the submap's pmap.
5819 			 */
5820 			submap->mapped_in_other_pmaps = TRUE;
5821 		}
5822 
5823 #ifndef NO_NESTED_PMAP
5824 		if (use_pmap) {
5825 			/* nest if platform code will allow */
5826 			if (submap->pmap == NULL) {
5827 				ledger_t ledger = map->pmap->ledger;
5828 				submap->pmap = pmap_create_options(ledger,
5829 				    (vm_map_size_t) 0, 0);
5830 				if (submap->pmap == PMAP_NULL) {
5831 					vm_map_unlock(map);
5832 					return KERN_NO_SPACE;
5833 				}
5834 #if     defined(__arm__) || defined(__arm64__)
5835 				pmap_set_nested(submap->pmap);
5836 #endif
5837 			}
5838 			result = pmap_nest(map->pmap,
5839 			    (VME_SUBMAP(entry))->pmap,
5840 			    (addr64_t)start,
5841 			    (uint64_t)(end - start));
5842 			if (result) {
5843 				panic("vm_map_submap: pmap_nest failed, rc = %08X", result);
5844 			}
5845 			entry->use_pmap = TRUE;
5846 		}
5847 #else   /* NO_NESTED_PMAP */
5848 		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5849 #endif  /* NO_NESTED_PMAP */
5850 		result = KERN_SUCCESS;
5851 	}
5852 	vm_map_unlock(map);
5853 
5854 	return result;
5855 }
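
/*
 * Editorial sketch of the call sequence the preconditions above
 * describe (names are hypothetical, and "submap" is assumed to have
 * been created elsewhere): the range must first be carved out against
 * vm_submap_object, and only then marked as a submap.
 */
#if 0 /* example only */
static kern_return_t
example_install_submap(vm_map_t parent_map, vm_map_t submap)
{
	vm_map_offset_t addr = 0;
	vm_map_size_t   size = pmap_shared_region_size_min(parent_map->pmap);
	kern_return_t   kr;

	/* Carve out the range against vm_submap_object first... */
	kr = vm_map_enter(parent_map, &addr, size, 0,
	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
	    vm_submap_object, 0, FALSE,
	    VM_PROT_ALL, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* ...then mark it as a submap; TRUE requests pmap nesting. */
	return vm_map_submap(parent_map, addr, addr + size, submap, 0, TRUE);
}
#endif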
5856 
5857 /*
5858  *	vm_map_protect:
5859  *
5860  *	Sets the protection of the specified address
5861  *	region in the target map.  If "set_max" is
5862  *	specified, the maximum protection is to be set;
5863  *	otherwise, only the current protection is affected.
5864  */
5865 kern_return_t
5866 vm_map_protect(
5867 	vm_map_t        map,
5868 	vm_map_offset_t start,
5869 	vm_map_offset_t end,
5870 	vm_prot_t       new_prot,
5871 	boolean_t       set_max)
5872 {
5873 	vm_map_entry_t                  current;
5874 	vm_map_offset_t                 prev;
5875 	vm_map_entry_t                  entry;
5876 	vm_prot_t                       new_max;
5877 	int                             pmap_options = 0;
5878 	kern_return_t                   kr;
5879 
5880 	if (new_prot & VM_PROT_COPY) {
5881 		vm_map_offset_t         new_start;
5882 		vm_prot_t               cur_prot, max_prot;
5883 		vm_map_kernel_flags_t   kflags;
5884 
5885 		/* LP64todo - see below */
5886 		if (start >= map->max_offset) {
5887 			return KERN_INVALID_ADDRESS;
5888 		}
5889 
5890 		if ((new_prot & VM_PROT_ALLEXEC) &&
5891 		    map->pmap != kernel_pmap &&
5892 		    (vm_map_cs_enforcement(map)
5893 #if XNU_TARGET_OS_OSX && __arm64__
5894 		    || !VM_MAP_IS_EXOTIC(map)
5895 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
5896 		    ) &&
5897 		    VM_MAP_POLICY_WX_FAIL(map)) {
5898 			DTRACE_VM3(cs_wx,
5899 			    uint64_t, (uint64_t) start,
5900 			    uint64_t, (uint64_t) end,
5901 			    vm_prot_t, new_prot);
5902 			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5903 			    proc_selfpid(),
5904 			    (current_task()->bsd_info
5905 			    ? proc_name_address(current_task()->bsd_info)
5906 			    : "?"),
5907 			    __FUNCTION__);
5908 			return KERN_PROTECTION_FAILURE;
5909 		}
5910 
5911 		/*
5912 		 * Let vm_map_remap_extract() know that it will need to:
5913 		 * + make a copy of the mapping
5914 		 * + add VM_PROT_WRITE to the max protections
5915 		 * + remove any protections that are no longer allowed from the
5916 		 *   max protections (to avoid any WRITE/EXECUTE conflict, for
5917 		 *   example).
5918 		 * Note that "max_prot" is an IN/OUT parameter only for this
5919 		 * specific (VM_PROT_COPY) case.  It's usually an OUT parameter
5920 		 * only.
5921 		 */
5922 		max_prot = new_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC);
5923 		cur_prot = VM_PROT_NONE;
5924 		kflags = VM_MAP_KERNEL_FLAGS_NONE;
5925 		kflags.vmkf_remap_prot_copy = TRUE;
5926 		kflags.vmkf_overwrite_immutable = TRUE;
5927 		new_start = start;
5928 		kr = vm_map_remap(map,
5929 		    &new_start,
5930 		    end - start,
5931 		    0, /* mask */
5932 		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5933 		    kflags,
5934 		    0,
5935 		    map,
5936 		    start,
5937 		    TRUE, /* copy-on-write remapping! */
5938 		    &cur_prot, /* IN/OUT */
5939 		    &max_prot, /* IN/OUT */
5940 		    VM_INHERIT_DEFAULT);
5941 		if (kr != KERN_SUCCESS) {
5942 			return kr;
5943 		}
5944 		new_prot &= ~VM_PROT_COPY;
5945 	}
5946 
5947 	vm_map_lock(map);
5948 
5949 	/* LP64todo - remove this check when vm_map_commpage64()
5950 	 * no longer has to stuff in a map_entry for the commpage
5951 	 * above the map's max_offset.
5952 	 */
5953 	if (start >= map->max_offset) {
5954 		vm_map_unlock(map);
5955 		return KERN_INVALID_ADDRESS;
5956 	}
5957 
5958 	while (1) {
5959 		/*
5960 		 *      Lookup the entry.  If it doesn't start in a valid
5961 		 *	entry, return an error.
5962 		 */
5963 		if (!vm_map_lookup_entry(map, start, &entry)) {
5964 			vm_map_unlock(map);
5965 			return KERN_INVALID_ADDRESS;
5966 		}
5967 
5968 		if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5969 			start = SUPERPAGE_ROUND_DOWN(start);
5970 			continue;
5971 		}
5972 		break;
5973 	}
5974 	if (entry->superpage_size) {
5975 		end = SUPERPAGE_ROUND_UP(end);
5976 	}
5977 
5978 	/*
5979 	 *	Make a first pass to check for protection and address
5980 	 *	violations.
5981 	 */
5982 
5983 	current = entry;
5984 	prev = current->vme_start;
5985 	while ((current != vm_map_to_entry(map)) &&
5986 	    (current->vme_start < end)) {
5987 		/*
5988 		 * If there is a hole, return an error.
5989 		 */
5990 		if (current->vme_start != prev) {
5991 			vm_map_unlock(map);
5992 			return KERN_INVALID_ADDRESS;
5993 		}
5994 
5995 		new_max = current->max_protection;
5996 
5997 #if defined(__x86_64__)
5998 		/* Allow max mask to include execute prot bits if this map doesn't enforce CS */
5999 		if (set_max && (new_prot & VM_PROT_ALLEXEC) && !vm_map_cs_enforcement(map)) {
6000 			new_max = (new_max & ~VM_PROT_ALLEXEC) | (new_prot & VM_PROT_ALLEXEC);
6001 		}
6002 #endif
6003 		if ((new_prot & new_max) != new_prot) {
6004 			vm_map_unlock(map);
6005 			return KERN_PROTECTION_FAILURE;
6006 		}
6007 
6008 		if (current->used_for_jit &&
6009 		    pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
6010 			vm_map_unlock(map);
6011 			return KERN_PROTECTION_FAILURE;
6012 		}
6013 
6014 		if ((new_prot & VM_PROT_WRITE) &&
6015 		    (new_prot & VM_PROT_ALLEXEC) &&
6016 #if XNU_TARGET_OS_OSX
6017 		    map->pmap != kernel_pmap &&
6018 		    (vm_map_cs_enforcement(map)
6019 #if __arm64__
6020 		    || !VM_MAP_IS_EXOTIC(map)
6021 #endif /* __arm64__ */
6022 		    ) &&
6023 #endif /* XNU_TARGET_OS_OSX */
6024 		    !(current->used_for_jit)) {
6025 			DTRACE_VM3(cs_wx,
6026 			    uint64_t, (uint64_t) current->vme_start,
6027 			    uint64_t, (uint64_t) current->vme_end,
6028 			    vm_prot_t, new_prot);
6029 			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6030 			    proc_selfpid(),
6031 			    (current_task()->bsd_info
6032 			    ? proc_name_address(current_task()->bsd_info)
6033 			    : "?"),
6034 			    __FUNCTION__);
6035 			new_prot &= ~VM_PROT_ALLEXEC;
6036 			if (VM_MAP_POLICY_WX_FAIL(map)) {
6037 				vm_map_unlock(map);
6038 				return KERN_PROTECTION_FAILURE;
6039 			}
6040 		}
6041 
6042 		/*
6043 		 * If the task has requested executable lockdown,
6044 	 * deny either:
6045 		 * - adding executable protections OR
6046 		 * - adding write protections to an existing executable mapping.
6047 		 */
6048 		if (map->map_disallow_new_exec == TRUE) {
6049 			if ((new_prot & VM_PROT_ALLEXEC) ||
6050 			    ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
6051 				vm_map_unlock(map);
6052 				return KERN_PROTECTION_FAILURE;
6053 			}
6054 		}
6055 
6056 		prev = current->vme_end;
6057 		current = current->vme_next;
6058 	}
6059 
6060 #if __arm64__
6061 	if (end > prev &&
6062 	    end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
6063 		vm_map_entry_t prev_entry;
6064 
6065 		prev_entry = current->vme_prev;
6066 		if (prev_entry != vm_map_to_entry(map) &&
6067 		    !prev_entry->map_aligned &&
6068 		    (vm_map_round_page(prev_entry->vme_end,
6069 		    VM_MAP_PAGE_MASK(map))
6070 		    == end)) {
6071 			/*
6072 			 * The last entry in our range is not "map-aligned"
6073 			 * but it would have reached all the way to "end"
6074 			 * if it had been map-aligned, so this is not really
6075 			 * a hole in the range and we can proceed.
6076 			 */
6077 			prev = end;
6078 		}
6079 	}
6080 #endif /* __arm64__ */
6081 
6082 	if (end > prev) {
6083 		vm_map_unlock(map);
6084 		return KERN_INVALID_ADDRESS;
6085 	}
6086 
6087 	/*
6088 	 *	Go back and fix up protections.
6089 	 *	Clip to start here if the range starts within
6090 	 *	the entry.
6091 	 */
6092 
6093 	current = entry;
6094 	if (current != vm_map_to_entry(map)) {
6095 		/* clip and unnest if necessary */
6096 		vm_map_clip_start(map, current, start);
6097 	}
6098 
6099 	while ((current != vm_map_to_entry(map)) &&
6100 	    (current->vme_start < end)) {
6101 		vm_prot_t       old_prot;
6102 
6103 		vm_map_clip_end(map, current, end);
6104 
6105 		if (current->is_sub_map) {
6106 			/* clipping did unnest if needed */
6107 			assert(!current->use_pmap);
6108 		}
6109 
6110 		old_prot = current->protection;
6111 
6112 		if (set_max) {
6113 			current->max_protection = new_prot;
6114 			/* Consider either EXECUTE or UEXEC as EXECUTE for this masking */
6115 			current->protection = (new_prot & old_prot);
6116 		} else {
6117 			current->protection = new_prot;
6118 		}
6119 
6120 		/*
6121 		 *	Update physical map if necessary.
6122 		 *	If the request is to turn off write protection,
6123 		 *	we won't do it for real (in pmap). This is because
6124 		 *	it would cause copy-on-write to fail.  We've already
6125 	 *	set the new protection in the map, so if a
6126 		 *	write-protect fault occurred, it will be fixed up
6127 		 *	properly, COW or not.
6128 		 */
6129 		if (current->protection != old_prot) {
6130 			/* Look one level down, since we support nested pmaps, */
6131 			/* into mapped submaps which are direct entries */
6132 			/* in our map */
6133 
6134 			vm_prot_t prot;
6135 
6136 			prot = current->protection;
6137 			if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
6138 				prot &= ~VM_PROT_WRITE;
6139 			} else {
6140 				assert(!VME_OBJECT(current)->code_signed);
6141 				assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6142 			}
6143 
6144 			if (override_nx(map, VME_ALIAS(current)) && prot) {
6145 				prot |= VM_PROT_EXECUTE;
6146 			}
6147 
6148 #if DEVELOPMENT || DEBUG
6149 			if (!(old_prot & VM_PROT_EXECUTE) &&
6150 			    (prot & VM_PROT_EXECUTE) &&
6151 			    panic_on_unsigned_execute &&
6152 			    (proc_selfcsflags() & CS_KILL)) {
6153 				panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6154 			}
6155 #endif /* DEVELOPMENT || DEBUG */
6156 
6157 			if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
6158 				if (current->wired_count) {
6159 					panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x",
6160 					    map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6161 				}
6162 
6163 				/* If the pmap layer cares about this
6164 				 * protection type, force a fault for
6165 				 * each page so that vm_fault will
6166 				 * repopulate the page with the full
6167 				 * set of protections.
6168 				 */
6169 				/*
6170 				 * TODO: We don't seem to need this,
6171 				 * but this is due to an internal
6172 				 * implementation detail of
6173 				 * pmap_protect.  Do we want to rely
6174 				 * on this?
6175 				 */
6176 				prot = VM_PROT_NONE;
6177 			}
6178 
6179 			if (current->is_sub_map && current->use_pmap) {
6180 				pmap_protect(VME_SUBMAP(current)->pmap,
6181 				    current->vme_start,
6182 				    current->vme_end,
6183 				    prot);
6184 			} else {
6185 				if (prot & VM_PROT_WRITE) {
6186 					if (VME_OBJECT(current) == compressor_object) {
6187 						/*
6188 						 * For write requests on the
6189 						 * compressor, we will ask the
6190 						 * pmap layer to prevent us from
6191 						 * taking a write fault when we
6192 						 * attempt to access the mapping
6193 						 * next.
6194 						 */
6195 						pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6196 					}
6197 				}
6198 
6199 				pmap_protect_options(map->pmap,
6200 				    current->vme_start,
6201 				    current->vme_end,
6202 				    prot,
6203 				    pmap_options,
6204 				    NULL);
6205 			}
6206 		}
6207 		current = current->vme_next;
6208 	}
6209 
6210 	current = entry;
6211 	while ((current != vm_map_to_entry(map)) &&
6212 	    (current->vme_start <= end)) {
6213 		vm_map_simplify_entry(map, current);
6214 		current = current->vme_next;
6215 	}
6216 
6217 	vm_map_unlock(map);
6218 	return KERN_SUCCESS;
6219 }
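
/*
 * Editorial usage sketch (hypothetical function and range): lowering
 * the current protections is reversible up to max_protection, while
 * set_max = TRUE also lowers the ceiling, which can never be raised
 * back.
 */
#if 0 /* example only */
static kern_return_t
example_make_readonly(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	kern_return_t kr;

	/* Drop current protections to read-only; reversible up to max. */
	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* Also lower the ceiling: the range can never regain VM_PROT_WRITE. */
	return vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
}
#endif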
6220 
6221 /*
6222  *	vm_map_inherit:
6223  *
6224  *	Sets the inheritance of the specified address
6225  *	range in the target map.  Inheritance
6226  *	affects how the map will be shared with
6227  *	child maps at the time of vm_map_fork.
6228  */
6229 kern_return_t
6230 vm_map_inherit(
6231 	vm_map_t        map,
6232 	vm_map_offset_t start,
6233 	vm_map_offset_t end,
6234 	vm_inherit_t    new_inheritance)
6235 {
6236 	vm_map_entry_t  entry;
6237 	vm_map_entry_t  temp_entry;
6238 
6239 	vm_map_lock(map);
6240 
6241 	VM_MAP_RANGE_CHECK(map, start, end);
6242 
6243 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
6244 		entry = temp_entry;
6245 	} else {
6246 		temp_entry = temp_entry->vme_next;
6247 		entry = temp_entry;
6248 	}
6249 
6250 	/* first check entire range for submaps which can't support the */
6251 	/* given inheritance. */
6252 	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6253 		if (entry->is_sub_map) {
6254 			if (new_inheritance == VM_INHERIT_COPY) {
6255 				vm_map_unlock(map);
6256 				return KERN_INVALID_ARGUMENT;
6257 			}
6258 		}
6259 
6260 		entry = entry->vme_next;
6261 	}
6262 
6263 	entry = temp_entry;
6264 	if (entry != vm_map_to_entry(map)) {
6265 		/* clip and unnest if necessary */
6266 		vm_map_clip_start(map, entry, start);
6267 	}
6268 
6269 	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6270 		vm_map_clip_end(map, entry, end);
6271 		if (entry->is_sub_map) {
6272 			/* clip did unnest if needed */
6273 			assert(!entry->use_pmap);
6274 		}
6275 
6276 		entry->inheritance = new_inheritance;
6277 
6278 		entry = entry->vme_next;
6279 	}
6280 
6281 	vm_map_unlock(map);
6282 	return KERN_SUCCESS;
6283 }
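
/*
 * Editorial usage sketch (hypothetical function and range): keeping a
 * region from being passed on to child maps.
 */
#if 0 /* example only */
static kern_return_t
example_hide_from_children(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	/* Children created by vm_map_fork() will see a hole here. */
	return vm_map_inherit(map, start, end, VM_INHERIT_NONE);
}
#endif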
6284 
6285 /*
6286  * Update the accounting for the amount of wired memory in this map.  If the user has
6287  * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
6288  */
6289 
6290 static kern_return_t
6291 add_wire_counts(
6292 	vm_map_t        map,
6293 	vm_map_entry_t  entry,
6294 	boolean_t       user_wire)
6295 {
6296 	vm_map_size_t   size;
6297 
6298 	if (user_wire) {
6299 		unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
6300 
6301 		/*
6302 		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
6303 		 * this map entry.
6304 		 */
6305 
6306 		if (entry->user_wired_count == 0) {
6307 			size = entry->vme_end - entry->vme_start;
6308 
6309 			/*
6310 			 * Since this is the first time the user is wiring this map entry, check to see if we're
6311 			 * exceeding the user wire limits.  There is a per-map limit, which is the smaller of
6312 			 * the process's rlimit and the global vm_per_task_user_wire_limit.  There is also
6313 			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
6314 			 * limit, then we fail.
6315 			 */
6316 
6317 			if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6318 			    size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6319 				if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6320 #if DEVELOPMENT || DEBUG
6321 					if (panic_on_mlock_failure) {
6322 						panic("mlock: Over global wire limit. %llu bytes wired and requested to wire %llu bytes more", ptoa_64(total_wire_count), (uint64_t) size);
6323 					}
6324 #endif /* DEVELOPMENT || DEBUG */
6325 					os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6326 				} else {
6327 					os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6328 #if DEVELOPMENT || DEBUG
6329 					if (panic_on_mlock_failure) {
6330 						panic("mlock: Over process wire limit. %llu bytes wired and requested to wire %llu bytes more", (uint64_t) map->user_wire_size, (uint64_t) size);
6331 					}
6332 #endif /* DEVELOPMENT || DEBUG */
6333 				}
6334 				return KERN_RESOURCE_SHORTAGE;
6335 			}
6336 
6337 			/*
6338 			 * The first time the user wires an entry, we also increment the wired_count and add this to
6339 			 * the total that has been wired in the map.
6340 			 */
6341 
6342 			if (entry->wired_count >= MAX_WIRE_COUNT) {
6343 				return KERN_FAILURE;
6344 			}
6345 
6346 			entry->wired_count++;
6347 			map->user_wire_size += size;
6348 		}
6349 
6350 		if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6351 			return KERN_FAILURE;
6352 		}
6353 
6354 		entry->user_wired_count++;
6355 	} else {
6356 		/*
6357 		 * The kernel is wiring the memory.  Just bump the count and continue.
6358 		 */
6359 
6360 		if (entry->wired_count >= MAX_WIRE_COUNT) {
6361 			panic("vm_map_wire: too many wirings");
6362 		}
6363 
6364 		entry->wired_count++;
6365 	}
6366 
6367 	return KERN_SUCCESS;
6368 }
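
/*
 * Editorial worked example (numbers hypothetical): with a per-map cap
 * of 64 MB and 60 MB already accounted in map->user_wire_size, a
 * first-time user wire of an 8 MB entry fails with
 * KERN_RESOURCE_SHORTAGE even when the global limit still has
 * headroom, since 60 MB + 8 MB exceeds
 * MIN(map->user_wire_limit, vm_per_task_user_wire_limit).
 */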
6369 
6370 /*
6371  * Update the memory wiring accounting now that the given map entry is being unwired.
6372  */
6373 
6374 static void
6375 subtract_wire_counts(
6376 	vm_map_t        map,
6377 	vm_map_entry_t  entry,
6378 	boolean_t       user_wire)
6379 {
6380 	if (user_wire) {
6381 		/*
6382 		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
6383 		 */
6384 
6385 		if (entry->user_wired_count == 1) {
6386 			/*
6387 			 * We're removing the last user wire reference.  Decrement the wired_count and the total
6388 			 * user wired memory for this map.
6389 			 */
6390 
6391 			assert(entry->wired_count >= 1);
6392 			entry->wired_count--;
6393 			map->user_wire_size -= entry->vme_end - entry->vme_start;
6394 		}
6395 
6396 		assert(entry->user_wired_count >= 1);
6397 		entry->user_wired_count--;
6398 	} else {
6399 		/*
6400 		 * The kernel is unwiring the memory.   Just update the count.
6401 		 */
6402 
6403 		assert(entry->wired_count >= 1);
6404 		entry->wired_count--;
6405 	}
6406 }
6407 
6408 int cs_executable_wire = 0;
6409 
6410 /*
6411  *	vm_map_wire:
6412  *
6413  *	Sets the pageability of the specified address range in the
6414  *	target map as wired.  Regions specified as not pageable require
6415  *	locked-down physical memory and physical page maps.  The
6416  *	access_type variable indicates types of accesses that must not
6417  *	generate page faults.  This is checked against protection of
6418  *	memory being locked-down.
6419  *
6420  *	The map must not be locked, but a reference must remain to the
6421  *	map throughout the call.
6422  */
6423 static kern_return_t
6424 vm_map_wire_nested(
6425 	vm_map_t                map,
6426 	vm_map_offset_t         start,
6427 	vm_map_offset_t         end,
6428 	vm_prot_t               caller_prot,
6429 	vm_tag_t                tag,
6430 	boolean_t               user_wire,
6431 	pmap_t                  map_pmap,
6432 	vm_map_offset_t         pmap_addr,
6433 	ppnum_t                 *physpage_p)
6434 {
6435 	vm_map_entry_t          entry;
6436 	vm_prot_t               access_type;
6437 	struct vm_map_entry     *first_entry, tmp_entry;
6438 	vm_map_t                real_map;
6439 	vm_map_offset_t         s, e;
6440 	kern_return_t           rc;
6441 	boolean_t               need_wakeup;
6442 	boolean_t               main_map = FALSE;
6443 	wait_interrupt_t        interruptible_state;
6444 	thread_t                cur_thread;
6445 	unsigned int            last_timestamp;
6446 	vm_map_size_t           size;
6447 	boolean_t               wire_and_extract;
6448 	vm_prot_t               extra_prots;
6449 
6450 	extra_prots = VM_PROT_COPY;
6451 	extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6452 #if XNU_TARGET_OS_OSX
6453 	if (map->pmap == kernel_pmap ||
6454 	    !vm_map_cs_enforcement(map)) {
6455 		extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6456 	}
6457 #endif /* XNU_TARGET_OS_OSX */
6458 
6459 	access_type = (caller_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC));
6460 
6461 	wire_and_extract = FALSE;
6462 	if (physpage_p != NULL) {
6463 		/*
6464 		 * The caller wants the physical page number of the
6465 		 * wired page.  We return only one physical page number
6466 		 * so this works for only one page at a time.
6467 		 */
6468 		if ((end - start) != PAGE_SIZE) {
6469 			return KERN_INVALID_ARGUMENT;
6470 		}
6471 		wire_and_extract = TRUE;
6472 		*physpage_p = 0;
6473 	}
6474 
6475 	vm_map_lock(map);
6476 	if (map_pmap == NULL) {
6477 		main_map = TRUE;
6478 	}
6479 	last_timestamp = map->timestamp;
6480 
6481 	VM_MAP_RANGE_CHECK(map, start, end);
6482 	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6483 	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6484 
6485 	if (start == end) {
6486 		/* We wired what the caller asked for, zero pages */
6487 		vm_map_unlock(map);
6488 		return KERN_SUCCESS;
6489 	}
6490 
6491 	need_wakeup = FALSE;
6492 	cur_thread = current_thread();
6493 
6494 	s = start;
6495 	rc = KERN_SUCCESS;
6496 
6497 	if (vm_map_lookup_entry(map, s, &first_entry)) {
6498 		entry = first_entry;
6499 		/*
6500 		 * vm_map_clip_start will be done later.
6501 		 * We don't want to unnest any nested submaps here !
6502 		 */
6503 	} else {
6504 		/* Start address is not in map */
6505 		rc = KERN_INVALID_ADDRESS;
6506 		goto done;
6507 	}
6508 
6509 	while ((entry != vm_map_to_entry(map)) && (s < end)) {
6510 		/*
6511 		 * At this point, we have wired from "start" to "s".
6512 		 * We still need to wire from "s" to "end".
6513 		 *
6514 		 * "entry" hasn't been clipped, so it could start before "s"
6515 		 * and/or end after "end".
6516 		 */
6517 
6518 		/* "e" is how far we want to wire in this entry */
6519 		e = entry->vme_end;
6520 		if (e > end) {
6521 			e = end;
6522 		}
6523 
6524 		/*
6525 		 * If another thread is wiring/unwiring this entry then
6526 		 * block after informing other thread to wake us up.
6527 		 */
6528 		if (entry->in_transition) {
6529 			wait_result_t wait_result;
6530 
6531 			/*
6532 			 * We have not clipped the entry.  Make sure that
6533 			 * the start address is in range so that the lookup
6534 			 * below will succeed.
6535 			 * "s" is the current starting point: we've already
6536 			 * wired from "start" to "s" and we still have
6537 			 * to wire from "s" to "end".
6538 			 */
6539 
6540 			entry->needs_wakeup = TRUE;
6541 
6542 			/*
6543 			 * wake up anybody waiting on entries that we have
6544 			 * already wired.
6545 			 */
6546 			if (need_wakeup) {
6547 				vm_map_entry_wakeup(map);
6548 				need_wakeup = FALSE;
6549 			}
6550 			/*
6551 			 * User wiring is interruptible
6552 			 */
6553 			wait_result = vm_map_entry_wait(map,
6554 			    (user_wire) ? THREAD_ABORTSAFE :
6555 			    THREAD_UNINT);
6556 			if (user_wire && wait_result == THREAD_INTERRUPTED) {
6557 				/*
6558 				 * undo the wirings we have done so far
6559 				 * We do not clear the needs_wakeup flag,
6560 				 * because we cannot tell if we were the
6561 				 * only one waiting.
6562 				 */
6563 				rc = KERN_FAILURE;
6564 				goto done;
6565 			}
6566 
6567 			/*
6568 			 * Cannot avoid a lookup here.  Reset the timestamp.
6569 			 */
6570 			last_timestamp = map->timestamp;
6571 
6572 			/*
6573 			 * The entry could have been clipped, look it up again.
6574 			 * Worst that can happen is that it may not exist anymore.
6575 			 */
6576 			if (!vm_map_lookup_entry(map, s, &first_entry)) {
6577 				/*
6578 				 * User: undo everything up to the previous
6579 				 * entry.  Let vm_map_unwire worry about
6580 				 * checking the validity of the range.
6581 				 */
6582 				rc = KERN_FAILURE;
6583 				goto done;
6584 			}
6585 			entry = first_entry;
6586 			continue;
6587 		}
6588 
6589 		if (entry->is_sub_map) {
6590 			vm_map_offset_t sub_start;
6591 			vm_map_offset_t sub_end;
6592 			vm_map_offset_t local_start;
6593 			vm_map_offset_t local_end;
6594 			pmap_t          pmap;
6595 
6596 			if (wire_and_extract) {
6597 				/*
6598 				 * Wiring would result in copy-on-write
6599 				 * which would not be compatible with
6600 				 * the sharing we have with the original
6601 				 * provider of this memory.
6602 				 */
6603 				rc = KERN_INVALID_ARGUMENT;
6604 				goto done;
6605 			}
6606 
6607 			vm_map_clip_start(map, entry, s);
6608 			vm_map_clip_end(map, entry, end);
6609 
6610 			sub_start = VME_OFFSET(entry);
6611 			sub_end = entry->vme_end;
6612 			sub_end += VME_OFFSET(entry) - entry->vme_start;
6613 
6614 			local_end = entry->vme_end;
6615 			if (map_pmap == NULL) {
6616 				vm_object_t             object;
6617 				vm_object_offset_t      offset;
6618 				vm_prot_t               prot;
6619 				boolean_t               wired;
6620 				vm_map_entry_t          local_entry;
6621 				vm_map_version_t         version;
6622 				vm_map_t                lookup_map;
6623 
6624 				if (entry->use_pmap) {
6625 					pmap = VME_SUBMAP(entry)->pmap;
6626 					/* ppc implementation requires that */
6627 					/* the submap's pmap address ranges line */
6628 					/* up with parent map */
6629 #ifdef notdef
6630 					pmap_addr = sub_start;
6631 #endif
6632 					pmap_addr = s;
6633 				} else {
6634 					pmap = map->pmap;
6635 					pmap_addr = s;
6636 				}
6637 
6638 				if (entry->wired_count) {
6639 					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6640 						goto done;
6641 					}
6642 
6643 					/*
6644 					 * The map was not unlocked:
6645 					 * no need to goto re-lookup.
6646 					 * Just go directly to next entry.
6647 					 */
6648 					entry = entry->vme_next;
6649 					s = entry->vme_start;
6650 					continue;
6651 				}
6652 
6653 				/* call vm_map_lookup_locked to */
6654 				/* cause any needs copy to be   */
6655 				/* evaluated */
6656 				local_start = entry->vme_start;
6657 				lookup_map = map;
6658 				vm_map_lock_write_to_read(map);
6659 				rc = vm_map_lookup_locked(
6660 					&lookup_map, local_start,
6661 					(access_type | extra_prots),
6662 					OBJECT_LOCK_EXCLUSIVE,
6663 					&version, &object,
6664 					&offset, &prot, &wired,
6665 					NULL,
6666 					&real_map, NULL);
6667 				if (rc != KERN_SUCCESS) {
6668 					vm_map_unlock_read(lookup_map);
6669 					assert(map_pmap == NULL);
6670 					vm_map_unwire(map, start,
6671 					    s, user_wire);
6672 					return rc;
6673 				}
6674 				vm_object_unlock(object);
6675 				if (real_map != lookup_map) {
6676 					vm_map_unlock(real_map);
6677 				}
6678 				vm_map_unlock_read(lookup_map);
6679 				vm_map_lock(map);
6680 
6681 				/* we unlocked, so must re-lookup */
6682 				if (!vm_map_lookup_entry(map,
6683 				    local_start,
6684 				    &local_entry)) {
6685 					rc = KERN_FAILURE;
6686 					goto done;
6687 				}
6688 
6689 				/*
6690 				 * entry could have been "simplified",
6691 				 * so re-clip
6692 				 */
6693 				entry = local_entry;
6694 				assert(s == local_start);
6695 				vm_map_clip_start(map, entry, s);
6696 				vm_map_clip_end(map, entry, end);
6697 				/* re-compute "e" */
6698 				e = entry->vme_end;
6699 				if (e > end) {
6700 					e = end;
6701 				}
6702 
6703 				/* did we have a change of type? */
6704 				if (!entry->is_sub_map) {
6705 					last_timestamp = map->timestamp;
6706 					continue;
6707 				}
6708 			} else {
6709 				local_start = entry->vme_start;
6710 				pmap = map_pmap;
6711 			}
6712 
6713 			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6714 				goto done;
6715 			}
6716 
6717 			entry->in_transition = TRUE;
6718 
6719 			vm_map_unlock(map);
6720 			rc = vm_map_wire_nested(VME_SUBMAP(entry),
6721 			    sub_start, sub_end,
6722 			    caller_prot, tag,
6723 			    user_wire, pmap, pmap_addr,
6724 			    NULL);
6725 			vm_map_lock(map);
6726 
6727 			/*
6728 			 * Find the entry again.  It could have been clipped
6729 			 * after we unlocked the map.
6730 			 */
6731 			if (!vm_map_lookup_entry(map, local_start,
6732 			    &first_entry)) {
6733 				panic("vm_map_wire: re-lookup failed");
6734 			}
6735 			entry = first_entry;
6736 
6737 			assert(local_start == s);
6738 			/* re-compute "e" */
6739 			e = entry->vme_end;
6740 			if (e > end) {
6741 				e = end;
6742 			}
6743 
6744 			last_timestamp = map->timestamp;
6745 			while ((entry != vm_map_to_entry(map)) &&
6746 			    (entry->vme_start < e)) {
6747 				assert(entry->in_transition);
6748 				entry->in_transition = FALSE;
6749 				if (entry->needs_wakeup) {
6750 					entry->needs_wakeup = FALSE;
6751 					need_wakeup = TRUE;
6752 				}
6753 				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6754 					subtract_wire_counts(map, entry, user_wire);
6755 				}
6756 				entry = entry->vme_next;
6757 			}
6758 			if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6759 				goto done;
6760 			}
6761 
6762 			/* no need to relookup again */
6763 			s = entry->vme_start;
6764 			continue;
6765 		}
6766 
6767 		/*
6768 		 * If this entry is already wired then increment
6769 		 * the appropriate wire reference count.
6770 		 */
6771 		if (entry->wired_count) {
6772 			if ((entry->protection & access_type) != access_type) {
6773 				/* found a protection problem */
6774 
6775 				/*
6776 				 * XXX FBDP
6777 				 * We should always return an error
6778 				 * in this case but since we didn't
6779 				 * enforce it before, let's do
6780 				 * it only for the new "wire_and_extract"
6781 				 * code path for now...
6782 				 */
6783 				if (wire_and_extract) {
6784 					rc = KERN_PROTECTION_FAILURE;
6785 					goto done;
6786 				}
6787 			}
6788 
6789 			/*
6790 			 * entry is already wired down, get our reference
6791 			 * after clipping to our range.
6792 			 */
6793 			vm_map_clip_start(map, entry, s);
6794 			vm_map_clip_end(map, entry, end);
6795 
6796 			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6797 				goto done;
6798 			}
6799 
6800 			if (wire_and_extract) {
6801 				vm_object_t             object;
6802 				vm_object_offset_t      offset;
6803 				vm_page_t               m;
6804 
6805 				/*
6806 				 * We don't have to "wire" the page again
6807 				 * but we still have to "extract" its
6808 				 * physical page number, after some sanity
6809 				 * checks.
6810 				 */
6811 				assert((entry->vme_end - entry->vme_start)
6812 				    == PAGE_SIZE);
6813 				assert(!entry->needs_copy);
6814 				assert(!entry->is_sub_map);
6815 				assert(VME_OBJECT(entry));
6816 				if (((entry->vme_end - entry->vme_start)
6817 				    != PAGE_SIZE) ||
6818 				    entry->needs_copy ||
6819 				    entry->is_sub_map ||
6820 				    VME_OBJECT(entry) == VM_OBJECT_NULL) {
6821 					rc = KERN_INVALID_ARGUMENT;
6822 					goto done;
6823 				}
6824 
6825 				object = VME_OBJECT(entry);
6826 				offset = VME_OFFSET(entry);
6827 				/* need exclusive lock to update m->dirty */
6828 				if (entry->protection & VM_PROT_WRITE) {
6829 					vm_object_lock(object);
6830 				} else {
6831 					vm_object_lock_shared(object);
6832 				}
6833 				m = vm_page_lookup(object, offset);
6834 				assert(m != VM_PAGE_NULL);
6835 				assert(VM_PAGE_WIRED(m));
6836 				if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6837 					*physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6838 					if (entry->protection & VM_PROT_WRITE) {
6839 						vm_object_lock_assert_exclusive(
6840 							object);
6841 						m->vmp_dirty = TRUE;
6842 					}
6843 				} else {
6844 					/* not already wired !? */
6845 					*physpage_p = 0;
6846 				}
6847 				vm_object_unlock(object);
6848 			}
6849 
6850 			/* map was not unlocked: no need to relookup */
6851 			entry = entry->vme_next;
6852 			s = entry->vme_start;
6853 			continue;
6854 		}
6855 
6856 		/*
6857 		 * Unwired entry or wire request transmitted via submap
6858 		 */
6859 
6860 		/*
6861 		 * Wiring would copy the pages to the shadow object.
6862 		 * The shadow object would not be code-signed so
6863 		 * attempting to execute code from these copied pages
6864 		 * would trigger a code-signing violation.
6865 		 */
6866 
6867 		if ((entry->protection & VM_PROT_EXECUTE)
6868 #if XNU_TARGET_OS_OSX
6869 		    &&
6870 		    map->pmap != kernel_pmap &&
6871 		    (vm_map_cs_enforcement(map)
6872 #if __arm64__
6873 		    || !VM_MAP_IS_EXOTIC(map)
6874 #endif /* __arm64__ */
6875 		    )
6876 #endif /* XNU_TARGET_OS_OSX */
6877 		    ) {
6878 #if MACH_ASSERT
6879 			printf("pid %d[%s] wiring executable range from "
6880 			    "0x%llx to 0x%llx: rejected to preserve "
6881 			    "code-signing\n",
6882 			    proc_selfpid(),
6883 			    (current_task()->bsd_info
6884 			    ? proc_name_address(current_task()->bsd_info)
6885 			    : "?"),
6886 			    (uint64_t) entry->vme_start,
6887 			    (uint64_t) entry->vme_end);
6888 #endif /* MACH_ASSERT */
6889 			DTRACE_VM2(cs_executable_wire,
6890 			    uint64_t, (uint64_t)entry->vme_start,
6891 			    uint64_t, (uint64_t)entry->vme_end);
6892 			cs_executable_wire++;
6893 			rc = KERN_PROTECTION_FAILURE;
6894 			goto done;
6895 		}
6896 
6897 		/*
6898 		 * Perform actions of vm_map_lookup that need the write
6899 		 * lock on the map: create a shadow object for a
6900 		 * copy-on-write region, or an object for a zero-fill
6901 		 * region.
6902 		 */
6903 		size = entry->vme_end - entry->vme_start;
6904 		/*
6905 		 * If wiring a copy-on-write page, we need to copy it now
6906 		 * even if we're only (currently) requesting read access.
6907 		 * This is aggressive, but once it's wired we can't move it.
6908 		 */
6909 		if (entry->needs_copy) {
6910 			if (wire_and_extract) {
6911 				/*
6912 				 * We're supposed to share with the original
6913 				 * provider so should not be "needs_copy"
6914 				 */
6915 				rc = KERN_INVALID_ARGUMENT;
6916 				goto done;
6917 			}
6918 
6919 			VME_OBJECT_SHADOW(entry, size);
6920 			entry->needs_copy = FALSE;
6921 		} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6922 			if (wire_and_extract) {
6923 				/*
6924 				 * We're supposed to share with the original
6925 				 * provider so should already have an object.
6926 				 */
6927 				rc = KERN_INVALID_ARGUMENT;
6928 				goto done;
6929 			}
6930 			VME_OBJECT_SET(entry, vm_object_allocate(size));
6931 			VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6932 			assert(entry->use_pmap);
6933 		}
6934 
6935 		vm_map_clip_start(map, entry, s);
6936 		vm_map_clip_end(map, entry, end);
6937 
6938 		/* re-compute "e" */
6939 		e = entry->vme_end;
6940 		if (e > end) {
6941 			e = end;
6942 		}
6943 
6944 		/*
6945 		 * Check for holes and protection mismatch.
6946 		 * Holes: Next entry should be contiguous unless this
6947 		 *	  is the end of the region.
6948 		 * Protection: Access requested must be allowed, unless
6949 		 *	wiring is by protection class
6950 		 */
6951 		if ((entry->vme_end < end) &&
6952 		    ((entry->vme_next == vm_map_to_entry(map)) ||
6953 		    (entry->vme_next->vme_start > entry->vme_end))) {
6954 			/* found a hole */
6955 			rc = KERN_INVALID_ADDRESS;
6956 			goto done;
6957 		}
6958 		if ((entry->protection & access_type) != access_type) {
6959 			/* found a protection problem */
6960 			rc = KERN_PROTECTION_FAILURE;
6961 			goto done;
6962 		}
6963 
6964 		assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6965 
6966 		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6967 			goto done;
6968 		}
6969 
6970 		entry->in_transition = TRUE;
6971 
6972 		/*
6973 		 * This entry might get split once we unlock the map.
6974 		 * In vm_fault_wire(), we need the current range as
6975 		 * defined by this entry.  In order for this to work
6976 		 * along with a simultaneous clip operation, we make a
6977 		 * temporary copy of this entry and use that for the
6978 		 * wiring.  Note that the underlying objects do not
6979 		 * change during a clip.
6980 		 */
6981 		tmp_entry = *entry;
6982 
6983 		/*
6984 		 * The in_transition state guarantees that the entry
6985 		 * (or entries for this range, if a split occurred) will be
6986 		 * there when the map lock is acquired for the second time.
6987 		 */
6988 		vm_map_unlock(map);
6989 
6990 		if (!user_wire && cur_thread != THREAD_NULL) {
6991 			interruptible_state = thread_interrupt_level(THREAD_UNINT);
6992 		} else {
6993 			interruptible_state = THREAD_UNINT;
6994 		}
6995 
6996 		if (map_pmap) {
6997 			rc = vm_fault_wire(map,
6998 			    &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6999 			    physpage_p);
7000 		} else {
7001 			rc = vm_fault_wire(map,
7002 			    &tmp_entry, caller_prot, tag, map->pmap,
7003 			    tmp_entry.vme_start,
7004 			    physpage_p);
7005 		}
7006 
7007 		if (!user_wire && cur_thread != THREAD_NULL) {
7008 			thread_interrupt_level(interruptible_state);
7009 		}
7010 
7011 		vm_map_lock(map);
7012 
7013 		if (last_timestamp + 1 != map->timestamp) {
7014 			/*
7015 			 * Find the entry again.  It could have been clipped
7016 			 * after we unlocked the map.
7017 			 */
7018 			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7019 			    &first_entry)) {
7020 				panic("vm_map_wire: re-lookup failed");
7021 			}
7022 
7023 			entry = first_entry;
7024 		}
7025 
7026 		last_timestamp = map->timestamp;
7027 
7028 		while ((entry != vm_map_to_entry(map)) &&
7029 		    (entry->vme_start < tmp_entry.vme_end)) {
7030 			assert(entry->in_transition);
7031 			entry->in_transition = FALSE;
7032 			if (entry->needs_wakeup) {
7033 				entry->needs_wakeup = FALSE;
7034 				need_wakeup = TRUE;
7035 			}
7036 			if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
7037 				subtract_wire_counts(map, entry, user_wire);
7038 			}
7039 			entry = entry->vme_next;
7040 		}
7041 
7042 		if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
7043 			goto done;
7044 		}
7045 
7046 		if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
7047 		    (tmp_entry.vme_end != end) &&    /* AND, we are not at the end of the requested range */
7048 		    (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
7049 			/* found a "new" hole */
7050 			s = tmp_entry.vme_end;
7051 			rc = KERN_INVALID_ADDRESS;
7052 			goto done;
7053 		}
7054 
7055 		s = entry->vme_start;
7056 	} /* end while loop through map entries */
7057 
7058 done:
7059 	if (rc == KERN_SUCCESS) {
7060 		/* repair any damage we may have made to the VM map */
7061 		vm_map_simplify_range(map, start, end);
7062 	}
7063 
7064 	vm_map_unlock(map);
7065 
7066 	/*
7067 	 * wake up anybody waiting on entries we wired.
7068 	 */
7069 	if (need_wakeup) {
7070 		vm_map_entry_wakeup(map);
7071 	}
7072 
7073 	if (rc != KERN_SUCCESS) {
7074 		/* undo what has been wired so far */
7075 		vm_map_unwire_nested(map, start, s, user_wire,
7076 		    map_pmap, pmap_addr);
7077 		if (physpage_p) {
7078 			*physpage_p = 0;
7079 		}
7080 	}
7081 
7082 	return rc;
7083 }
7084 
7085 kern_return_t
7086 vm_map_wire_external(
7087 	vm_map_t                map,
7088 	vm_map_offset_t         start,
7089 	vm_map_offset_t         end,
7090 	vm_prot_t               caller_prot,
7091 	boolean_t               user_wire)
7092 {
7093 	kern_return_t   kret;
7094 
7095 	kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7096 	    user_wire, (pmap_t)NULL, 0, NULL);
7097 	return kret;
7098 }
7099 
7100 kern_return_t
7101 vm_map_wire_kernel(
7102 	vm_map_t                map,
7103 	vm_map_offset_t         start,
7104 	vm_map_offset_t         end,
7105 	vm_prot_t               caller_prot,
7106 	vm_tag_t                tag,
7107 	boolean_t               user_wire)
7108 {
7109 	kern_return_t   kret;
7110 
7111 	kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7112 	    user_wire, (pmap_t)NULL, 0, NULL);
7113 	return kret;
7114 }
7115 
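/*
 * A minimal caller sketch (hypothetical "map", "addr", "len" and "tag"),
 * assuming the caller holds a reference on the map but not its lock:
 * wire a range for kernel use, then unwire it when done.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_wire_kernel(map, addr, addr + len,
 *	    VM_PROT_READ | VM_PROT_WRITE, tag, FALSE);
 *	if (kr == KERN_SUCCESS) {
 *		... operate on the wired range ...
 *		(void) vm_map_unwire(map, addr, addr + len, FALSE);
 *	}
 *
 * "tag" is a vm_tag_t identifying the allocation site; vm_tag_bt(),
 * used by vm_map_wire_external() above, is one way to derive it.
 */
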
7116 kern_return_t
7117 vm_map_wire_and_extract_external(
7118 	vm_map_t        map,
7119 	vm_map_offset_t start,
7120 	vm_prot_t       caller_prot,
7121 	boolean_t       user_wire,
7122 	ppnum_t         *physpage_p)
7123 {
7124 	kern_return_t   kret;
7125 
7126 	kret = vm_map_wire_nested(map,
7127 	    start,
7128 	    start + VM_MAP_PAGE_SIZE(map),
7129 	    caller_prot,
7130 	    vm_tag_bt(),
7131 	    user_wire,
7132 	    (pmap_t)NULL,
7133 	    0,
7134 	    physpage_p);
7135 	if (kret != KERN_SUCCESS &&
7136 	    physpage_p != NULL) {
7137 		*physpage_p = 0;
7138 	}
7139 	return kret;
7140 }
7141 
7142 kern_return_t
7143 vm_map_wire_and_extract_kernel(
7144 	vm_map_t        map,
7145 	vm_map_offset_t start,
7146 	vm_prot_t       caller_prot,
7147 	vm_tag_t        tag,
7148 	boolean_t       user_wire,
7149 	ppnum_t         *physpage_p)
7150 {
7151 	kern_return_t   kret;
7152 
7153 	kret = vm_map_wire_nested(map,
7154 	    start,
7155 	    start + VM_MAP_PAGE_SIZE(map),
7156 	    caller_prot,
7157 	    tag,
7158 	    user_wire,
7159 	    (pmap_t)NULL,
7160 	    0,
7161 	    physpage_p);
7162 	if (kret != KERN_SUCCESS &&
7163 	    physpage_p != NULL) {
7164 		*physpage_p = 0;
7165 	}
7166 	return kret;
7167 }
7168 
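/*
 * Sketch: wire a single page and extract its physical page number
 * (hypothetical "map", "addr" and "tag"; "addr" is page-aligned).
 * The wire-and-extract path operates on exactly VM_MAP_PAGE_SIZE(map)
 * bytes and clears *physpage_p on failure, as shown above.
 *
 *	ppnum_t phys = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_wire_and_extract_kernel(map, addr,
 *	    VM_PROT_READ | VM_PROT_WRITE, tag, FALSE, &phys);
 *	if (kr == KERN_SUCCESS) {
 *		... "phys" holds the wired page's physical page number ...
 *	}
 */
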
7169 /*
7170  *	vm_map_unwire:
7171  *
7172  *	Sets the pageability of the specified address range in the target
7173  *	map to pageable.  Regions specified must have been wired previously.
7174  *
7175  *	The map must not be locked, but a reference must remain to the map
7176  *	throughout the call.
7177  *
7178  *	Kernel will panic on failures.  User unwire ignores holes and
7179  *	unwired and in-transition entries to avoid losing memory by leaving
7180  *	it unwired.
7181  */
7182 static kern_return_t
7183 vm_map_unwire_nested(
7184 	vm_map_t                map,
7185 	vm_map_offset_t         start,
7186 	vm_map_offset_t         end,
7187 	boolean_t               user_wire,
7188 	pmap_t                  map_pmap,
7189 	vm_map_offset_t         pmap_addr)
7190 {
7191 	vm_map_entry_t          entry;
7192 	struct vm_map_entry     *first_entry, tmp_entry;
7193 	boolean_t               need_wakeup;
7194 	boolean_t               main_map = FALSE;
7195 	unsigned int            last_timestamp;
7196 
7197 	vm_map_lock(map);
7198 	if (map_pmap == NULL) {
7199 		main_map = TRUE;
7200 	}
7201 	last_timestamp = map->timestamp;
7202 
7203 	VM_MAP_RANGE_CHECK(map, start, end);
7204 	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7205 	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7206 
7207 	if (start == end) {
7208 		/* We unwired what the caller asked for: zero pages */
7209 		vm_map_unlock(map);
7210 		return KERN_SUCCESS;
7211 	}
7212 
7213 	if (vm_map_lookup_entry(map, start, &first_entry)) {
7214 		entry = first_entry;
7215 		/*
7216 		 * vm_map_clip_start will be done later.
7217 		 * We don't want to unnest any nested sub maps here !
7218 		 */
7219 	} else {
7220 		if (!user_wire) {
7221 			panic("vm_map_unwire: start not found");
7222 		}
7223 		/*	Start address is not in map. */
7224 		vm_map_unlock(map);
7225 		return KERN_INVALID_ADDRESS;
7226 	}
7227 
7228 	if (entry->superpage_size) {
7229 		/* superpages are always wired */
7230 		vm_map_unlock(map);
7231 		return KERN_INVALID_ADDRESS;
7232 	}
7233 
7234 	need_wakeup = FALSE;
7235 	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7236 		if (entry->in_transition) {
7237 			/*
7238 			 * 1)
7239 			 * Another thread is wiring down this entry. Note
7240 			 * that if it were not for the other thread we would
7241 			 * be unwiring an unwired entry.  This is not
7242 			 * permitted.  If we wait, we will be unwiring memory
7243 			 * we did not wire.
7244 			 *
7245 			 * 2)
7246 			 * Another thread is unwiring this entry.  We did not
7247 			 * have a reference to it, because if we did, this
7248 			 * entry would not be getting unwired now.
7249 			 */
7250 			if (!user_wire) {
7251 				/*
7252 				 * XXX FBDP
7253 				 * This could happen:  there could be some
7254 				 * overlapping vslock/vsunlock operations
7255 				 * going on.
7256 				 * We should probably just wait and retry,
7257 				 * but then we have to be careful that this
7258 				 * entry could get "simplified" after
7259 				 * "in_transition" gets unset and before
7260 				 * we re-lookup the entry, so we would
7261 				 * have to re-clip the entry to avoid
7262 				 * re-unwiring what we have already unwired...
7263 				 * See vm_map_wire_nested().
7264 				 *
7265 				 * Or we could just ignore "in_transition"
7266 				 * here and proceed to decrement the wired
7267 				 * count(s) on this entry.  That should be fine
7268 				 * as long as "wired_count" doesn't drop all
7269 				 * the way to 0 (and we should panic if THAT
7270 				 * happens).
7271 				 */
7272 				panic("vm_map_unwire: in_transition entry");
7273 			}
7274 
7275 			entry = entry->vme_next;
7276 			continue;
7277 		}
7278 
7279 		if (entry->is_sub_map) {
7280 			vm_map_offset_t sub_start;
7281 			vm_map_offset_t sub_end;
7282 			vm_map_offset_t local_end;
7283 			pmap_t          pmap;
7284 
7285 			vm_map_clip_start(map, entry, start);
7286 			vm_map_clip_end(map, entry, end);
7287 
7288 			sub_start = VME_OFFSET(entry);
7289 			sub_end = entry->vme_end - entry->vme_start;
7290 			sub_end += VME_OFFSET(entry);
7291 			local_end = entry->vme_end;
7292 			if (map_pmap == NULL) {
7293 				if (entry->use_pmap) {
7294 					pmap = VME_SUBMAP(entry)->pmap;
7295 					pmap_addr = sub_start;
7296 				} else {
7297 					pmap = map->pmap;
7298 					pmap_addr = start;
7299 				}
7300 				if (entry->wired_count == 0 ||
7301 				    (user_wire && entry->user_wired_count == 0)) {
7302 					if (!user_wire) {
7303 						panic("vm_map_unwire: entry is unwired");
7304 					}
7305 					entry = entry->vme_next;
7306 					continue;
7307 				}
7308 
7309 				/*
7310 				 * Check for holes
7311 				 * Holes: Next entry should be contiguous unless
7312 				 * this is the end of the region.
7313 				 */
7314 				if (((entry->vme_end < end) &&
7315 				    ((entry->vme_next == vm_map_to_entry(map)) ||
7316 				    (entry->vme_next->vme_start
7317 				    > entry->vme_end)))) {
7318 					if (!user_wire) {
7319 						panic("vm_map_unwire: non-contiguous region");
7320 					}
7321 /*
7322  *                                       entry = entry->vme_next;
7323  *                                       continue;
7324  */
7325 				}
7326 
7327 				subtract_wire_counts(map, entry, user_wire);
7328 
7329 				if (entry->wired_count != 0) {
7330 					entry = entry->vme_next;
7331 					continue;
7332 				}
7333 
7334 				entry->in_transition = TRUE;
7335 				tmp_entry = *entry;/* see comment in vm_map_wire() */
7336 
7337 				/*
7338 				 * We can unlock the map now. The in_transition state
7339 				 * guarantees existence of the entry.
7340 				 */
7341 				vm_map_unlock(map);
7342 				vm_map_unwire_nested(VME_SUBMAP(entry),
7343 				    sub_start, sub_end, user_wire, pmap, pmap_addr);
7344 				vm_map_lock(map);
7345 
7346 				if (last_timestamp + 1 != map->timestamp) {
7347 					/*
7348 					 * Find the entry again.  It could have been
7349 					 * clipped or deleted after we unlocked the map.
7350 					 */
7351 					if (!vm_map_lookup_entry(map,
7352 					    tmp_entry.vme_start,
7353 					    &first_entry)) {
7354 						if (!user_wire) {
7355 							panic("vm_map_unwire: re-lookup failed");
7356 						}
7357 						entry = first_entry->vme_next;
7358 					} else {
7359 						entry = first_entry;
7360 					}
7361 				}
7362 				last_timestamp = map->timestamp;
7363 
7364 				/*
7365 				 * clear transition bit for all constituent entries
7366 				 * that were in the original entry (saved in
7367 				 * tmp_entry).  Also check for waiters.
7368 				 */
7369 				while ((entry != vm_map_to_entry(map)) &&
7370 				    (entry->vme_start < tmp_entry.vme_end)) {
7371 					assert(entry->in_transition);
7372 					entry->in_transition = FALSE;
7373 					if (entry->needs_wakeup) {
7374 						entry->needs_wakeup = FALSE;
7375 						need_wakeup = TRUE;
7376 					}
7377 					entry = entry->vme_next;
7378 				}
7379 				continue;
7380 			} else {
7381 				tmp_entry = *entry;
7382 				vm_map_unlock(map);
7383 				vm_map_unwire_nested(VME_SUBMAP(entry),
7384 				    sub_start, sub_end, user_wire, map_pmap,
7385 				    pmap_addr);
7386 				vm_map_lock(map);
7387 
7388 				if (last_timestamp + 1 != map->timestamp) {
7389 					/*
7390 					 * Find the entry again.  It could have been
7391 					 * clipped or deleted after we unlocked the map.
7392 					 */
7393 					if (!vm_map_lookup_entry(map,
7394 					    tmp_entry.vme_start,
7395 					    &first_entry)) {
7396 						if (!user_wire) {
7397 							panic("vm_map_unwire: re-lookup failed");
7398 						}
7399 						entry = first_entry->vme_next;
7400 					} else {
7401 						entry = first_entry;
7402 					}
7403 				}
7404 				last_timestamp = map->timestamp;
7405 			}
7406 		}
7407 
7408 
7409 		if ((entry->wired_count == 0) ||
7410 		    (user_wire && entry->user_wired_count == 0)) {
7411 			if (!user_wire) {
7412 				panic("vm_map_unwire: entry is unwired");
7413 			}
7414 
7415 			entry = entry->vme_next;
7416 			continue;
7417 		}
7418 
7419 		assert(entry->wired_count > 0 &&
7420 		    (!user_wire || entry->user_wired_count > 0));
7421 
7422 		vm_map_clip_start(map, entry, start);
7423 		vm_map_clip_end(map, entry, end);
7424 
7425 		/*
7426 		 * Check for holes
7427 		 * Holes: Next entry should be contiguous unless
7428 		 *	  this is the end of the region.
7429 		 */
7430 		if (((entry->vme_end < end) &&
7431 		    ((entry->vme_next == vm_map_to_entry(map)) ||
7432 		    (entry->vme_next->vme_start > entry->vme_end)))) {
7433 			if (!user_wire) {
7434 				panic("vm_map_unwire: non-contiguous region");
7435 			}
7436 			entry = entry->vme_next;
7437 			continue;
7438 		}
7439 
7440 		subtract_wire_counts(map, entry, user_wire);
7441 
7442 		if (entry->wired_count != 0) {
7443 			entry = entry->vme_next;
7444 			continue;
7445 		}
7446 
7447 		if (entry->zero_wired_pages) {
7448 			entry->zero_wired_pages = FALSE;
7449 		}
7450 
7451 		entry->in_transition = TRUE;
7452 		tmp_entry = *entry;     /* see comment in vm_map_wire() */
7453 
7454 		/*
7455 		 * We can unlock the map now. The in_transition state
7456 	 * guarantees existence of the entry.
7457 		 */
7458 		vm_map_unlock(map);
7459 		if (map_pmap) {
7460 			vm_fault_unwire(map,
7461 			    &tmp_entry, FALSE, map_pmap, pmap_addr);
7462 		} else {
7463 			vm_fault_unwire(map,
7464 			    &tmp_entry, FALSE, map->pmap,
7465 			    tmp_entry.vme_start);
7466 		}
7467 		vm_map_lock(map);
7468 
7469 		if (last_timestamp + 1 != map->timestamp) {
7470 			/*
7471 			 * Find the entry again.  It could have been clipped
7472 			 * or deleted after we unlocked the map.
7473 			 */
7474 			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7475 			    &first_entry)) {
7476 				if (!user_wire) {
7477 					panic("vm_map_unwire: re-lookup failed");
7478 				}
7479 				entry = first_entry->vme_next;
7480 			} else {
7481 				entry = first_entry;
7482 			}
7483 		}
7484 		last_timestamp = map->timestamp;
7485 
7486 		/*
7487 		 * clear transition bit for all constituent entries that
7488 		 * were in the original entry (saved in tmp_entry).  Also
7489 		 * check for waiters.
7490 		 */
7491 		while ((entry != vm_map_to_entry(map)) &&
7492 		    (entry->vme_start < tmp_entry.vme_end)) {
7493 			assert(entry->in_transition);
7494 			entry->in_transition = FALSE;
7495 			if (entry->needs_wakeup) {
7496 				entry->needs_wakeup = FALSE;
7497 				need_wakeup = TRUE;
7498 			}
7499 			entry = entry->vme_next;
7500 		}
7501 	}
7502 
7503 	/*
7504 	 * We might have fragmented the address space when we wired this
7505 	 * range of addresses.  Attempt to re-coalesce these VM map entries
7506 	 * with their neighbors now that they're no longer wired.
7507 	 * Under some circumstances, address space fragmentation can
7508 	 * prevent VM object shadow chain collapsing, which can cause
7509 	 * swap space leaks.
7510 	 */
7511 	vm_map_simplify_range(map, start, end);
7512 
7513 	vm_map_unlock(map);
7514 	/*
7515 	 * wake up anybody waiting on entries that we have unwired.
7516 	 */
7517 	if (need_wakeup) {
7518 		vm_map_entry_wakeup(map);
7519 	}
7520 	return KERN_SUCCESS;
7521 }
7522 
7523 kern_return_t
7524 vm_map_unwire(
7525 	vm_map_t                map,
7526 	vm_map_offset_t         start,
7527 	vm_map_offset_t         end,
7528 	boolean_t               user_wire)
7529 {
7530 	return vm_map_unwire_nested(map, start, end,
7531 	           user_wire, (pmap_t)NULL, 0);
7532 }
7533 
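/*
 * Behavior sketch for the "user_wire" flag, over a hypothetical
 * range [a, b):
 *
 *	vm_map_unwire(map, a, b, TRUE);   user unwire: holes, unwired
 *	                                  and in-transition entries are
 *	                                  skipped
 *	vm_map_unwire(map, a, b, FALSE);  kernel unwire: the same
 *	                                  conditions panic, per the
 *	                                  contract above
 */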
7534 
7535 /*
7536  *	vm_map_entry_delete:	[ internal use only ]
7537  *
7538  *	Deallocate the given entry from the target map.
7539  */
7540 static void
7541 vm_map_entry_delete(
7542 	vm_map_t        map,
7543 	vm_map_entry_t  entry)
7544 {
7545 	vm_map_offset_t s, e;
7546 	vm_object_t     object;
7547 	vm_map_t        submap;
7548 
7549 	s = entry->vme_start;
7550 	e = entry->vme_end;
7551 	assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7552 	assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7553 	if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7554 		assert(page_aligned(s));
7555 		assert(page_aligned(e));
7556 	}
7557 	if (entry->map_aligned == TRUE) {
7558 		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7559 		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7560 	}
7561 	assert(entry->wired_count == 0);
7562 	assert(entry->user_wired_count == 0);
7563 	assert(!entry->permanent);
7564 
7565 	if (entry->is_sub_map) {
7566 		object = NULL;
7567 		submap = VME_SUBMAP(entry);
7568 	} else {
7569 		submap = NULL;
7570 		object = VME_OBJECT(entry);
7571 	}
7572 
7573 	vm_map_store_entry_unlink(map, entry);
7574 	map->size -= e - s;
7575 
7576 	vm_map_entry_dispose(map, entry);
7577 
7578 	vm_map_unlock(map);
7579 	/*
7580 	 *	Deallocate the object only after removing all
7581 	 *	pmap entries pointing to its pages.
7582 	 */
7583 	if (submap) {
7584 		vm_map_deallocate(submap);
7585 	} else {
7586 		vm_object_deallocate(object);
7587 	}
7588 }
7589 
7590 void
7591 vm_map_submap_pmap_clean(
7592 	vm_map_t        map,
7593 	vm_map_offset_t start,
7594 	vm_map_offset_t end,
7595 	vm_map_t        sub_map,
7596 	vm_map_offset_t offset)
7597 {
7598 	vm_map_offset_t submap_start;
7599 	vm_map_offset_t submap_end;
7600 	vm_map_size_t   remove_size;
7601 	vm_map_entry_t  entry;
7602 
7603 	submap_end = offset + (end - start);
7604 	submap_start = offset;
7605 
7606 	vm_map_lock_read(sub_map);
7607 	if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7608 		remove_size = (entry->vme_end - entry->vme_start);
7609 		if (offset > entry->vme_start) {
7610 			remove_size -= offset - entry->vme_start;
7611 		}
7612 
7613 
7614 		if (submap_end < entry->vme_end) {
7615 			remove_size -=
7616 			    entry->vme_end - submap_end;
7617 		}
7618 		if (entry->is_sub_map) {
7619 			vm_map_submap_pmap_clean(
7620 				sub_map,
7621 				start,
7622 				start + remove_size,
7623 				VME_SUBMAP(entry),
7624 				VME_OFFSET(entry));
7625 		} else {
7626 			if (map->mapped_in_other_pmaps &&
7627 			    os_ref_get_count_raw(&map->map_refcnt) != 0 &&
7628 			    VME_OBJECT(entry) != NULL) {
7629 				vm_object_pmap_protect_options(
7630 					VME_OBJECT(entry),
7631 					(VME_OFFSET(entry) +
7632 					offset -
7633 					entry->vme_start),
7634 					remove_size,
7635 					PMAP_NULL,
7636 					PAGE_SIZE,
7637 					entry->vme_start,
7638 					VM_PROT_NONE,
7639 					PMAP_OPTIONS_REMOVE);
7640 			} else {
7641 				pmap_remove(map->pmap,
7642 				    (addr64_t)start,
7643 				    (addr64_t)(start + remove_size));
7644 			}
7645 		}
7646 	}
7647 
7648 	entry = entry->vme_next;
7649 
7650 	while ((entry != vm_map_to_entry(sub_map))
7651 	    && (entry->vme_start < submap_end)) {
7652 		remove_size = (entry->vme_end - entry->vme_start);
7653 		if (submap_end < entry->vme_end) {
7654 			remove_size -= entry->vme_end - submap_end;
7655 		}
7656 		if (entry->is_sub_map) {
7657 			vm_map_submap_pmap_clean(
7658 				sub_map,
7659 				(start + entry->vme_start) - offset,
7660 				((start + entry->vme_start) - offset) + remove_size,
7661 				VME_SUBMAP(entry),
7662 				VME_OFFSET(entry));
7663 		} else {
7664 			if (map->mapped_in_other_pmaps &&
7665 			    os_ref_get_count_raw(&map->map_refcnt) != 0 &&
7666 			    VME_OBJECT(entry) != NULL) {
7667 				vm_object_pmap_protect_options(
7668 					VME_OBJECT(entry),
7669 					VME_OFFSET(entry),
7670 					remove_size,
7671 					PMAP_NULL,
7672 					PAGE_SIZE,
7673 					entry->vme_start,
7674 					VM_PROT_NONE,
7675 					PMAP_OPTIONS_REMOVE);
7676 			} else {
7677 				pmap_remove(map->pmap,
7678 				    (addr64_t)((start + entry->vme_start)
7679 				    - offset),
7680 				    (addr64_t)(((start + entry->vme_start)
7681 				    - offset) + remove_size));
7682 			}
7683 		}
7684 		entry = entry->vme_next;
7685 	}
7686 	vm_map_unlock_read(sub_map);
7687 	return;
7688 }
7689 
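/*
 * Address-translation sketch for the walk above: a parent-map range
 * [start, end) backed by a submap at offset "offset" covers the
 * submap range
 *
 *	submap_start = offset;
 *	submap_end   = offset + (end - start);
 *
 * e.g. (hypothetical values) start = 0x7000, end = 0x9000,
 * offset = 0x1000 cleans the submap range [0x1000, 0x3000).
 */
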
7690 /*
7691  *     virt_memory_guard_ast:
7692  *
7693  *     Handle the AST callout for a virtual memory guard:
7694  *     raise an EXC_GUARD exception and terminate the task
7695  *     if configured to do so.
7696  */
7697 void
7698 virt_memory_guard_ast(
7699 	thread_t thread,
7700 	mach_exception_data_type_t code,
7701 	mach_exception_data_type_t subcode)
7702 {
7703 	task_t task = get_threadtask(thread);
7704 	assert(task != kernel_task);
7705 	assert(task == current_task());
7706 	kern_return_t sync_exception_result;
7707 	uint32_t behavior;
7708 
7709 	behavior = task->task_exc_guard;
7710 
7711 	/* Is delivery enabled? */
7712 	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7713 		return;
7714 	}
7715 
7716 	/* If only once, make sure we're that once */
7717 	while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7718 		uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7719 
7720 		if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7721 			break;
7722 		}
7723 		behavior = task->task_exc_guard;
7724 		if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7725 			return;
7726 		}
7727 	}
7728 
7729 	/* Raise exception synchronously and see if handler claimed it */
7730 	sync_exception_result = task_exception_notify(EXC_GUARD, code, subcode);
7731 
7732 	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7733 		/*
7734 		 * If Synchronous EXC_GUARD delivery was successful then
7735 		 * kill the process and return, else kill the process
7736 		 * and deliver the exception via EXC_CORPSE_NOTIFY.
7737 		 */
7738 		if (sync_exception_result == KERN_SUCCESS) {
7739 			task_bsdtask_kill(current_task());
7740 		} else {
7741 			exit_with_guard_exception(current_proc(), code, subcode);
7742 		}
7743 	} else if (task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) {
7744 		/*
7745 		 * If the synchronous EXC_GUARD delivery was not successful,
7746 		 * raise a simulated crash.
7747 		 */
7748 		if (sync_exception_result != KERN_SUCCESS) {
7749 			task_violated_guard(code, subcode, NULL);
7750 		}
7751 	}
7752 }
7753 
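/*
 * A minimal sketch of the "deliver once" pattern used above, for a
 * hypothetical 32-bit "flags" word: loop on the compare-and-swap until
 * this thread is the one that atomically clears the deliver bit.
 *
 *	uint32_t old_flags, new_flags;
 *	do {
 *		old_flags = flags;
 *		if ((old_flags & FLAG_DELIVER) == 0) {
 *			return;	 (another thread already delivered)
 *		}
 *		new_flags = old_flags & ~FLAG_DELIVER;
 *	} while (!OSCompareAndSwap(old_flags, new_flags, &flags));
 */
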
7754 /*
7755  *     vm_map_guard_exception:
7756  *
7757  *     Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7758  *
7759  *     Right now, we do this when we find nothing mapped, or a
7760  *     gap in the mapping when a user address space deallocate
7761  *     was requested. We report the address of the first gap found.
7762  */
7763 static void
7764 vm_map_guard_exception(
7765 	vm_map_offset_t gap_start,
7766 	unsigned reason)
7767 {
7768 	mach_exception_code_t code = 0;
7769 	unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7770 	unsigned int target = 0; /* should we pass in pid associated with map? */
7771 	mach_exception_data_type_t subcode = (uint64_t)gap_start;
7772 	boolean_t fatal = FALSE;
7773 
7774 	task_t task = current_task();
7775 
7776 	/* Can't deliver exceptions to kernel task */
7777 	if (task == kernel_task) {
7778 		return;
7779 	}
7780 
7781 	EXC_GUARD_ENCODE_TYPE(code, guard_type);
7782 	EXC_GUARD_ENCODE_FLAVOR(code, reason);
7783 	EXC_GUARD_ENCODE_TARGET(code, target);
7784 
7785 	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7786 		fatal = TRUE;
7787 	}
7788 	thread_guard_violation(current_thread(), code, subcode, fatal);
7789 }
7790 
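/*
 * Encoding sketch: how the EXC_GUARD code/subcode pair is assembled,
 * using the same macros as above (flavor chosen as an example):
 *
 *	mach_exception_code_t code = 0;
 *	EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_VIRT_MEMORY);
 *	EXC_GUARD_ENCODE_FLAVOR(code, kGUARD_EXC_DEALLOC_GAP);
 *	EXC_GUARD_ENCODE_TARGET(code, 0);
 *	subcode = (uint64_t)gap_start;	 (the address of the gap)
 */
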
7791 /*
7792  *	vm_map_delete:	[ internal use only ]
7793  *
7794  *	Deallocates the given address range from the target map.
7795  *	Removes all user wirings. Unwires one kernel wiring if
7796  *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
7797  *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
7798  *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7799  *
7800  *	This routine is called with map locked and leaves map locked.
7801  */
7802 static kern_return_t
7803 vm_map_delete(
7804 	vm_map_t                map,
7805 	vm_map_offset_t         start,
7806 	vm_map_offset_t         end,
7807 	int                     flags,
7808 	vm_map_t                zap_map)
7809 {
7810 	vm_map_entry_t          entry, next;
7811 	struct   vm_map_entry   *first_entry, tmp_entry;
7812 	vm_map_offset_t         s;
7813 	vm_object_t             object;
7814 	boolean_t               need_wakeup;
7815 	unsigned int            last_timestamp = ~0; /* unlikely value */
7816 	int                     interruptible;
7817 	vm_map_offset_t         gap_start;
7818 	__unused vm_map_offset_t save_start = start;
7819 	__unused vm_map_offset_t save_end = end;
7820 	const vm_map_offset_t   FIND_GAP = 1;   /* a not page aligned value */
7821 	const vm_map_offset_t   GAPS_OK = 2;    /* a different not page aligned value */
7822 
7823 	if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7824 		gap_start = FIND_GAP;
7825 	} else {
7826 		gap_start = GAPS_OK;
7827 	}
7828 
7829 	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7830 	    THREAD_ABORTSAFE : THREAD_UNINT;
7831 
7832 	/*
7833 	 * All our DMA I/O operations in IOKit are currently done by
7834 	 * wiring through the map entries of the task requesting the I/O.
7835 	 * Because of this, we must always wait for kernel wirings
7836 	 * to go away on the entries before deleting them.
7837 	 *
7838 	 * Any caller who wants to actually remove a kernel wiring
7839 	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7840 	 * properly remove one wiring instead of blasting through
7841 	 * them all.
7842 	 */
7843 	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7844 
7845 	while (1) {
7846 		/*
7847 		 *	Find the start of the region, and clip it
7848 		 */
7849 		if (vm_map_lookup_entry(map, start, &first_entry)) {
7850 			entry = first_entry;
7851 			if (kalloc_owned_map(map) &&
7852 			    (entry->vme_start != start ||
7853 			    entry->vme_end != end)) {
7854 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
7855 				    "mismatched entry %p [0x%llx:0x%llx]\n",
7856 				    map,
7857 				    (uint64_t)start,
7858 				    (uint64_t)end,
7859 				    entry,
7860 				    (uint64_t)entry->vme_start,
7861 				    (uint64_t)entry->vme_end);
7862 			}
7863 
7864 			/*
7865 			 * If in a superpage, extend the range to include the start of the mapping.
7866 			 */
7867 			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7868 				start = SUPERPAGE_ROUND_DOWN(start);
7869 				continue;
7870 			}
7871 
7872 			if (start == entry->vme_start) {
7873 				/*
7874 				 * No need to clip.  We don't want to cause
7875 				 * any unnecessary unnesting in this case...
7876 				 */
7877 			} else {
7878 				if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7879 				    entry->map_aligned &&
7880 				    !VM_MAP_PAGE_ALIGNED(
7881 					    start,
7882 					    VM_MAP_PAGE_MASK(map))) {
7883 					/*
7884 					 * The entry will no longer be
7885 					 * map-aligned after clipping
7886 					 * and the caller said it's OK.
7887 					 */
7888 					entry->map_aligned = FALSE;
7889 				}
7890 				if (kalloc_owned_map(map)) {
7891 					panic("vm_map_delete(%p,0x%llx,0x%llx):"
7892 					    " clipping %p at 0x%llx\n",
7893 					    map,
7894 					    (uint64_t)start,
7895 					    (uint64_t)end,
7896 					    entry,
7897 					    (uint64_t)start);
7898 				}
7899 				vm_map_clip_start(map, entry, start);
7900 			}
7901 
7902 			/*
7903 			 *	Fix the lookup hint now, rather than each
7904 			 *	time through the loop.
7905 			 */
7906 			SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7907 		} else {
7908 			if (map->pmap == kernel_pmap &&
7909 			    os_ref_get_count_raw(&map->map_refcnt) != 0) {
7910 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
7911 				    "no map entry at 0x%llx\n",
7912 				    map,
7913 				    (uint64_t)start,
7914 				    (uint64_t)end,
7915 				    (uint64_t)start);
7916 			}
7917 			entry = first_entry->vme_next;
7918 			if (gap_start == FIND_GAP) {
7919 				gap_start = start;
7920 			}
7921 		}
7922 		break;
7923 	}
7924 	if (entry->superpage_size) {
7925 		end = SUPERPAGE_ROUND_UP(end);
7926 	}
7927 
7928 	need_wakeup = FALSE;
7929 	/*
7930 	 *	Step through all entries in this region
7931 	 */
7932 	s = entry->vme_start;
7933 	while ((entry != vm_map_to_entry(map)) && (s < end)) {
7934 		/*
7935 		 * At this point, we have deleted all the memory entries
7936 		 * between "start" and "s".  We still need to delete
7937 		 * all memory entries between "s" and "end".
7938 		 * While we were blocked and the map was unlocked, some
7939 		 * new memory entries could have been re-allocated between
7940 		 * "start" and "s" and we don't want to mess with those.
7941 		 * Some of those entries could even have been re-assembled
7942 		 * with an entry after "s" (in vm_map_simplify_entry()), so
7943 		 * we may have to vm_map_clip_start() again.
7944 		 */
7945 
7946 		if (entry->vme_start >= s) {
7947 			/*
7948 			 * This entry starts on or after "s"
7949 			 * so no need to clip its start.
7950 			 */
7951 		} else {
7952 			/*
7953 			 * This entry has been re-assembled by a
7954 			 * vm_map_simplify_entry().  We need to
7955 			 * re-clip its start.
7956 			 */
7957 			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7958 			    entry->map_aligned &&
7959 			    !VM_MAP_PAGE_ALIGNED(s,
7960 			    VM_MAP_PAGE_MASK(map))) {
7961 				/*
7962 				 * The entry will no longer be map-aligned
7963 				 * after clipping and the caller said it's OK.
7964 				 */
7965 				entry->map_aligned = FALSE;
7966 			}
7967 			if (kalloc_owned_map(map)) {
7968 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
7969 				    "clipping %p at 0x%llx\n",
7970 				    map,
7971 				    (uint64_t)start,
7972 				    (uint64_t)end,
7973 				    entry,
7974 				    (uint64_t)s);
7975 			}
7976 			vm_map_clip_start(map, entry, s);
7977 		}
7978 		if (entry->vme_end <= end) {
7979 			/*
7980 			 * This entry is going away completely, so no need
7981 			 * to clip and possibly cause an unnecessary unnesting.
7982 			 */
7983 		} else {
7984 			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7985 			    entry->map_aligned &&
7986 			    !VM_MAP_PAGE_ALIGNED(end,
7987 			    VM_MAP_PAGE_MASK(map))) {
7988 				/*
7989 				 * The entry will no longer be map-aligned
7990 				 * after clipping and the caller said it's OK.
7991 				 */
7992 				entry->map_aligned = FALSE;
7993 			}
7994 			if (kalloc_owned_map(map)) {
7995 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
7996 				    "clipping %p at 0x%llx\n",
7997 				    map,
7998 				    (uint64_t)start,
7999 				    (uint64_t)end,
8000 				    entry,
8001 				    (uint64_t)end);
8002 			}
8003 			vm_map_clip_end(map, entry, end);
8004 		}
8005 
8006 		if (entry->permanent) {
8007 			if (map->pmap == kernel_pmap) {
8008 				panic("%s(%p,0x%llx,0x%llx): "
8009 				    "attempt to remove permanent "
8010 				    "VM map entry "
8011 				    "%p [0x%llx:0x%llx]\n",
8012 				    __FUNCTION__,
8013 				    map,
8014 				    (uint64_t) start,
8015 				    (uint64_t) end,
8016 				    entry,
8017 				    (uint64_t) entry->vme_start,
8018 				    (uint64_t) entry->vme_end);
8019 			} else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8020 //				printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8021 				entry->permanent = FALSE;
8022 			} else {
8023 				if (vm_map_executable_immutable_verbose) {
8024 					printf("%d[%s] %s(0x%llx,0x%llx): "
8025 					    "permanent entry [0x%llx:0x%llx] "
8026 					    "prot 0x%x/0x%x\n",
8027 					    proc_selfpid(),
8028 					    (current_task()->bsd_info
8029 					    ? proc_name_address(current_task()->bsd_info)
8030 					    : "?"),
8031 					    __FUNCTION__,
8032 					    (uint64_t) start,
8033 					    (uint64_t) end,
8034 					    (uint64_t)entry->vme_start,
8035 					    (uint64_t)entry->vme_end,
8036 					    entry->protection,
8037 					    entry->max_protection);
8038 				}
8039 				/*
8040 				 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8041 				 */
8042 				DTRACE_VM5(vm_map_delete_permanent,
8043 				    vm_map_offset_t, entry->vme_start,
8044 				    vm_map_offset_t, entry->vme_end,
8045 				    vm_prot_t, entry->protection,
8046 				    vm_prot_t, entry->max_protection,
8047 				    int, VME_ALIAS(entry));
8048 			}
8049 		}
8050 
8051 
8052 		if (entry->in_transition) {
8053 			wait_result_t wait_result;
8054 
8055 			/*
8056 			 * Another thread is wiring/unwiring this entry.
8057 			 * Let the other thread know we are waiting.
8058 			 */
8059 			assert(s == entry->vme_start);
8060 			entry->needs_wakeup = TRUE;
8061 
8062 			/*
8063 			 * wake up anybody waiting on entries that we have
8064 			 * already unwired/deleted.
8065 			 */
8066 			if (need_wakeup) {
8067 				vm_map_entry_wakeup(map);
8068 				need_wakeup = FALSE;
8069 			}
8070 
8071 			wait_result = vm_map_entry_wait(map, interruptible);
8072 
8073 			if (interruptible &&
8074 			    wait_result == THREAD_INTERRUPTED) {
8075 				/*
8076 				 * We do not clear the needs_wakeup flag,
8077 				 * since we cannot tell if we were the only one.
8078 				 */
8079 				return KERN_ABORTED;
8080 			}
8081 
8082 			/*
8083 			 * The entry could have been clipped or it
8084 			 * may not exist anymore.  Look it up again.
8085 			 */
8086 			if (!vm_map_lookup_entry(map, s, &first_entry)) {
8087 				/*
8088 				 * User: use the next entry
8089 				 */
8090 				if (gap_start == FIND_GAP) {
8091 					gap_start = s;
8092 				}
8093 				entry = first_entry->vme_next;
8094 				s = entry->vme_start;
8095 			} else {
8096 				entry = first_entry;
8097 				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8098 			}
8099 			last_timestamp = map->timestamp;
8100 			continue;
8101 		} /* end in_transition */
8102 
8103 		if (entry->wired_count) {
8104 			boolean_t       user_wire;
8105 
8106 			user_wire = entry->user_wired_count > 0;
8107 
8108 			/*
8109 			 *      Remove a kernel wiring if requested
8110 			 */
8111 			if (flags & VM_MAP_REMOVE_KUNWIRE) {
8112 				entry->wired_count--;
8113 			}
8114 
8115 			/*
8116 			 *	Remove all user wirings for proper accounting
8117 			 */
8118 			if (entry->user_wired_count > 0) {
8119 				while (entry->user_wired_count) {
8120 					subtract_wire_counts(map, entry, user_wire);
8121 				}
8122 			}
8123 
8124 			if (entry->wired_count != 0) {
8125 				assert(map != kernel_map);
8126 				/*
8127 				 * Cannot continue.  Typical case is when
8128 				 * a user thread has physical I/O pending
8129 				 * on this page.  Either wait for the
8130 				 * kernel wiring to go away or return an
8131 				 * error.
8132 				 */
8133 				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
8134 					wait_result_t wait_result;
8135 
8136 					assert(s == entry->vme_start);
8137 					entry->needs_wakeup = TRUE;
8138 					wait_result = vm_map_entry_wait(map,
8139 					    interruptible);
8140 
8141 					if (interruptible &&
8142 					    wait_result == THREAD_INTERRUPTED) {
8143 						/*
8144 						 * We do not clear the
8145 						 * needs_wakeup flag, since we
8146 						 * cannot tell if we were the
8147 						 * only one.
8148 						 */
8149 						return KERN_ABORTED;
8150 					}
8151 
8152 					/*
8153 					 * The entry could have been clipped or
8154 					 * it may not exist anymore.  Look it
8155 					 * up again.
8156 					 */
8157 					if (!vm_map_lookup_entry(map, s,
8158 					    &first_entry)) {
8159 						assert(map != kernel_map);
8160 						/*
8161 						 * User: use the next entry
8162 						 */
8163 						if (gap_start == FIND_GAP) {
8164 							gap_start = s;
8165 						}
8166 						entry = first_entry->vme_next;
8167 						s = entry->vme_start;
8168 					} else {
8169 						entry = first_entry;
8170 						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8171 					}
8172 					last_timestamp = map->timestamp;
8173 					continue;
8174 				} else {
8175 					return KERN_FAILURE;
8176 				}
8177 			}
8178 
8179 			entry->in_transition = TRUE;
8180 			/*
8181 			 * copy current entry.  see comment in vm_map_wire()
8182 			 */
8183 			tmp_entry = *entry;
8184 			assert(s == entry->vme_start);
8185 
8186 			/*
8187 			 * We can unlock the map now. The in_transition
8188 			 * state guarantees existence of the entry.
8189 			 */
8190 			vm_map_unlock(map);
8191 
8192 			if (tmp_entry.is_sub_map) {
8193 				vm_map_t sub_map;
8194 				vm_map_offset_t sub_start, sub_end;
8195 				pmap_t pmap;
8196 				vm_map_offset_t pmap_addr;
8197 
8198 
8199 				sub_map = VME_SUBMAP(&tmp_entry);
8200 				sub_start = VME_OFFSET(&tmp_entry);
8201 				sub_end = sub_start + (tmp_entry.vme_end -
8202 				    tmp_entry.vme_start);
8203 				if (tmp_entry.use_pmap) {
8204 					pmap = sub_map->pmap;
8205 					pmap_addr = tmp_entry.vme_start;
8206 				} else {
8207 					pmap = map->pmap;
8208 					pmap_addr = tmp_entry.vme_start;
8209 				}
8210 				(void) vm_map_unwire_nested(sub_map,
8211 				    sub_start, sub_end,
8212 				    user_wire,
8213 				    pmap, pmap_addr);
8214 			} else {
8215 				if (VME_OBJECT(&tmp_entry) == kernel_object) {
8216 					pmap_protect_options(
8217 						map->pmap,
8218 						tmp_entry.vme_start,
8219 						tmp_entry.vme_end,
8220 						VM_PROT_NONE,
8221 						PMAP_OPTIONS_REMOVE,
8222 						NULL);
8223 				}
8224 				vm_fault_unwire(map, &tmp_entry,
8225 				    VME_OBJECT(&tmp_entry) == kernel_object,
8226 				    map->pmap, tmp_entry.vme_start);
8227 			}
8228 
8229 			vm_map_lock(map);
8230 
8231 			if (last_timestamp + 1 != map->timestamp) {
8232 				/*
8233 				 * Find the entry again.  It could have
8234 				 * been clipped after we unlocked the map.
8235 				 */
8236 				if (!vm_map_lookup_entry(map, s, &first_entry)) {
8237 					assert((map != kernel_map) &&
8238 					    (!entry->is_sub_map));
8239 					if (gap_start == FIND_GAP) {
8240 						gap_start = s;
8241 					}
8242 					first_entry = first_entry->vme_next;
8243 					s = first_entry->vme_start;
8244 				} else {
8245 					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8246 				}
8247 			} else {
8248 				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8249 				first_entry = entry;
8250 			}
8251 
8252 			last_timestamp = map->timestamp;
8253 
8254 			entry = first_entry;
8255 			while ((entry != vm_map_to_entry(map)) &&
8256 			    (entry->vme_start < tmp_entry.vme_end)) {
8257 				assert(entry->in_transition);
8258 				entry->in_transition = FALSE;
8259 				if (entry->needs_wakeup) {
8260 					entry->needs_wakeup = FALSE;
8261 					need_wakeup = TRUE;
8262 				}
8263 				entry = entry->vme_next;
8264 			}
8265 			/*
8266 			 * We have unwired the entry(s).  Go back and
8267 			 * delete them.
8268 			 */
8269 			entry = first_entry;
8270 			continue;
8271 		}
8272 
8273 		/* entry is unwired */
8274 		assert(entry->wired_count == 0);
8275 		assert(entry->user_wired_count == 0);
8276 
8277 		assert(s == entry->vme_start);
8278 
8279 		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8280 			/*
8281 			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8282 			 * vm_map_delete(), some map entries might have been
8283 			 * transferred to a "zap_map", which doesn't have a
8284 			 * pmap.  The original pmap has already been flushed
8285 			 * in the vm_map_delete() call targeting the original
8286 			 * map, but when we get to destroying the "zap_map",
8287 			 * we don't have any pmap to flush, so let's just skip
8288 			 * all this.
8289 			 */
8290 		} else if (entry->is_sub_map) {
8291 			assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8292 			    "map %p (%d) entry %p submap %p (%d)\n",
8293 			    map, VM_MAP_PAGE_SHIFT(map), entry,
8294 			    VME_SUBMAP(entry),
8295 			    VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8296 			if (entry->use_pmap) {
8297 				assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
8298 				    "map %p (%d) entry %p submap %p (%d)\n",
8299 				    map, VM_MAP_PAGE_SHIFT(map), entry,
8300 				    VME_SUBMAP(entry),
8301 				    VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8302 #ifndef NO_NESTED_PMAP
8303 				int pmap_flags;
8304 
8305 				if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8306 					/*
8307 					 * This is the final cleanup of the
8308 					 * address space being terminated.
8309 					 * No new mappings are expected and
8310 					 * we don't really need to unnest the
8311 					 * shared region (and lose the "global"
8312 					 * pmap mappings, if applicable).
8313 					 *
8314 					 * Tell the pmap layer that we're
8315 					 * "clean" wrt nesting.
8316 					 */
8317 					pmap_flags = PMAP_UNNEST_CLEAN;
8318 				} else {
8319 					/*
8320 					 * We're unmapping part of the nested
8321 					 * shared region, so we can't keep the
8322 					 * nested pmap.
8323 					 */
8324 					pmap_flags = 0;
8325 				}
8326 				pmap_unnest_options(
8327 					map->pmap,
8328 					(addr64_t)entry->vme_start,
8329 					entry->vme_end - entry->vme_start,
8330 					pmap_flags);
8331 #endif  /* NO_NESTED_PMAP */
8332 				if (map->mapped_in_other_pmaps &&
8333 				    os_ref_get_count_raw(&map->map_refcnt) != 0) {
8334 					/* clean up parent map/maps */
8335 					vm_map_submap_pmap_clean(
8336 						map, entry->vme_start,
8337 						entry->vme_end,
8338 						VME_SUBMAP(entry),
8339 						VME_OFFSET(entry));
8340 				}
8341 			} else {
8342 				vm_map_submap_pmap_clean(
8343 					map, entry->vme_start, entry->vme_end,
8344 					VME_SUBMAP(entry),
8345 					VME_OFFSET(entry));
8346 			}
8347 		} else if (VME_OBJECT(entry) != kernel_object &&
8348 		    VME_OBJECT(entry) != compressor_object) {
8349 			object = VME_OBJECT(entry);
8350 			if (map->mapped_in_other_pmaps &&
8351 			    os_ref_get_count_raw(&map->map_refcnt) != 0) {
8352 				vm_object_pmap_protect_options(
8353 					object, VME_OFFSET(entry),
8354 					entry->vme_end - entry->vme_start,
8355 					PMAP_NULL,
8356 					PAGE_SIZE,
8357 					entry->vme_start,
8358 					VM_PROT_NONE,
8359 					PMAP_OPTIONS_REMOVE);
8360 			} else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8361 			    (map->pmap == kernel_pmap)) {
8362 				/* Remove translations associated
8363 				 * with this range unless the entry
8364 				 * does not have an object, or
8365 				 * it's the kernel map or a descendant
8366 				 * since the platform could potentially
8367 				 * create "backdoor" mappings invisible
8368 				 * to the VM. It is expected that
8369 				 * objectless, non-kernel ranges
8370 				 * do not have such VM invisible
8371 				 * translations.
8372 				 */
8373 				pmap_remove_options(map->pmap,
8374 				    (addr64_t)entry->vme_start,
8375 				    (addr64_t)entry->vme_end,
8376 				    PMAP_OPTIONS_REMOVE);
8377 			}
8378 		}
8379 
8380 		if (entry->iokit_acct) {
8381 			/* alternate accounting */
8382 			DTRACE_VM4(vm_map_iokit_unmapped_region,
8383 			    vm_map_t, map,
8384 			    vm_map_offset_t, entry->vme_start,
8385 			    vm_map_offset_t, entry->vme_end,
8386 			    int, VME_ALIAS(entry));
8387 			vm_map_iokit_unmapped_region(map,
8388 			    (entry->vme_end -
8389 			    entry->vme_start));
8390 			entry->iokit_acct = FALSE;
8391 			entry->use_pmap = FALSE;
8392 		}
8393 
8394 		/*
8395 		 * All pmap mappings for this map entry must have been
8396 		 * cleared by now.
8397 		 */
8398 #if DEBUG
8399 		assert(pmap_is_empty(map->pmap,
8400 		    entry->vme_start,
8401 		    entry->vme_end));
8402 #endif /* DEBUG */
8403 
8404 		next = entry->vme_next;
8405 
8406 		if (map->pmap == kernel_pmap &&
8407 		    os_ref_get_count_raw(&map->map_refcnt) != 0) {
8408 			if (entry->vme_end < end && (next == vm_map_to_entry(map) || next->vme_start != entry->vme_end)) {
8409 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
8410 				    "hole after %p at 0x%llx\n",
8411 				    map,
8412 				    (uint64_t)start,
8413 				    (uint64_t)end,
8414 				    entry,
8415 				    (uint64_t)entry->vme_end);
8416 			}
8417 
8418 			if (entry->vme_atomic && (entry->vme_start != start || entry->vme_end != end)) {
8419 				/*
8420 				 * In the kernel map and its submaps, the removal of
8421 				 * an atomic entry is strict. An atomic entry is
8422 				 * processed only if it was specifically targeted. We
8423 				 * might have deleted non-atomic entries before it but
8424 				 * we won't remove this atomic entry OR anything after it.
8425 				 */
8426 #if DEVELOPMENT || DEBUG
8427 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
8428 				    "request loosely encompasses atomic entry %p at (0x%llx,0x%llx)\n",
8429 				    map,
8430 				    (uint64_t)start,
8431 				    (uint64_t)end,
8432 				    entry,
8433 				    (uint64_t)entry->vme_start,
8434 				    (uint64_t)entry->vme_end);
8435 #endif /* DEVELOPMENT || DEBUG */
8436 
8437 				break;
8438 			}
8439 		}
8440 
8441 		/*
8442 		 * If the desired range didn't end with "entry", then there is a gap if
8443 		 * we wrapped around to the start of the map or if "entry" and "next"
8444 		 * aren't contiguous.
8445 		 *
8446 		 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8447 		 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8448 		 */
8449 		if (gap_start == FIND_GAP &&
8450 		    vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8451 		    (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8452 			gap_start = entry->vme_end;
8453 		}
8454 		s = next->vme_start;
8455 		last_timestamp = map->timestamp;
8456 
8457 		if (entry->permanent) {
8458 			/*
8459 			 * A permanent entry can not be removed, so leave it
8460 			 * in place but remove all access permissions.
8461 			 */
8462 			entry->protection = VM_PROT_NONE;
8463 			entry->max_protection = VM_PROT_NONE;
8464 		} else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8465 		    zap_map != VM_MAP_NULL) {
8466 			vm_map_size_t entry_size;
8467 			/*
8468 			 * The caller wants to save the affected VM map entries
8469 			 * into the "zap_map".  The caller will take care of
8470 			 * these entries.
8471 			 */
8472 			/* unlink the entry from "map" ... */
8473 			vm_map_store_entry_unlink(map, entry);
8474 			/* ... and add it to the end of the "zap_map" */
8475 			vm_map_store_entry_link(zap_map,
8476 			    vm_map_last_entry(zap_map),
8477 			    entry,
8478 			    VM_MAP_KERNEL_FLAGS_NONE);
8479 			entry_size = entry->vme_end - entry->vme_start;
8480 			map->size -= entry_size;
8481 			zap_map->size += entry_size;
8482 			/* we didn't unlock the map, so no timestamp increase */
8483 			last_timestamp--;
8484 		} else {
8485 			vm_map_entry_delete(map, entry);
8486 			/* vm_map_entry_delete unlocks the map */
8487 			vm_map_lock(map);
8488 		}
8489 
8490 		entry = next;
8491 
8492 		if (entry == vm_map_to_entry(map)) {
8493 			break;
8494 		}
8495 		if (last_timestamp + 1 != map->timestamp) {
8496 			/*
8497 			 * We are responsible for deleting everything
8498 			 * from the given space. If someone has interfered,
8499 			 * we pick up where we left off. Back fills should
8500 			 * be all right for anyone, except map_delete, and
8501 			 * we have to assume that the task has been fully
8502 			 * disabled before we get here.
8503 			 */
8504 			if (!vm_map_lookup_entry(map, s, &entry)) {
8505 				entry = entry->vme_next;
8506 
8507 				/*
8508 				 * Nothing found for s. If we weren't already done, then there is a gap.
8509 				 */
8510 				if (gap_start == FIND_GAP && s < end) {
8511 					gap_start = s;
8512 				}
8513 				s = entry->vme_start;
8514 			} else {
8515 				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8516 			}
8517 			/*
8518 			 * Others can not only allocate behind us; entries can
8519 			 * also coalesce while we don't hold the map lock.
8520 			 */
8521 			if (entry == vm_map_to_entry(map)) {
8522 				break;
8523 			}
8524 		}
8525 		last_timestamp = map->timestamp;
8526 	}
8527 
8528 	if (map->wait_for_space) {
8529 		thread_wakeup((event_t) map);
8530 	}
8531 	/*
8532 	 * wake up anybody waiting on entries that we have already deleted.
8533 	 */
8534 	if (need_wakeup) {
8535 		vm_map_entry_wakeup(map);
8536 	}
8537 
8538 	if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8539 		DTRACE_VM3(kern_vm_deallocate_gap,
8540 		    vm_map_offset_t, gap_start,
8541 		    vm_map_offset_t, save_start,
8542 		    vm_map_offset_t, save_end);
8543 		if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8544 			vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8545 		}
8546 	}
8547 
8548 	return KERN_SUCCESS;
8549 }
8550 
8551 
8552 /*
8553  *	vm_map_terminate:
8554  *
8555  *	Clean out a task's map.
8556  */
8557 kern_return_t
8558 vm_map_terminate(
8559 	vm_map_t        map)
8560 {
8561 	vm_map_lock(map);
8562 	map->terminated = TRUE;
8563 	vm_map_unlock(map);
8564 
8565 	return vm_map_remove(map,
8566 	           map->min_offset,
8567 	           map->max_offset,
8568 	           /*
8569 	            * Final cleanup:
8570 	            * + no unnesting
8571 	            * + remove immutable mappings
8572 	            * + allow gaps in range
8573 	            */
8574 	           (VM_MAP_REMOVE_NO_UNNESTING |
8575 	           VM_MAP_REMOVE_IMMUTABLE |
8576 	           VM_MAP_REMOVE_GAPS_OK));
8577 }
8578 
8579 /*
8580  *	vm_map_remove:
8581  *
8582  *	Remove the given address range from the target map.
8583  *	This is the exported form of vm_map_delete.
8584  */
8585 kern_return_t
8586 vm_map_remove(
8587 	vm_map_t        map,
8588 	vm_map_offset_t start,
8589 	vm_map_offset_t end,
8590 	boolean_t      flags)
8591 {
8592 	kern_return_t   result;
8593 
8594 	vm_map_lock(map);
8595 	VM_MAP_RANGE_CHECK(map, start, end);
8596 	/*
8597 	 * For the zone maps, the kernel controls the allocation/freeing of memory.
8598 	 * Any free to the zone maps should be within the bounds of the map and
8599 	 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8600 	 * free to the zone maps into a no-op, there is a problem and we should
8601 	 * panic.
8602 	 */
8603 	if ((start == end) && zone_maps_owned(start, 1)) {
8604 		panic("Nothing being freed to a zone map. start = end = %p", (void *)start);
8605 	}
8606 	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8607 	vm_map_unlock(map);
8608 
8609 	return result;
8610 }
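
/*
 * Usage sketch: a minimal in-kernel caller, assuming "addr" and
 * "size" are placeholders for a page-aligned range the caller owns,
 * and that VM_MAP_REMOVE_NO_FLAGS (0) requests no special semantics:
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_remove(map, addr, addr + size,
 *	    VM_MAP_REMOVE_NO_FLAGS);
 *	assert(kr == KERN_SUCCESS);
 *
 * Passing VM_MAP_REMOVE_GAPS_OK instead keeps vm_map_delete() from
 * raising kGUARD_EXC_DEALLOC_GAP when the range contains holes.
 */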
8611 
8612 /*
8613  *	vm_map_remove_locked:
8614  *
8615  *	Remove the given address range from the locked target map.
8616  *	This is the exported form of vm_map_delete.
8617  */
8618 kern_return_t
8619 vm_map_remove_locked(
8620 	vm_map_t        map,
8621 	vm_map_offset_t start,
8622 	vm_map_offset_t end,
8623 	boolean_t       flags)
8624 {
8625 	kern_return_t   result;
8626 
8627 	VM_MAP_RANGE_CHECK(map, start, end);
8628 	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8629 	return result;
8630 }
8631 
8632 
8633 /*
8634  *	Routine:	vm_map_copy_allocate
8635  *
8636  *	Description:
8637  *		Allocates and initializes a map copy object.
8638  */
8639 static vm_map_copy_t
8640 vm_map_copy_allocate(void)
8641 {
8642 	vm_map_copy_t new_copy;
8643 
8644 	new_copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
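	/*
	 * Copy objects keep their entries on the linked list only;
	 * seeding the header with SKIP_RB_TREE tells the vm_map_store
	 * layer not to maintain a red-black tree for this header.
	 */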
8645 	new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8646 	vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8647 	vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8648 	return new_copy;
8649 }
8650 
8651 /*
8652  *	Routine:	vm_map_copy_discard
8653  *
8654  *	Description:
8655  *		Dispose of a map copy object (returned by
8656  *		vm_map_copyin).
8657  */
8658 void
8659 vm_map_copy_discard(
8660 	vm_map_copy_t   copy)
8661 {
8662 	if (copy == VM_MAP_COPY_NULL) {
8663 		return;
8664 	}
8665 
8666 	switch (copy->type) {
8667 	case VM_MAP_COPY_ENTRY_LIST:
8668 		while (vm_map_copy_first_entry(copy) !=
8669 		    vm_map_copy_to_entry(copy)) {
8670 			vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
8671 
8672 			vm_map_copy_entry_unlink(copy, entry);
8673 			if (entry->is_sub_map) {
8674 				vm_map_deallocate(VME_SUBMAP(entry));
8675 			} else {
8676 				vm_object_deallocate(VME_OBJECT(entry));
8677 			}
8678 			vm_map_copy_entry_dispose(copy, entry);
8679 		}
8680 		break;
8681 	case VM_MAP_COPY_OBJECT:
8682 		vm_object_deallocate(copy->cpy_object);
8683 		break;
8684 	case VM_MAP_COPY_KERNEL_BUFFER:
8685 
8686 		/*
8687 		 * The vm_map_copy_t and possibly the data buffer were
8688 		 * allocated by a single call to kalloc_data(), i.e. the
8689 		 * vm_map_copy_t was not allocated out of the zone.
8690 		 */
8691 		if (copy->size > msg_ool_size_small || copy->offset) {
8692 			panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8693 			    (long long)copy->size, (long long)copy->offset);
8694 		}
8695 		kfree_data(copy->cpy_kdata, copy->size);
8696 	}
8697 	zfree(vm_map_copy_zone, copy);
8698 }
8699 
8700 /*
8701  *	Routine:	vm_map_copy_copy
8702  *
8703  *	Description:
8704  *			Move the information in a map copy object to
8705  *			a new map copy object, leaving the old one
8706  *			empty.
8707  *
8708  *			This is used by kernel routines that need
8709  *			to look at out-of-line data (in copyin form)
8710  *			before deciding whether to return SUCCESS.
8711  *			If the routine returns FAILURE, the original
8712  *			copy object will be deallocated; therefore,
8713  *			these routines must make a copy of the copy
8714  *			object and leave the original empty so that
8715  *			deallocation will not fail.
8716  */
8717 vm_map_copy_t
8718 vm_map_copy_copy(
8719 	vm_map_copy_t   copy)
8720 {
8721 	vm_map_copy_t   new_copy;
8722 
8723 	if (copy == VM_MAP_COPY_NULL) {
8724 		return VM_MAP_COPY_NULL;
8725 	}
8726 
8727 	/*
8728 	 * Allocate a new copy object, and copy the information
8729 	 * from the old one into it.
8730 	 */
8731 
8732 	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8733 	memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8734 #if __has_feature(ptrauth_calls)
8735 	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8736 		new_copy->cpy_kdata = copy->cpy_kdata;
8737 	}
8738 #endif
8739 
8740 	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8741 		/*
8742 		 * The links in the entry chain must be
8743 		 * changed to point to the new copy object.
8744 		 */
8745 		vm_map_copy_first_entry(copy)->vme_prev
8746 		        = vm_map_copy_to_entry(new_copy);
8747 		vm_map_copy_last_entry(copy)->vme_next
8748 		        = vm_map_copy_to_entry(new_copy);
8749 	}
8750 
8751 	/*
8752 	 * Change the old copy object into one that contains
8753 	 * nothing to be deallocated.
8754 	 */
8755 	copy->type = VM_MAP_COPY_OBJECT;
8756 	copy->cpy_object = VM_OBJECT_NULL;
8757 
8758 	/*
8759 	 * Return the new object.
8760 	 */
8761 	return new_copy;
8762 }
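
/*
 * Usage sketch, following the contract above; "examine_payload()" is
 * a hypothetical helper.  The caller steals the contents first, so a
 * later vm_map_copy_discard() of the (now empty) original frees
 * nothing:
 *
 *	vm_map_copy_t stolen = vm_map_copy_copy(copy);
 *	kern_return_t kr = examine_payload(stolen);
 *
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(stolen);
 *		return kr;	// "copy" will be discarded harmlessly
 *	}
 */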
8763 
8764 static boolean_t
8765 vm_map_entry_is_overwritable(
8766 	vm_map_t        dst_map __unused,
8767 	vm_map_entry_t  entry)
8768 {
8769 	if (!(entry->protection & VM_PROT_WRITE)) {
8770 		/* can't overwrite if not writable */
8771 		return FALSE;
8772 	}
8773 #if !__x86_64__
8774 	if (entry->used_for_jit &&
8775 	    vm_map_cs_enforcement(dst_map) &&
8776 	    !dst_map->cs_debugged) {
8777 		/*
8778 		 * Can't overwrite a JIT region while cs_enforced
8779 		 * and not cs_debugged.
8780 		 */
8781 		return FALSE;
8782 	}
8783 #endif /* !__x86_64__ */
8784 	return TRUE;
8785 }
8786 
8787 static kern_return_t
8788 vm_map_overwrite_submap_recurse(
8789 	vm_map_t        dst_map,
8790 	vm_map_offset_t dst_addr,
8791 	vm_map_size_t   dst_size)
8792 {
8793 	vm_map_offset_t dst_end;
8794 	vm_map_entry_t  tmp_entry;
8795 	vm_map_entry_t  entry;
8796 	kern_return_t   result;
8797 	boolean_t       encountered_sub_map = FALSE;
8798 
8799 
8800 
8801 	/*
8802 	 *	Verify that the destination is all writeable
8803 	 *	initially.  We have to trunc the destination
8804 	 *	address and round the copy size or we'll end up
8805 	 *	splitting entries in strange ways.
8806 	 */
8807 
8808 	dst_end = vm_map_round_page(dst_addr + dst_size,
8809 	    VM_MAP_PAGE_MASK(dst_map));
8810 	vm_map_lock(dst_map);
8811 
8812 start_pass_1:
8813 	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8814 		vm_map_unlock(dst_map);
8815 		return KERN_INVALID_ADDRESS;
8816 	}
8817 
8818 	vm_map_clip_start(dst_map,
8819 	    tmp_entry,
8820 	    vm_map_trunc_page(dst_addr,
8821 	    VM_MAP_PAGE_MASK(dst_map)));
8822 	if (tmp_entry->is_sub_map) {
8823 		/* clipping did unnest if needed */
8824 		assert(!tmp_entry->use_pmap);
8825 	}
8826 
8827 	for (entry = tmp_entry;;) {
8828 		vm_map_entry_t  next;
8829 
8830 		next = entry->vme_next;
8831 		while (entry->is_sub_map) {
8832 			vm_map_offset_t sub_start;
8833 			vm_map_offset_t sub_end;
8834 			vm_map_offset_t local_end;
8835 
8836 			if (entry->in_transition) {
8837 				/*
8838 				 * Say that we are waiting, and wait for entry.
8839 				 */
8840 				entry->needs_wakeup = TRUE;
8841 				vm_map_entry_wait(dst_map, THREAD_UNINT);
8842 
8843 				goto start_pass_1;
8844 			}
8845 
8846 			encountered_sub_map = TRUE;
8847 			sub_start = VME_OFFSET(entry);
8848 
8849 			if (entry->vme_end < dst_end) {
8850 				sub_end = entry->vme_end;
8851 			} else {
8852 				sub_end = dst_end;
8853 			}
8854 			sub_end -= entry->vme_start;
8855 			sub_end += VME_OFFSET(entry);
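			/*
			 * sub_start/sub_end now express the destination
			 * range clipped to this entry as offsets within
			 * the submap: min(vme_end, dst_end), rebased from
			 * the entry's start onto VME_OFFSET(entry).
			 */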
8856 			local_end = entry->vme_end;
8857 			vm_map_unlock(dst_map);
8858 
8859 			result = vm_map_overwrite_submap_recurse(
8860 				VME_SUBMAP(entry),
8861 				sub_start,
8862 				sub_end - sub_start);
8863 
8864 			if (result != KERN_SUCCESS) {
8865 				return result;
8866 			}
8867 			if (dst_end <= entry->vme_end) {
8868 				return KERN_SUCCESS;
8869 			}
8870 			vm_map_lock(dst_map);
8871 			if (!vm_map_lookup_entry(dst_map, local_end,
8872 			    &tmp_entry)) {
8873 				vm_map_unlock(dst_map);
8874 				return KERN_INVALID_ADDRESS;
8875 			}
8876 			entry = tmp_entry;
8877 			next = entry->vme_next;
8878 		}
8879 
8880 		if (!(entry->protection & VM_PROT_WRITE)) {
8881 			vm_map_unlock(dst_map);
8882 			return KERN_PROTECTION_FAILURE;
8883 		}
8884 
8885 		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
8886 			vm_map_unlock(dst_map);
8887 			return KERN_PROTECTION_FAILURE;
8888 		}
8889 
8890 		/*
8891 		 *	If the entry is in transition, we must wait
8892 		 *	for it to exit that state.  Anything could happen
8893 		 *	when we unlock the map, so start over.
8894 		 */
8895 		if (entry->in_transition) {
8896 			/*
8897 			 * Say that we are waiting, and wait for entry.
8898 			 */
8899 			entry->needs_wakeup = TRUE;
8900 			vm_map_entry_wait(dst_map, THREAD_UNINT);
8901 
8902 			goto start_pass_1;
8903 		}
8904 
8905 /*
8906  *		our range is contained completely within this map entry
8907  */
8908 		if (dst_end <= entry->vme_end) {
8909 			vm_map_unlock(dst_map);
8910 			return KERN_SUCCESS;
8911 		}
8912 /*
8913  *		check that range specified is contiguous region
8914  */
8915 		if ((next == vm_map_to_entry(dst_map)) ||
8916 		    (next->vme_start != entry->vme_end)) {
8917 			vm_map_unlock(dst_map);
8918 			return KERN_INVALID_ADDRESS;
8919 		}
8920 
8921 		/*
8922 		 *	Check for permanent objects in the destination.
8923 		 */
8924 		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8925 		    ((!VME_OBJECT(entry)->internal) ||
8926 		    (VME_OBJECT(entry)->true_share))) {
8927 			if (encountered_sub_map) {
8928 				vm_map_unlock(dst_map);
8929 				return KERN_FAILURE;
8930 			}
8931 		}
8932 
8933 
8934 		entry = next;
8935 	}/* for */
8936 	vm_map_unlock(dst_map);
8937 	return KERN_SUCCESS;
8938 }
8939 
8940 /*
8941  *	Routine:	vm_map_copy_overwrite
8942  *
8943  *	Description:
8944  *		Copy the memory described by the map copy
8945  *		object (copy; returned by vm_map_copyin) onto
8946  *		the specified destination region (dst_map, dst_addr).
8947  *		The destination must be writeable.
8948  *
8949  *		Unlike vm_map_copyout, this routine actually
8950  *		writes over previously-mapped memory.  If the
8951  *		previous mapping was to a permanent (user-supplied)
8952  *		memory object, it is preserved.
8953  *
8954  *		The attributes (protection and inheritance) of the
8955  *		destination region are preserved.
8956  *
8957  *		If successful, consumes the copy object.
8958  *		Otherwise, the caller is responsible for it.
8959  *
8960  *	Implementation notes:
8961  *		To overwrite aligned temporary virtual memory, it is
8962  *		sufficient to remove the previous mapping and insert
8963  *		the new copy.  This replacement is done either on
8964  *		the whole region (if no permanent virtual memory
8965  *		objects are embedded in the destination region) or
8966  *		in individual map entries.
8967  *
8968  *		To overwrite permanent virtual memory, it is necessary
8969  *		to copy each page, as the external memory management
8970  *		interface currently does not provide any optimizations.
8971  *
8972  *		Unaligned memory also has to be copied.  It is possible
8973  *		to use 'vm_trickery' to copy the aligned data.  This is
8974  *		not done but not hard to implement.
8975  *
8976  *		Once a page of permanent memory has been overwritten,
8977  *		it is impossible to interrupt this function; otherwise,
8978  *		the call would be neither atomic nor location-independent.
8979  *		The kernel-state portion of a user thread must be
8980  *		interruptible.
8981  *
8982  *		It may be expensive to forward all requests that might
8983  *		overwrite permanent memory (vm_write, vm_copy) to
8984  *		uninterruptible kernel threads.  This routine may be
8985  *		called by interruptible threads; however, success is
8986  *		not guaranteed -- if the request cannot be performed
8987  *		atomically and interruptibly, an error indication is
8988  *		returned.
8989  */
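
/*
 * Overall flow sketch, assuming placeholder maps/addresses and a
 * map-page-aligned "len" (vm_map_copyin() with src_destroy=FALSE):
 *
 *	vm_map_copy_t copy;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, len, FALSE);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);	(not consumed on failure)
 */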
8990 
8991 static kern_return_t
8992 vm_map_copy_overwrite_nested(
8993 	vm_map_t                dst_map,
8994 	vm_map_address_t        dst_addr,
8995 	vm_map_copy_t           copy,
8996 	boolean_t               interruptible,
8997 	pmap_t                  pmap,
8998 	boolean_t               discard_on_success)
8999 {
9000 	vm_map_offset_t         dst_end;
9001 	vm_map_entry_t          tmp_entry;
9002 	vm_map_entry_t          entry;
9003 	kern_return_t           kr;
9004 	boolean_t               aligned = TRUE;
9005 	boolean_t               contains_permanent_objects = FALSE;
9006 	boolean_t               encountered_sub_map = FALSE;
9007 	vm_map_offset_t         base_addr;
9008 	vm_map_size_t           copy_size;
9009 	vm_map_size_t           total_size;
9010 	uint16_t                copy_page_shift;
9011 
9012 
9013 	/*
9014 	 *	Check for null copy object.
9015 	 */
9016 
9017 	if (copy == VM_MAP_COPY_NULL) {
9018 		return KERN_SUCCESS;
9019 	}
9020 
9021 	/*
9022 	 * Assert that the vm_map_copy is coming from the right
9023 	 * zone and hasn't been forged
9024 	 */
9025 	vm_map_copy_require(copy);
9026 
9027 	/*
9028 	 *	Check for special kernel buffer allocated
9029 	 *	by new_ipc_kmsg_copyin.
9030 	 */
9031 
9032 	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9033 		return vm_map_copyout_kernel_buffer(
9034 			dst_map, &dst_addr,
9035 			copy, copy->size, TRUE, discard_on_success);
9036 	}
9037 
9038 	/*
9039 	 *      Only works for entry lists at the moment.  Will
9040 	 *	support page lists later.
9041 	 */
9042 
9043 	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9044 
9045 	if (copy->size == 0) {
9046 		if (discard_on_success) {
9047 			vm_map_copy_discard(copy);
9048 		}
9049 		return KERN_SUCCESS;
9050 	}
9051 
9052 	copy_page_shift = copy->cpy_hdr.page_shift;
9053 
9054 	/*
9055 	 *	Verify that the destination is all writeable
9056 	 *	initially.  We have to trunc the destination
9057 	 *	address and round the copy size or we'll end up
9058 	 *	splitting entries in strange ways.
9059 	 */
9060 
9061 	if (!VM_MAP_PAGE_ALIGNED(copy->size,
9062 	    VM_MAP_PAGE_MASK(dst_map)) ||
9063 	    !VM_MAP_PAGE_ALIGNED(copy->offset,
9064 	    VM_MAP_PAGE_MASK(dst_map)) ||
9065 	    !VM_MAP_PAGE_ALIGNED(dst_addr,
9066 	    VM_MAP_PAGE_MASK(dst_map)) ||
9067 	    copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9068 		aligned = FALSE;
9069 		dst_end = vm_map_round_page(dst_addr + copy->size,
9070 		    VM_MAP_PAGE_MASK(dst_map));
9071 	} else {
9072 		dst_end = dst_addr + copy->size;
9073 	}
9074 
9075 	vm_map_lock(dst_map);
9076 
9077 	/* LP64todo - remove this check when vm_map_commpage64()
9078 	 * no longer has to stuff in a map_entry for the commpage
9079 	 * above the map's max_offset.
9080 	 */
9081 	if (dst_addr >= dst_map->max_offset) {
9082 		vm_map_unlock(dst_map);
9083 		return KERN_INVALID_ADDRESS;
9084 	}
9085 
9086 start_pass_1:
9087 	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9088 		vm_map_unlock(dst_map);
9089 		return KERN_INVALID_ADDRESS;
9090 	}
9091 	vm_map_clip_start(dst_map,
9092 	    tmp_entry,
9093 	    vm_map_trunc_page(dst_addr,
9094 	    VM_MAP_PAGE_MASK(dst_map)));
9095 	for (entry = tmp_entry;;) {
9096 		vm_map_entry_t  next = entry->vme_next;
9097 
9098 		while (entry->is_sub_map) {
9099 			vm_map_offset_t sub_start;
9100 			vm_map_offset_t sub_end;
9101 			vm_map_offset_t local_end;
9102 
9103 			if (entry->in_transition) {
9104 				/*
9105 				 * Say that we are waiting, and wait for entry.
9106 				 */
9107 				entry->needs_wakeup = TRUE;
9108 				vm_map_entry_wait(dst_map, THREAD_UNINT);
9109 
9110 				goto start_pass_1;
9111 			}
9112 
9113 			local_end = entry->vme_end;
9114 			if (!(entry->needs_copy)) {
9115 				/* if needs_copy we are a COW submap */
9116 				/* in such a case we just replace so */
9117 				/* there is no need for the follow-  */
9118 				/* ing check.                        */
9119 				encountered_sub_map = TRUE;
9120 				sub_start = VME_OFFSET(entry);
9121 
9122 				if (entry->vme_end < dst_end) {
9123 					sub_end = entry->vme_end;
9124 				} else {
9125 					sub_end = dst_end;
9126 				}
9127 				sub_end -= entry->vme_start;
9128 				sub_end += VME_OFFSET(entry);
9129 				vm_map_unlock(dst_map);
9130 
9131 				kr = vm_map_overwrite_submap_recurse(
9132 					VME_SUBMAP(entry),
9133 					sub_start,
9134 					sub_end - sub_start);
9135 				if (kr != KERN_SUCCESS) {
9136 					return kr;
9137 				}
9138 				vm_map_lock(dst_map);
9139 			}
9140 
9141 			if (dst_end <= entry->vme_end) {
9142 				goto start_overwrite;
9143 			}
9144 			if (!vm_map_lookup_entry(dst_map, local_end,
9145 			    &entry)) {
9146 				vm_map_unlock(dst_map);
9147 				return KERN_INVALID_ADDRESS;
9148 			}
9149 			next = entry->vme_next;
9150 		}
9151 
9152 		if (!(entry->protection & VM_PROT_WRITE)) {
9153 			vm_map_unlock(dst_map);
9154 			return KERN_PROTECTION_FAILURE;
9155 		}
9156 
9157 		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
9158 			vm_map_unlock(dst_map);
9159 			return KERN_PROTECTION_FAILURE;
9160 		}
9161 
9162 		/*
9163 		 *	If the entry is in transition, we must wait
9164 		 *	for it to exit that state.  Anything could happen
9165 		 *	when we unlock the map, so start over.
9166 		 */
9167 		if (entry->in_transition) {
9168 			/*
9169 			 * Say that we are waiting, and wait for entry.
9170 			 */
9171 			entry->needs_wakeup = TRUE;
9172 			vm_map_entry_wait(dst_map, THREAD_UNINT);
9173 
9174 			goto start_pass_1;
9175 		}
9176 
9177 /*
9178  *		our range is contained completely within this map entry
9179  */
9180 		if (dst_end <= entry->vme_end) {
9181 			break;
9182 		}
9183 /*
9184  *		check that range specified is contiguous region
9185  */
9186 		if ((next == vm_map_to_entry(dst_map)) ||
9187 		    (next->vme_start != entry->vme_end)) {
9188 			vm_map_unlock(dst_map);
9189 			return KERN_INVALID_ADDRESS;
9190 		}
9191 
9192 
9193 		/*
9194 		 *	Check for permanent objects in the destination.
9195 		 */
9196 		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9197 		    ((!VME_OBJECT(entry)->internal) ||
9198 		    (VME_OBJECT(entry)->true_share))) {
9199 			contains_permanent_objects = TRUE;
9200 		}
9201 
9202 		entry = next;
9203 	}/* for */
9204 
9205 start_overwrite:
9206 	/*
9207 	 *	If there are permanent objects in the destination, then
9208 	 *	the copy cannot be interrupted.
9209 	 */
9210 
9211 	if (interruptible && contains_permanent_objects) {
9212 		vm_map_unlock(dst_map);
9213 		return KERN_FAILURE;   /* XXX */
9214 	}
9215 
9216 	/*
9217 	 *
9218 	 *	Make a second pass, overwriting the data.
9219 	 *	At the beginning of each loop iteration,
9220 	 *	the next entry to be overwritten is "tmp_entry"
9221 	 *	(initially, the value returned from the lookup above),
9222 	 *	and the starting address expected in that entry
9223 	 *	is "start".
9224 	 */
9225 
9226 	total_size = copy->size;
9227 	if (encountered_sub_map) {
9228 		copy_size = 0;
9229 		/* re-calculate tmp_entry since we've had the map */
9230 		/* unlocked */
9231 		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9232 			vm_map_unlock(dst_map);
9233 			return KERN_INVALID_ADDRESS;
9234 		}
9235 	} else {
9236 		copy_size = copy->size;
9237 	}
9238 
9239 	base_addr = dst_addr;
9240 	while (TRUE) {
9241 		/* deconstruct the copy object and do it in parts, */
9242 		/* but only in the sub_map, interruptible case */
9243 		vm_map_entry_t  copy_entry;
9244 		vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
9245 		vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
9246 		int             nentries;
9247 		int             remaining_entries = 0;
9248 		vm_map_offset_t new_offset = 0;
9249 
9250 		for (entry = tmp_entry; copy_size == 0;) {
9251 			vm_map_entry_t  next;
9252 
9253 			next = entry->vme_next;
9254 
9255 			/* tmp_entry and the base address are moved along */
9256 			/* each time we encounter a sub-map.  Otherwise, */
9257 			/* entry can outpace tmp_entry, and copy_size */
9258 			/* may reflect the distance between them. */
9259 			/* If the current entry is found to be in transition, */
9260 			/* we will start over at the beginning or at the last */
9261 			/* encountered submap, as dictated by base_addr, */
9262 			/* and zero copy_size accordingly. */
9263 			if (entry->in_transition) {
9264 				/*
9265 				 * Say that we are waiting, and wait for entry.
9266 				 */
9267 				entry->needs_wakeup = TRUE;
9268 				vm_map_entry_wait(dst_map, THREAD_UNINT);
9269 
9270 				if (!vm_map_lookup_entry(dst_map, base_addr,
9271 				    &tmp_entry)) {
9272 					vm_map_unlock(dst_map);
9273 					return KERN_INVALID_ADDRESS;
9274 				}
9275 				copy_size = 0;
9276 				entry = tmp_entry;
9277 				continue;
9278 			}
9279 			if (entry->is_sub_map) {
9280 				vm_map_offset_t sub_start;
9281 				vm_map_offset_t sub_end;
9282 				vm_map_offset_t local_end;
9283 
9284 				if (entry->needs_copy) {
9285 					/* if this is a COW submap */
9286 					/* just back the range with an */
9287 					/* anonymous entry */
9288 					if (entry->vme_end < dst_end) {
9289 						sub_end = entry->vme_end;
9290 					} else {
9291 						sub_end = dst_end;
9292 					}
9293 					if (entry->vme_start < base_addr) {
9294 						sub_start = base_addr;
9295 					} else {
9296 						sub_start = entry->vme_start;
9297 					}
9298 					vm_map_clip_end(
9299 						dst_map, entry, sub_end);
9300 					vm_map_clip_start(
9301 						dst_map, entry, sub_start);
9302 					assert(!entry->use_pmap);
9303 					assert(!entry->iokit_acct);
9304 					entry->use_pmap = TRUE;
9305 					entry->is_sub_map = FALSE;
9306 					vm_map_deallocate(
9307 						VME_SUBMAP(entry));
9308 					VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9309 					VME_OFFSET_SET(entry, 0);
9310 					entry->is_shared = FALSE;
9311 					entry->needs_copy = FALSE;
9312 					entry->protection = VM_PROT_DEFAULT;
9313 					entry->max_protection = VM_PROT_ALL;
9314 					entry->wired_count = 0;
9315 					entry->user_wired_count = 0;
9316 					if (entry->inheritance
9317 					    == VM_INHERIT_SHARE) {
9318 						entry->inheritance = VM_INHERIT_COPY;
9319 					}
9320 					continue;
9321 				}
9322 				/* first take care of any non-sub_map */
9323 				/* entries to send */
9324 				if (base_addr < entry->vme_start) {
9325 					/* stuff to send */
9326 					copy_size =
9327 					    entry->vme_start - base_addr;
9328 					break;
9329 				}
9330 				sub_start = VME_OFFSET(entry);
9331 
9332 				if (entry->vme_end < dst_end) {
9333 					sub_end = entry->vme_end;
9334 				} else {
9335 					sub_end = dst_end;
9336 				}
9337 				sub_end -= entry->vme_start;
9338 				sub_end += VME_OFFSET(entry);
9339 				local_end = entry->vme_end;
9340 				vm_map_unlock(dst_map);
9341 				copy_size = sub_end - sub_start;
9342 
9343 				/* adjust the copy object */
9344 				if (total_size > copy_size) {
9345 					vm_map_size_t   local_size = 0;
9346 					vm_map_size_t   entry_size;
9347 
9348 					nentries = 1;
9349 					new_offset = copy->offset;
9350 					copy_entry = vm_map_copy_first_entry(copy);
9351 					while (copy_entry !=
9352 					    vm_map_copy_to_entry(copy)) {
9353 						entry_size = copy_entry->vme_end -
9354 						    copy_entry->vme_start;
9355 						if ((local_size < copy_size) &&
9356 						    ((local_size + entry_size)
9357 						    >= copy_size)) {
9358 							vm_map_copy_clip_end(copy,
9359 							    copy_entry,
9360 							    copy_entry->vme_start +
9361 							    (copy_size - local_size));
9362 							entry_size = copy_entry->vme_end -
9363 							    copy_entry->vme_start;
9364 							local_size += entry_size;
9365 							new_offset += entry_size;
9366 						}
9367 						if (local_size >= copy_size) {
9368 							next_copy = copy_entry->vme_next;
9369 							copy_entry->vme_next =
9370 							    vm_map_copy_to_entry(copy);
9371 							previous_prev =
9372 							    copy->cpy_hdr.links.prev;
9373 							copy->cpy_hdr.links.prev = copy_entry;
9374 							copy->size = copy_size;
9375 							remaining_entries =
9376 							    copy->cpy_hdr.nentries;
9377 							remaining_entries -= nentries;
9378 							copy->cpy_hdr.nentries = nentries;
9379 							break;
9380 						} else {
9381 							local_size += entry_size;
9382 							new_offset += entry_size;
9383 							nentries++;
9384 						}
9385 						copy_entry = copy_entry->vme_next;
9386 					}
9387 				}
9388 
9389 				if ((entry->use_pmap) && (pmap == NULL)) {
9390 					kr = vm_map_copy_overwrite_nested(
9391 						VME_SUBMAP(entry),
9392 						sub_start,
9393 						copy,
9394 						interruptible,
9395 						VME_SUBMAP(entry)->pmap,
9396 						TRUE);
9397 				} else if (pmap != NULL) {
9398 					kr = vm_map_copy_overwrite_nested(
9399 						VME_SUBMAP(entry),
9400 						sub_start,
9401 						copy,
9402 						interruptible, pmap,
9403 						TRUE);
9404 				} else {
9405 					kr = vm_map_copy_overwrite_nested(
9406 						VME_SUBMAP(entry),
9407 						sub_start,
9408 						copy,
9409 						interruptible,
9410 						dst_map->pmap,
9411 						TRUE);
9412 				}
9413 				if (kr != KERN_SUCCESS) {
9414 					if (next_copy != NULL) {
9415 						copy->cpy_hdr.nentries +=
9416 						    remaining_entries;
9417 						copy->cpy_hdr.links.prev->vme_next =
9418 						    next_copy;
9419 						copy->cpy_hdr.links.prev
9420 						        = previous_prev;
9421 						copy->size = total_size;
9422 					}
9423 					return kr;
9424 				}
9425 				if (dst_end <= local_end) {
9426 					return KERN_SUCCESS;
9427 				}
9428 				/* otherwise copy no longer exists, it was */
9429 				/* destroyed after successful copy_overwrite */
9430 				copy = vm_map_copy_allocate();
9431 				copy->type = VM_MAP_COPY_ENTRY_LIST;
9432 				copy->offset = new_offset;
9433 				copy->cpy_hdr.page_shift = copy_page_shift;
9434 
9435 				/*
9436 				 * XXX FBDP
9437 				 * this does not seem to deal with
9438 				 * the VM map store (R&B tree)
9439 				 */
9440 
9441 				total_size -= copy_size;
9442 				copy_size = 0;
9443 				/* put back remainder of copy in container */
9444 				if (next_copy != NULL) {
9445 					copy->cpy_hdr.nentries = remaining_entries;
9446 					copy->cpy_hdr.links.next = next_copy;
9447 					copy->cpy_hdr.links.prev = previous_prev;
9448 					copy->size = total_size;
9449 					next_copy->vme_prev =
9450 					    vm_map_copy_to_entry(copy);
9451 					next_copy = NULL;
9452 				}
9453 				base_addr = local_end;
9454 				vm_map_lock(dst_map);
9455 				if (!vm_map_lookup_entry(dst_map,
9456 				    local_end, &tmp_entry)) {
9457 					vm_map_unlock(dst_map);
9458 					return KERN_INVALID_ADDRESS;
9459 				}
9460 				entry = tmp_entry;
9461 				continue;
9462 			}
9463 			if (dst_end <= entry->vme_end) {
9464 				copy_size = dst_end - base_addr;
9465 				break;
9466 			}
9467 
9468 			if ((next == vm_map_to_entry(dst_map)) ||
9469 			    (next->vme_start != entry->vme_end)) {
9470 				vm_map_unlock(dst_map);
9471 				return KERN_INVALID_ADDRESS;
9472 			}
9473 
9474 			entry = next;
9475 		}/* for */
9476 
9477 		next_copy = NULL;
9478 		nentries = 1;
9479 
9480 		/* adjust the copy object */
9481 		if (total_size > copy_size) {
9482 			vm_map_size_t   local_size = 0;
9483 			vm_map_size_t   entry_size;
9484 
9485 			new_offset = copy->offset;
9486 			copy_entry = vm_map_copy_first_entry(copy);
9487 			while (copy_entry != vm_map_copy_to_entry(copy)) {
9488 				entry_size = copy_entry->vme_end -
9489 				    copy_entry->vme_start;
9490 				if ((local_size < copy_size) &&
9491 				    ((local_size + entry_size)
9492 				    >= copy_size)) {
9493 					vm_map_copy_clip_end(copy, copy_entry,
9494 					    copy_entry->vme_start +
9495 					    (copy_size - local_size));
9496 					entry_size = copy_entry->vme_end -
9497 					    copy_entry->vme_start;
9498 					local_size += entry_size;
9499 					new_offset += entry_size;
9500 				}
9501 				if (local_size >= copy_size) {
9502 					next_copy = copy_entry->vme_next;
9503 					copy_entry->vme_next =
9504 					    vm_map_copy_to_entry(copy);
9505 					previous_prev =
9506 					    copy->cpy_hdr.links.prev;
9507 					copy->cpy_hdr.links.prev = copy_entry;
9508 					copy->size = copy_size;
9509 					remaining_entries =
9510 					    copy->cpy_hdr.nentries;
9511 					remaining_entries -= nentries;
9512 					copy->cpy_hdr.nentries = nentries;
9513 					break;
9514 				} else {
9515 					local_size += entry_size;
9516 					new_offset += entry_size;
9517 					nentries++;
9518 				}
9519 				copy_entry = copy_entry->vme_next;
9520 			}
9521 		}
9522 
9523 		if (aligned) {
9524 			pmap_t  local_pmap;
9525 
9526 			if (pmap) {
9527 				local_pmap = pmap;
9528 			} else {
9529 				local_pmap = dst_map->pmap;
9530 			}
9531 
9532 			if ((kr =  vm_map_copy_overwrite_aligned(
9533 				    dst_map, tmp_entry, copy,
9534 				    base_addr, local_pmap)) != KERN_SUCCESS) {
9535 				if (next_copy != NULL) {
9536 					copy->cpy_hdr.nentries +=
9537 					    remaining_entries;
9538 					copy->cpy_hdr.links.prev->vme_next =
9539 					    next_copy;
9540 					copy->cpy_hdr.links.prev =
9541 					    previous_prev;
9542 					copy->size += copy_size;
9543 				}
9544 				return kr;
9545 			}
9546 			vm_map_unlock(dst_map);
9547 		} else {
9548 			/*
9549 			 * Performance gain:
9550 			 *
9551 			 * if the copy and dst address are misaligned but share the
9552 			 * same offset within the page, we could copy the misaligned
9553 			 * parts unaligned and copy the rest aligned.  If they are
9554 			 * aligned but len is unaligned, we simply need to copy
9555 			 * the end bit unaligned.  We'd need to split the misaligned
9556 			 * bits of the region in that case!
9557 			 */
9558 			/* ALWAYS UNLOCKS THE dst_map MAP */
9559 			kr = vm_map_copy_overwrite_unaligned(
9560 				dst_map,
9561 				tmp_entry,
9562 				copy,
9563 				base_addr,
9564 				discard_on_success);
9565 			if (kr != KERN_SUCCESS) {
9566 				if (next_copy != NULL) {
9567 					copy->cpy_hdr.nentries +=
9568 					    remaining_entries;
9569 					copy->cpy_hdr.links.prev->vme_next =
9570 					    next_copy;
9571 					copy->cpy_hdr.links.prev =
9572 					    previous_prev;
9573 					copy->size += copy_size;
9574 				}
9575 				return kr;
9576 			}
9577 		}
9578 		total_size -= copy_size;
9579 		if (total_size == 0) {
9580 			break;
9581 		}
9582 		base_addr += copy_size;
9583 		copy_size = 0;
9584 		copy->offset = new_offset;
9585 		if (next_copy != NULL) {
9586 			copy->cpy_hdr.nentries = remaining_entries;
9587 			copy->cpy_hdr.links.next = next_copy;
9588 			copy->cpy_hdr.links.prev = previous_prev;
9589 			next_copy->vme_prev = vm_map_copy_to_entry(copy);
9590 			copy->size = total_size;
9591 		}
9592 		vm_map_lock(dst_map);
9593 		while (TRUE) {
9594 			if (!vm_map_lookup_entry(dst_map,
9595 			    base_addr, &tmp_entry)) {
9596 				vm_map_unlock(dst_map);
9597 				return KERN_INVALID_ADDRESS;
9598 			}
9599 			if (tmp_entry->in_transition) {
9600 				tmp_entry->needs_wakeup = TRUE;
9601 				vm_map_entry_wait(dst_map, THREAD_UNINT);
9602 			} else {
9603 				break;
9604 			}
9605 		}
9606 		vm_map_clip_start(dst_map,
9607 		    tmp_entry,
9608 		    vm_map_trunc_page(base_addr,
9609 		    VM_MAP_PAGE_MASK(dst_map)));
9610 
9611 		entry = tmp_entry;
9612 	} /* while */
9613 
9614 	/*
9615 	 *	Throw away the vm_map_copy object
9616 	 */
9617 	if (discard_on_success) {
9618 		vm_map_copy_discard(copy);
9619 	}
9620 
9621 	return KERN_SUCCESS;
9622 }/* vm_map_copy_overwrite */
9623 
9624 kern_return_t
9625 vm_map_copy_overwrite(
9626 	vm_map_t        dst_map,
9627 	vm_map_offset_t dst_addr,
9628 	vm_map_copy_t   copy,
9629 	vm_map_size_t   copy_size,
9630 	boolean_t       interruptible)
9631 {
9632 	vm_map_size_t   head_size, tail_size;
9633 	vm_map_copy_t   head_copy, tail_copy;
9634 	vm_map_offset_t head_addr, tail_addr;
9635 	vm_map_entry_t  entry;
9636 	kern_return_t   kr;
9637 	vm_map_offset_t effective_page_mask, effective_page_size;
9638 	uint16_t        copy_page_shift;
9639 
9640 	head_size = 0;
9641 	tail_size = 0;
9642 	head_copy = NULL;
9643 	tail_copy = NULL;
9644 	head_addr = 0;
9645 	tail_addr = 0;
9646 
9647 	if (interruptible ||
9648 	    copy == VM_MAP_COPY_NULL ||
9649 	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
9650 		/*
9651 		 * We can't split the "copy" map if we're interruptible
9652 		 * or if we don't have a "copy" map...
9653 		 */
9654 blunt_copy:
9655 		return vm_map_copy_overwrite_nested(dst_map,
9656 		           dst_addr,
9657 		           copy,
9658 		           interruptible,
9659 		           (pmap_t) NULL,
9660 		           TRUE);
9661 	}
9662 
9663 	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9664 	if (copy_page_shift < PAGE_SHIFT ||
9665 	    VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9666 		goto blunt_copy;
9667 	}
9668 
9669 	if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9670 		effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9671 	} else {
9672 		effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9673 		effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9674 		    effective_page_mask);
9675 	}
9676 	effective_page_size = effective_page_mask + 1;
9677 
9678 	if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9679 		/*
9680 		 * Too small to bother with optimizing...
9681 		 */
9682 		goto blunt_copy;
9683 	}
9684 
9685 	if ((dst_addr & effective_page_mask) !=
9686 	    (copy->offset & effective_page_mask)) {
9687 		/*
9688 		 * Incompatible mis-alignment of source and destination...
9689 		 */
9690 		goto blunt_copy;
9691 	}
9692 
9693 	/*
9694 	 * Proper alignment or identical mis-alignment at the beginning.
9695 	 * Let's try and do a small unaligned copy first (if needed)
9696 	 * and then an aligned copy for the rest.
9697 	 */
9698 	if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9699 		head_addr = dst_addr;
9700 		head_size = (effective_page_size -
9701 		    (copy->offset & effective_page_mask));
9702 		head_size = MIN(head_size, copy_size);
9703 	}
9704 	if (!vm_map_page_aligned(copy->offset + copy_size,
9705 	    effective_page_mask)) {
9706 		/*
9707 		 * Mis-alignment at the end.
9708 		 * Do an aligned copy up to the last page and
9709 		 * then an unaligned copy for the remaining bytes.
9710 		 */
9711 		tail_size = ((copy->offset + copy_size) &
9712 		    effective_page_mask);
9713 		tail_size = MIN(tail_size, copy_size);
9714 		tail_addr = dst_addr + copy_size - tail_size;
9715 		assert(tail_addr >= head_addr + head_size);
9716 	}
9717 	assert(head_size + tail_size <= copy_size);
9718 
9719 	if (head_size + tail_size == copy_size) {
9720 		/*
9721 		 * It's all unaligned, no optimization possible...
9722 		 */
9723 		goto blunt_copy;
9724 	}
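
	/*
	 * Worked example (hypothetical numbers, 16K effective pages,
	 * mask 0x3fff): copy->offset = 0x3000, copy_size = 0x18800,
	 * dst_addr at the same 0x3000 offset within its page:
	 *	head_size = 0x4000 - 0x3000             = 0x1000
	 *	tail_size = (0x3000 + 0x18800) & 0x3fff = 0x3800
	 * leaving 0x18800 - 0x1000 - 0x3800 = 0x14000 (five full
	 * pages) for the aligned copy in the middle.
	 */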
9725 
9726 	/*
9727 	 * Can't optimize if there are any submaps in the
9728 	 * destination due to the way we free the "copy" map
9729 	 * progressively in vm_map_copy_overwrite_nested()
9730 	 * in that case.
9731 	 */
9732 	vm_map_lock_read(dst_map);
9733 	if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9734 		vm_map_unlock_read(dst_map);
9735 		goto blunt_copy;
9736 	}
9737 	for (;
9738 	    (entry != vm_map_copy_to_entry(copy) &&
9739 	    entry->vme_start < dst_addr + copy_size);
9740 	    entry = entry->vme_next) {
9741 		if (entry->is_sub_map) {
9742 			vm_map_unlock_read(dst_map);
9743 			goto blunt_copy;
9744 		}
9745 	}
9746 	vm_map_unlock_read(dst_map);
9747 
9748 	if (head_size) {
9749 		/*
9750 		 * Unaligned copy of the first "head_size" bytes, to reach
9751 		 * a page boundary.
9752 		 */
9753 
9754 		/*
9755 		 * Extract "head_copy" out of "copy".
9756 		 */
9757 		head_copy = vm_map_copy_allocate();
9758 		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9759 		head_copy->cpy_hdr.entries_pageable =
9760 		    copy->cpy_hdr.entries_pageable;
9761 		vm_map_store_init(&head_copy->cpy_hdr);
9762 		head_copy->cpy_hdr.page_shift = copy_page_shift;
9763 
9764 		entry = vm_map_copy_first_entry(copy);
9765 		if (entry->vme_end < copy->offset + head_size) {
9766 			head_size = entry->vme_end - copy->offset;
9767 		}
9768 
9769 		head_copy->offset = copy->offset;
9770 		head_copy->size = head_size;
9771 		copy->offset += head_size;
9772 		copy->size -= head_size;
9773 		copy_size -= head_size;
9774 		assert(copy_size > 0);
9775 
9776 		vm_map_copy_clip_end(copy, entry, copy->offset);
9777 		vm_map_copy_entry_unlink(copy, entry);
9778 		vm_map_copy_entry_link(head_copy,
9779 		    vm_map_copy_to_entry(head_copy),
9780 		    entry);
9781 
9782 		/*
9783 		 * Do the unaligned copy.
9784 		 */
9785 		kr = vm_map_copy_overwrite_nested(dst_map,
9786 		    head_addr,
9787 		    head_copy,
9788 		    interruptible,
9789 		    (pmap_t) NULL,
9790 		    FALSE);
9791 		if (kr != KERN_SUCCESS) {
9792 			goto done;
9793 		}
9794 	}
9795 
9796 	if (tail_size) {
9797 		/*
9798 		 * Extract "tail_copy" out of "copy".
9799 		 */
9800 		tail_copy = vm_map_copy_allocate();
9801 		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9802 		tail_copy->cpy_hdr.entries_pageable =
9803 		    copy->cpy_hdr.entries_pageable;
9804 		vm_map_store_init(&tail_copy->cpy_hdr);
9805 		tail_copy->cpy_hdr.page_shift = copy_page_shift;
9806 
9807 		tail_copy->offset = copy->offset + copy_size - tail_size;
9808 		tail_copy->size = tail_size;
9809 
9810 		copy->size -= tail_size;
9811 		copy_size -= tail_size;
9812 		assert(copy_size > 0);
9813 
9814 		entry = vm_map_copy_last_entry(copy);
9815 		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9816 		entry = vm_map_copy_last_entry(copy);
9817 		vm_map_copy_entry_unlink(copy, entry);
9818 		vm_map_copy_entry_link(tail_copy,
9819 		    vm_map_copy_last_entry(tail_copy),
9820 		    entry);
9821 	}
9822 
9823 	/*
9824 	 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9825 	 * we want to avoid TOCTOU issues w.r.t. copy->size but
9826 	 * we don't need to change vm_map_copy_overwrite_nested()
9827 	 * and all other vm_map_copy_overwrite variants.
9828 	 *
9829 	 * So we assign the original copy_size that was passed into
9830 	 * this routine back to copy.
9831 	 *
9832 	 * This use of local 'copy_size' passed into this routine is
9833 	 * to try and protect against TOCTOU attacks where the kernel
9834 	 * has been exploited. We don't expect this to be an issue
9835 	 * during normal system operation.
9836 	 */
9837 	assertf(copy->size == copy_size,
9838 	    "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9839 	copy->size = copy_size;
9840 
9841 	/*
9842 	 * Copy most (or possibly all) of the data.
9843 	 */
9844 	kr = vm_map_copy_overwrite_nested(dst_map,
9845 	    dst_addr + head_size,
9846 	    copy,
9847 	    interruptible,
9848 	    (pmap_t) NULL,
9849 	    FALSE);
9850 	if (kr != KERN_SUCCESS) {
9851 		goto done;
9852 	}
9853 
9854 	if (tail_size) {
9855 		kr = vm_map_copy_overwrite_nested(dst_map,
9856 		    tail_addr,
9857 		    tail_copy,
9858 		    interruptible,
9859 		    (pmap_t) NULL,
9860 		    FALSE);
9861 	}
9862 
9863 done:
9864 	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9865 	if (kr == KERN_SUCCESS) {
9866 		/*
9867 		 * Discard all the copy maps.
9868 		 */
9869 		if (head_copy) {
9870 			vm_map_copy_discard(head_copy);
9871 			head_copy = NULL;
9872 		}
9873 		vm_map_copy_discard(copy);
9874 		if (tail_copy) {
9875 			vm_map_copy_discard(tail_copy);
9876 			tail_copy = NULL;
9877 		}
9878 	} else {
9879 		/*
9880 		 * Re-assemble the original copy map.
9881 		 */
9882 		if (head_copy) {
9883 			entry = vm_map_copy_first_entry(head_copy);
9884 			vm_map_copy_entry_unlink(head_copy, entry);
9885 			vm_map_copy_entry_link(copy,
9886 			    vm_map_copy_to_entry(copy),
9887 			    entry);
9888 			copy->offset -= head_size;
9889 			copy->size += head_size;
9890 			vm_map_copy_discard(head_copy);
9891 			head_copy = NULL;
9892 		}
9893 		if (tail_copy) {
9894 			entry = vm_map_copy_last_entry(tail_copy);
9895 			vm_map_copy_entry_unlink(tail_copy, entry);
9896 			vm_map_copy_entry_link(copy,
9897 			    vm_map_copy_last_entry(copy),
9898 			    entry);
9899 			copy->size += tail_size;
9900 			vm_map_copy_discard(tail_copy);
9901 			tail_copy = NULL;
9902 		}
9903 	}
9904 	return kr;
9905 }
9906 
9907 
9908 /*
9909  *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
9910  *
9911  *	Description:
9912  *	Physically copy unaligned data
9913  *
9914  *	Implementation:
9915  *	Unaligned parts of pages have to be physically copied.  We use
9916  *	a modified form of vm_fault_copy (which understands non-aligned
9917  *	page offsets and sizes) to do the copy.  We attempt to copy as
9918  *	much memory in one go as possible; however, vm_fault_copy copies
9919  *	within one memory object, so we have to find the smallest of "amount
9920  *	left", "source object data size" and "target object data size".  With
9921  *	unaligned data we don't need to split regions, therefore the source
9922  *	(copy) object should be one map entry; the target range may, however,
9923  *	be split over multiple map entries.  In any event we are pessimistic
9924  *	about these assumptions.
9925  *
9926  *	Assumptions:
9927  *	dst_map is locked on entry and is returned locked on success,
9928  *	unlocked on error.
9929  */
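
/*
 * The per-iteration size computation below is, in effect:
 *
 *	copy_size = MIN(dst_size, src_size);
 *	copy_size = MIN(copy_size, amount_left);
 *
 * e.g. (hypothetical) 0x1800 bytes left in the destination entry,
 * 0x1000 bytes left in the source copy entry and 0x2000 bytes left
 * overall: copy 0x1000, then advance to the next source entry.
 */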
9930 
9931 static kern_return_t
9932 vm_map_copy_overwrite_unaligned(
9933 	vm_map_t        dst_map,
9934 	vm_map_entry_t  entry,
9935 	vm_map_copy_t   copy,
9936 	vm_map_offset_t start,
9937 	boolean_t       discard_on_success)
9938 {
9939 	vm_map_entry_t          copy_entry;
9940 	vm_map_entry_t          copy_entry_next;
9941 	vm_map_version_t        version;
9942 	vm_object_t             dst_object;
9943 	vm_object_offset_t      dst_offset;
9944 	vm_object_offset_t      src_offset;
9945 	vm_object_offset_t      entry_offset;
9946 	vm_map_offset_t         entry_end;
9947 	vm_map_size_t           src_size,
9948 	    dst_size,
9949 	    copy_size,
9950 	    amount_left;
9951 	kern_return_t           kr = KERN_SUCCESS;
9952 
9953 
9954 	copy_entry = vm_map_copy_first_entry(copy);
9955 
9956 	vm_map_lock_write_to_read(dst_map);
9957 
9958 	src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
9959 	amount_left = copy->size;
9960 /*
9961  *	unaligned so we never clipped this entry, we need the offset into
9962  *	the vm_object not just the data.
9963  */
9964 	while (amount_left > 0) {
9965 		if (entry == vm_map_to_entry(dst_map)) {
9966 			vm_map_unlock_read(dst_map);
9967 			return KERN_INVALID_ADDRESS;
9968 		}
9969 
9970 		/* "start" must be within the current map entry */
9971 		assert((start >= entry->vme_start) && (start < entry->vme_end));
9972 
9973 		dst_offset = start - entry->vme_start;
9974 
9975 		dst_size = entry->vme_end - start;
9976 
9977 		src_size = copy_entry->vme_end -
9978 		    (copy_entry->vme_start + src_offset);
9979 
9980 		if (dst_size < src_size) {
9981 /*
9982  *			we can only copy dst_size bytes before
9983  *			we have to get the next destination entry
9984  */
9985 			copy_size = dst_size;
9986 		} else {
9987 /*
9988  *			we can only copy src_size bytes before
9989  *			we have to get the next source copy entry
9990  */
9991 			copy_size = src_size;
9992 		}
9993 
9994 		if (copy_size > amount_left) {
9995 			copy_size = amount_left;
9996 		}
9997 /*
9998  *		Entry needs copy: create a shadow object for the
9999  *		copy-on-write region.
10000  */
10001 		if (entry->needs_copy &&
10002 		    ((entry->protection & VM_PROT_WRITE) != 0)) {
10003 			if (vm_map_lock_read_to_write(dst_map)) {
10004 				vm_map_lock_read(dst_map);
10005 				goto RetryLookup;
10006 			}
10007 			VME_OBJECT_SHADOW(entry,
10008 			    (vm_map_size_t)(entry->vme_end
10009 			    - entry->vme_start));
10010 			entry->needs_copy = FALSE;
10011 			vm_map_lock_write_to_read(dst_map);
10012 		}
10013 		dst_object = VME_OBJECT(entry);
10014 /*
10015  *		unlike with the virtual (aligned) copy, we're going
10016  *		to fault on it, therefore we need a target object.
10017  */
10018 		if (dst_object == VM_OBJECT_NULL) {
10019 			if (vm_map_lock_read_to_write(dst_map)) {
10020 				vm_map_lock_read(dst_map);
10021 				goto RetryLookup;
10022 			}
10023 			dst_object = vm_object_allocate((vm_map_size_t)
10024 			    entry->vme_end - entry->vme_start);
10025 			VME_OBJECT_SET(entry, dst_object);
10026 			VME_OFFSET_SET(entry, 0);
10027 			assert(entry->use_pmap);
10028 			vm_map_lock_write_to_read(dst_map);
10029 		}
10030 /*
10031  *		Take an object reference and unlock map. The "entry" may
10032  *		disappear or change when the map is unlocked.
10033  */
10034 		vm_object_reference(dst_object);
10035 		version.main_timestamp = dst_map->timestamp;
10036 		entry_offset = VME_OFFSET(entry);
10037 		entry_end = entry->vme_end;
10038 		vm_map_unlock_read(dst_map);
10039 /*
10040  *		Copy as much as possible in one pass
10041  */
10042 		kr = vm_fault_copy(
10043 			VME_OBJECT(copy_entry),
10044 			VME_OFFSET(copy_entry) + src_offset,
10045 			&copy_size,
10046 			dst_object,
10047 			entry_offset + dst_offset,
10048 			dst_map,
10049 			&version,
10050 			THREAD_UNINT );
10051 
10052 		start += copy_size;
10053 		src_offset += copy_size;
10054 		amount_left -= copy_size;
10055 /*
10056  *		Release the object reference
10057  */
10058 		vm_object_deallocate(dst_object);
10059 /*
10060  *		If a hard error occurred, return it now
10061  */
10062 		if (kr != KERN_SUCCESS) {
10063 			return kr;
10064 		}
10065 
10066 		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10067 		    || amount_left == 0) {
10068 /*
10069  *			all done with this copy entry, dispose.
10070  */
10071 			copy_entry_next = copy_entry->vme_next;
10072 
10073 			if (discard_on_success) {
10074 				vm_map_copy_entry_unlink(copy, copy_entry);
10075 				assert(!copy_entry->is_sub_map);
10076 				vm_object_deallocate(VME_OBJECT(copy_entry));
10077 				vm_map_copy_entry_dispose(copy, copy_entry);
10078 			}
10079 
10080 			if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10081 			    amount_left) {
10082 /*
10083  *				not finished copying but ran out of source
10084  */
10085 				return KERN_INVALID_ADDRESS;
10086 			}
10087 
10088 			copy_entry = copy_entry_next;
10089 
10090 			src_offset = 0;
10091 		}
10092 
10093 		if (amount_left == 0) {
10094 			return KERN_SUCCESS;
10095 		}
10096 
10097 		vm_map_lock_read(dst_map);
10098 		if (version.main_timestamp == dst_map->timestamp) {
10099 			if (start == entry_end) {
10100 /*
10101  *				destination region is split.  Use the version
10102  *				information to avoid a lookup in the normal
10103  *				case.
10104  */
10105 				entry = entry->vme_next;
10106 /*
10107  *				should be contiguous. Fail if we encounter
10108  *				a hole in the destination.
10109  */
10110 				if (start != entry->vme_start) {
10111 					vm_map_unlock_read(dst_map);
10112 					return KERN_INVALID_ADDRESS;
10113 				}
10114 			}
10115 		} else {
10116 /*
10117  *			Map version check failed.
10118  *			We must look up the entry because somebody
10119  *			might have changed the map behind our backs.
10120  */
10121 RetryLookup:
10122 			if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10123 				vm_map_unlock_read(dst_map);
10124 				return KERN_INVALID_ADDRESS;
10125 			}
10126 		}
10127 	}/* while */
10128 
10129 	return KERN_SUCCESS;
10130 }/* vm_map_copy_overwrite_unaligned */
10131 
10132 /*
10133  *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
10134  *
10135  *	Description:
10136  *	Does all the vm_trickery possible for whole pages.
10137  *
10138  *	Implementation:
10139  *
10140  *	If there are no permanent objects in the destination,
10141  *	and the source and destination map entry zones match,
10142  *	and the destination map entry is not shared,
10143  *	then the map entries can be deleted and replaced
10144  *	with those from the copy.  The following code is the
10145  *	basic idea of what to do, but there are lots of annoying
10146  *	little details about getting protection and inheritance
10147  *	right.  Should add protection, inheritance, and sharing checks
10148  *	to the above pass and make sure that no wiring is involved.
10149  */
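
/*
 * A sketch of the "basic idea" above (the code below additionally
 * handles submaps, wiring, accounting and pmap cleanup):
 *
 *	vm_object_deallocate(VME_OBJECT(entry));
 *	VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
 *	VME_OFFSET_SET(entry, VME_OFFSET(copy_entry));
 *	vm_map_copy_entry_unlink(copy, copy_entry);
 *	vm_map_copy_entry_dispose(copy, copy_entry);
 */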
10150 
10151 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
10152 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
10153 int vm_map_copy_overwrite_aligned_src_large = 0;
10154 
10155 static kern_return_t
10156 vm_map_copy_overwrite_aligned(
10157 	vm_map_t        dst_map,
10158 	vm_map_entry_t  tmp_entry,
10159 	vm_map_copy_t   copy,
10160 	vm_map_offset_t start,
10161 	__unused pmap_t pmap)
10162 {
10163 	vm_object_t     object;
10164 	vm_map_entry_t  copy_entry;
10165 	vm_map_size_t   copy_size;
10166 	vm_map_size_t   size;
10167 	vm_map_entry_t  entry;
10168 
10169 	while ((copy_entry = vm_map_copy_first_entry(copy))
10170 	    != vm_map_copy_to_entry(copy)) {
10171 		copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10172 
10173 		entry = tmp_entry;
10174 		if (entry->is_sub_map) {
10175 			/* unnested when clipped earlier */
10176 			assert(!entry->use_pmap);
10177 		}
10178 		if (entry == vm_map_to_entry(dst_map)) {
10179 			vm_map_unlock(dst_map);
10180 			return KERN_INVALID_ADDRESS;
10181 		}
10182 		size = (entry->vme_end - entry->vme_start);
10183 		/*
10184 		 *	Make sure that no holes popped up in the
10185 		 *	address map, and that the protection is
10186 		 *	still valid, in case the map was unlocked
10187 		 *	earlier.
10188 		 */
10189 
10190 		if ((entry->vme_start != start) || ((entry->is_sub_map)
10191 		    && !entry->needs_copy)) {
10192 			vm_map_unlock(dst_map);
10193 			return KERN_INVALID_ADDRESS;
10194 		}
10195 		assert(entry != vm_map_to_entry(dst_map));
10196 
10197 		/*
10198 		 *	Check protection again
10199 		 */
10200 
10201 		if (!(entry->protection & VM_PROT_WRITE)) {
10202 			vm_map_unlock(dst_map);
10203 			return KERN_PROTECTION_FAILURE;
10204 		}
10205 
10206 		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
10207 			vm_map_unlock(dst_map);
10208 			return KERN_PROTECTION_FAILURE;
10209 		}
10210 
10211 		/*
10212 		 *	Adjust to source size first
10213 		 */
10214 
10215 		if (copy_size < size) {
10216 			if (entry->map_aligned &&
10217 			    !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10218 			    VM_MAP_PAGE_MASK(dst_map))) {
10219 				/* no longer map-aligned */
10220 				entry->map_aligned = FALSE;
10221 			}
10222 			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10223 			size = copy_size;
10224 		}
10225 
10226 		/*
10227 		 *	Adjust to destination size
10228 		 */
10229 
10230 		if (size < copy_size) {
10231 			vm_map_copy_clip_end(copy, copy_entry,
10232 			    copy_entry->vme_start + size);
10233 			copy_size = size;
10234 		}
10235 
10236 		assert((entry->vme_end - entry->vme_start) == size);
10237 		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10238 		assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10239 
10240 		/*
10241 		 *	If the destination contains temporary unshared memory,
10242 		 *	we can perform the copy by throwing it away and
10243 		 *	installing the source data.
10244 		 */
10245 
10246 		object = VME_OBJECT(entry);
10247 		if ((!entry->is_shared &&
10248 		    ((object == VM_OBJECT_NULL) ||
10249 		    (object->internal && !object->true_share))) ||
10250 		    entry->needs_copy) {
10251 			vm_object_t     old_object = VME_OBJECT(entry);
10252 			vm_object_offset_t      old_offset = VME_OFFSET(entry);
10253 			vm_object_offset_t      offset;
10254 
10255 			/*
10256 			 * Ensure that the source and destination aren't
10257 			 * identical
10258 			 */
10259 			if (old_object == VME_OBJECT(copy_entry) &&
10260 			    old_offset == VME_OFFSET(copy_entry)) {
10261 				vm_map_copy_entry_unlink(copy, copy_entry);
10262 				vm_map_copy_entry_dispose(copy, copy_entry);
10263 
10264 				if (old_object != VM_OBJECT_NULL) {
10265 					vm_object_deallocate(old_object);
10266 				}
10267 
10268 				start = tmp_entry->vme_end;
10269 				tmp_entry = tmp_entry->vme_next;
10270 				continue;
10271 			}
10272 
10273 #if XNU_TARGET_OS_OSX
10274 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10275 #define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
10276 			if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10277 			    VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10278 			    copy_size <= __TRADEOFF1_COPY_SIZE) {
10279 				/*
10280 				 * Virtual vs. Physical copy tradeoff #1.
10281 				 *
10282 				 * Copying only a few pages out of a large
10283 				 * object:  do a physical copy instead of
10284 				 * a virtual copy, to avoid possibly keeping
10285 				 * the entire large object alive because of
10286 				 * those few copy-on-write pages.
10287 				 */
10288 				vm_map_copy_overwrite_aligned_src_large++;
10289 				goto slow_copy;
10290 			}
10291 #endif /* XNU_TARGET_OS_OSX */
10292 
10293 			if ((dst_map->pmap != kernel_pmap) &&
10294 			    (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10295 			    (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10296 				vm_object_t new_object, new_shadow;
10297 
10298 				/*
10299 				 * We're about to map something over a mapping
10300 				 * established by malloc()...
10301 				 */
10302 				new_object = VME_OBJECT(copy_entry);
10303 				if (new_object != VM_OBJECT_NULL) {
10304 					vm_object_lock_shared(new_object);
10305 				}
10306 				while (new_object != VM_OBJECT_NULL &&
10307 #if XNU_TARGET_OS_OSX
10308 				    !new_object->true_share &&
10309 				    new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10310 #endif /* XNU_TARGET_OS_OSX */
10311 				    new_object->internal) {
10312 					new_shadow = new_object->shadow;
10313 					if (new_shadow == VM_OBJECT_NULL) {
10314 						break;
10315 					}
10316 					vm_object_lock_shared(new_shadow);
10317 					vm_object_unlock(new_object);
10318 					new_object = new_shadow;
10319 				}
10320 				if (new_object != VM_OBJECT_NULL) {
10321 					if (!new_object->internal) {
10322 						/*
10323 						 * The new mapping is backed
10324 						 * by an external object.  We
10325 						 * don't want malloc'ed memory
10326 						 * to be replaced with such a
10327 						 * non-anonymous mapping, so
10328 						 * let's go off the optimized
10329 						 * path...
10330 						 */
10331 						vm_map_copy_overwrite_aligned_src_not_internal++;
10332 						vm_object_unlock(new_object);
10333 						goto slow_copy;
10334 					}
10335 #if XNU_TARGET_OS_OSX
10336 					if (new_object->true_share ||
10337 					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10338 						/*
10339 						 * Same if there's a "true_share"
10340 						 * object in the shadow chain, or
10341 						 * an object with a non-default
10342 						 * (SYMMETRIC) copy strategy.
10343 						 */
10344 						vm_map_copy_overwrite_aligned_src_not_symmetric++;
10345 						vm_object_unlock(new_object);
10346 						goto slow_copy;
10347 					}
10348 #endif /* XNU_TARGET_OS_OSX */
10349 					vm_object_unlock(new_object);
10350 				}
10351 				/*
10352 				 * The new mapping is still backed by
10353 				 * anonymous (internal) memory, so it's
10354 				 * OK to substitute it for the original
10355 				 * malloc() mapping.
10356 				 */
10357 			}
10358 
10359 			if (old_object != VM_OBJECT_NULL) {
10360 				if (entry->is_sub_map) {
10361 					if (entry->use_pmap) {
10362 #ifndef NO_NESTED_PMAP
10363 						pmap_unnest(dst_map->pmap,
10364 						    (addr64_t)entry->vme_start,
10365 						    entry->vme_end - entry->vme_start);
10366 #endif  /* NO_NESTED_PMAP */
10367 						if (dst_map->mapped_in_other_pmaps) {
10368 							/* clean up parent */
10369 							/* map/maps */
10370 							vm_map_submap_pmap_clean(
10371 								dst_map, entry->vme_start,
10372 								entry->vme_end,
10373 								VME_SUBMAP(entry),
10374 								VME_OFFSET(entry));
10375 						}
10376 					} else {
10377 						vm_map_submap_pmap_clean(
10378 							dst_map, entry->vme_start,
10379 							entry->vme_end,
10380 							VME_SUBMAP(entry),
10381 							VME_OFFSET(entry));
10382 					}
10383 					vm_map_deallocate(VME_SUBMAP(entry));
10384 				} else {
10385 					if (dst_map->mapped_in_other_pmaps) {
10386 						vm_object_pmap_protect_options(
10387 							VME_OBJECT(entry),
10388 							VME_OFFSET(entry),
10389 							entry->vme_end
10390 							- entry->vme_start,
10391 							PMAP_NULL,
10392 							PAGE_SIZE,
10393 							entry->vme_start,
10394 							VM_PROT_NONE,
10395 							PMAP_OPTIONS_REMOVE);
10396 					} else {
10397 						pmap_remove_options(
10398 							dst_map->pmap,
10399 							(addr64_t)(entry->vme_start),
10400 							(addr64_t)(entry->vme_end),
10401 							PMAP_OPTIONS_REMOVE);
10402 					}
10403 					vm_object_deallocate(old_object);
10404 				}
10405 			}
10406 
10407 			if (entry->iokit_acct) {
10408 				/* keep using iokit accounting */
10409 				entry->use_pmap = FALSE;
10410 			} else {
10411 				/* use pmap accounting */
10412 				entry->use_pmap = TRUE;
10413 			}
10414 			entry->is_sub_map = FALSE;
10415 			VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10416 			object = VME_OBJECT(entry);
10417 			entry->needs_copy = copy_entry->needs_copy;
10418 			entry->wired_count = 0;
10419 			entry->user_wired_count = 0;
10420 			offset = VME_OFFSET(copy_entry);
10421 			VME_OFFSET_SET(entry, offset);
10422 
10423 			vm_map_copy_entry_unlink(copy, copy_entry);
10424 			vm_map_copy_entry_dispose(copy, copy_entry);
10425 
10426 			/*
10427 			 * We could try to push pages into the pmap at this point, BUT
10428 			 * this optimization only saved on average 2 us per page, and only
10429 			 * when ALL the pages in the source were currently mapped and ALL
10430 			 * the pages in the dest were touched.  If fewer than 2/3 of the
10431 			 * pages were touched, the optimization actually cost more cycles.
10432 			 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10433 			 */
10434 
10435 			/*
10436 			 *	Set up for the next iteration.  The map
10437 			 *	has not been unlocked, so the next
10438 			 *	address should be at the end of this
10439 			 *	entry, and the next map entry should be
10440 			 *	the one following it.
10441 			 */
10442 
10443 			start = tmp_entry->vme_end;
10444 			tmp_entry = tmp_entry->vme_next;
10445 		} else {
10446 			vm_map_version_t        version;
10447 			vm_object_t             dst_object;
10448 			vm_object_offset_t      dst_offset;
10449 			kern_return_t           r;
10450 
10451 slow_copy:
10452 			if (entry->needs_copy) {
10453 				VME_OBJECT_SHADOW(entry,
10454 				    (entry->vme_end -
10455 				    entry->vme_start));
10456 				entry->needs_copy = FALSE;
10457 			}
10458 
10459 			dst_object = VME_OBJECT(entry);
10460 			dst_offset = VME_OFFSET(entry);
10461 
10462 			/*
10463 			 *	Take an object reference, and record
10464 			 *	the map version information so that the
10465 			 *	map can be safely unlocked.
10466 			 */
10467 
10468 			if (dst_object == VM_OBJECT_NULL) {
10469 				/*
10470 				 * We would usually have just taken the
10471 				 * optimized path above if the destination
10472 				 * object has not been allocated yet.  But we
10473 				 * now disable that optimization if the copy
10474 				 * entry's object is not backed by anonymous
10475 				 * memory to avoid replacing malloc'ed
10476 				 * (i.e. re-usable) anonymous memory with a
10477 				 * not-so-anonymous mapping.
10478 				 * So we have to handle this case here and
10479 				 * allocate a new VM object for this map entry.
10480 				 */
10481 				dst_object = vm_object_allocate(
10482 					entry->vme_end - entry->vme_start);
10483 				dst_offset = 0;
10484 				VME_OBJECT_SET(entry, dst_object);
10485 				VME_OFFSET_SET(entry, dst_offset);
10486 				assert(entry->use_pmap);
10487 			}
10488 
10489 			vm_object_reference(dst_object);
10490 
10491 			/* account for unlock bumping up timestamp */
10492 			version.main_timestamp = dst_map->timestamp + 1;
10493 
10494 			vm_map_unlock(dst_map);
10495 
10496 			/*
10497 			 *	Copy as much as possible in one pass
10498 			 */
10499 
10500 			copy_size = size;
10501 			r = vm_fault_copy(
10502 				VME_OBJECT(copy_entry),
10503 				VME_OFFSET(copy_entry),
10504 				&copy_size,
10505 				dst_object,
10506 				dst_offset,
10507 				dst_map,
10508 				&version,
10509 				THREAD_UNINT );
10510 
10511 			/*
10512 			 *	Release the object reference
10513 			 */
10514 
10515 			vm_object_deallocate(dst_object);
10516 
10517 			/*
10518 			 *	If a hard error occurred, return it now
10519 			 */
10520 
10521 			if (r != KERN_SUCCESS) {
10522 				return r;
10523 			}
10524 
10525 			if (copy_size != 0) {
10526 				/*
10527 				 *	Dispose of the copied region
10528 				 */
10529 
10530 				vm_map_copy_clip_end(copy, copy_entry,
10531 				    copy_entry->vme_start + copy_size);
10532 				vm_map_copy_entry_unlink(copy, copy_entry);
10533 				vm_object_deallocate(VME_OBJECT(copy_entry));
10534 				vm_map_copy_entry_dispose(copy, copy_entry);
10535 			}
10536 
10537 			/*
10538 			 *	Pick up in the destination map where we left off.
10539 			 *
10540 			 *	Use the version information to avoid a lookup
10541 			 *	in the normal case.
10542 			 */
10543 
10544 			start += copy_size;
10545 			vm_map_lock(dst_map);
10546 			if (version.main_timestamp == dst_map->timestamp &&
10547 			    copy_size != 0) {
10548 				/* We can safely use saved tmp_entry value */
10549 
10550 				if (tmp_entry->map_aligned &&
10551 				    !VM_MAP_PAGE_ALIGNED(
10552 					    start,
10553 					    VM_MAP_PAGE_MASK(dst_map))) {
10554 					/* no longer map-aligned */
10555 					tmp_entry->map_aligned = FALSE;
10556 				}
10557 				vm_map_clip_end(dst_map, tmp_entry, start);
10558 				tmp_entry = tmp_entry->vme_next;
10559 			} else {
10560 				/* Must do lookup of tmp_entry */
10561 
10562 				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10563 					vm_map_unlock(dst_map);
10564 					return KERN_INVALID_ADDRESS;
10565 				}
10566 				if (tmp_entry->map_aligned &&
10567 				    !VM_MAP_PAGE_ALIGNED(
10568 					    start,
10569 					    VM_MAP_PAGE_MASK(dst_map))) {
10570 					/* no longer map-aligned */
10571 					tmp_entry->map_aligned = FALSE;
10572 				}
10573 				vm_map_clip_start(dst_map, tmp_entry, start);
10574 			}
10575 		}
10576 	}/* while */
10577 
10578 	return KERN_SUCCESS;
10579 }/* vm_map_copy_overwrite_aligned */
10580 
10581 /*
10582  *	Routine: vm_map_copyin_kernel_buffer [internal use only]
10583  *
10584  *	Description:
10585  *		Copy in data to a kernel buffer from space in the
10586  *		source map. The original space may be optionally
10587  *		deallocated.
10588  *
10589  *		If successful, returns a new copy object.
10590  */
10591 static kern_return_t
10592 vm_map_copyin_kernel_buffer(
10593 	vm_map_t        src_map,
10594 	vm_map_offset_t src_addr,
10595 	vm_map_size_t   len,
10596 	boolean_t       src_destroy,
10597 	vm_map_copy_t   *copy_result)
10598 {
10599 	kern_return_t kr;
10600 	vm_map_copy_t copy;
10601 
10602 	if (len > msg_ool_size_small) {
10603 		return KERN_INVALID_ARGUMENT;
10604 	}
10605 
10606 	copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
10607 	copy->cpy_kdata = kalloc_data(len, Z_WAITOK);
10608 	if (copy->cpy_kdata == NULL) {
10609 		zfree(vm_map_copy_zone, copy);
10610 		return KERN_RESOURCE_SHORTAGE;
10611 	}
10612 
10613 	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10614 	copy->size = len;
10615 	copy->offset = 0;
10616 
10617 	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10618 	if (kr != KERN_SUCCESS) {
10619 		kfree_data(copy->cpy_kdata, len);
10620 		zfree(vm_map_copy_zone, copy);
10621 		return kr;
10622 	}
10623 	if (src_destroy) {
10624 		(void) vm_map_remove(
10625 			src_map,
10626 			vm_map_trunc_page(src_addr,
10627 			VM_MAP_PAGE_MASK(src_map)),
10628 			vm_map_round_page(src_addr + len,
10629 			VM_MAP_PAGE_MASK(src_map)),
10630 			(VM_MAP_REMOVE_INTERRUPTIBLE |
10631 			VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10632 			((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10633 	}
10634 	*copy_result = copy;
10635 	return KERN_SUCCESS;
10636 }
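
/*
 * Illustrative sketch only (hedged; not part of the build): how this
 * helper is meant to be driven for a small copy.  "task_map", "uaddr"
 * and "ulen" are hypothetical placeholders for a valid source map and
 * a readable range no larger than msg_ool_size_small.
 *
 *	vm_map_copy_t	user_copy;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_copyin_kernel_buffer(task_map, uaddr, ulen,
 *	    FALSE,              // src_destroy: keep the source mapping
 *	    &user_copy);
 *	if (kr == KERN_SUCCESS) {
 *		// user_copy->cpy_kdata now holds "ulen" bytes of source
 *		// data; the copy's type is VM_MAP_COPY_KERNEL_BUFFER.
 *	}
 */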
10637 
10638 /*
10639  *	Routine: vm_map_copyout_kernel_buffer	[internal use only]
10640  *
10641  *	Description:
10642  *		Copy out data from a kernel buffer into space in the
10643  *		destination map. The space may optionally be dynamically
10644  *		allocated.
10645  *
10646  *		If successful, consumes the copy object.
10647  *		Otherwise, the caller is responsible for it.
10648  */
10649 static int vm_map_copyout_kernel_buffer_failures = 0;
10650 static kern_return_t
10651 vm_map_copyout_kernel_buffer(
10652 	vm_map_t                map,
10653 	vm_map_address_t        *addr,  /* IN/OUT */
10654 	vm_map_copy_t           copy,
10655 	vm_map_size_t           copy_size,
10656 	boolean_t               overwrite,
10657 	boolean_t               consume_on_success)
10658 {
10659 	kern_return_t kr = KERN_SUCCESS;
10660 	thread_t thread = current_thread();
10661 
10662 	assert(copy->size == copy_size);
10663 
10664 	/*
10665 	 * check for corrupted vm_map_copy structure
10666 	 */
10667 	if (copy_size > msg_ool_size_small || copy->offset) {
10668 		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10669 		    (long long)copy->size, (long long)copy->offset);
10670 	}
10671 
10672 	if (!overwrite) {
10673 		/*
10674 		 * Allocate space in the target map for the data
10675 		 */
10676 		*addr = 0;
10677 		kr = vm_map_enter(map,
10678 		    addr,
10679 		    vm_map_round_page(copy_size,
10680 		    VM_MAP_PAGE_MASK(map)),
10681 		    (vm_map_offset_t) 0,
10682 		    VM_FLAGS_ANYWHERE,
10683 		    VM_MAP_KERNEL_FLAGS_NONE,
10684 		    VM_KERN_MEMORY_NONE,
10685 		    VM_OBJECT_NULL,
10686 		    (vm_object_offset_t) 0,
10687 		    FALSE,
10688 		    VM_PROT_DEFAULT,
10689 		    VM_PROT_ALL,
10690 		    VM_INHERIT_DEFAULT);
10691 		if (kr != KERN_SUCCESS) {
10692 			return kr;
10693 		}
10694 #if KASAN
10695 		if (map->pmap == kernel_pmap) {
10696 			kasan_notify_address(*addr, copy->size);
10697 		}
10698 #endif
10699 	}
10700 
10701 	/*
10702 	 * Copyout the data from the kernel buffer to the target map.
10703 	 */
10704 	if (thread->map == map) {
10705 		/*
10706 		 * If the target map is the current map, just do
10707 		 * the copy.
10708 		 */
10709 		assert((vm_size_t)copy_size == copy_size);
10710 		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10711 			kr = KERN_INVALID_ADDRESS;
10712 		}
10713 	} else {
10714 		vm_map_t oldmap;
10715 
10716 		/*
10717 		 * If the target map is another map, assume the
10718 		 * target's address space identity for the duration
10719 		 * of the copy.
10720 		 */
10721 		vm_map_reference(map);
10722 		oldmap = vm_map_switch(map);
10723 
10724 		assert((vm_size_t)copy_size == copy_size);
10725 		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10726 			vm_map_copyout_kernel_buffer_failures++;
10727 			kr = KERN_INVALID_ADDRESS;
10728 		}
10729 
10730 		(void) vm_map_switch(oldmap);
10731 		vm_map_deallocate(map);
10732 	}
10733 
10734 	if (kr != KERN_SUCCESS) {
10735 		/* the copy failed, clean up */
10736 		if (!overwrite) {
10737 			/*
10738 			 * Deallocate the space we allocated in the target map.
10739 			 */
10740 			(void) vm_map_remove(
10741 				map,
10742 				vm_map_trunc_page(*addr,
10743 				VM_MAP_PAGE_MASK(map)),
10744 				vm_map_round_page((*addr +
10745 				vm_map_round_page(copy_size,
10746 				VM_MAP_PAGE_MASK(map))),
10747 				VM_MAP_PAGE_MASK(map)),
10748 				VM_MAP_REMOVE_NO_FLAGS);
10749 			*addr = 0;
10750 		}
10751 	} else {
10752 		/* copy was successful, discard the copy structure */
10753 		if (consume_on_success) {
10754 			kfree_data(copy->cpy_kdata, copy_size);
10755 			zfree(vm_map_copy_zone, copy);
10756 		}
10757 	}
10758 
10759 	return kr;
10760 }
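
/*
 * Hedged sketch (hypothetical names; not built): the "overwrite" flag
 * selects between allocating fresh space in the target map and reusing
 * an existing mapping.  "existing_addr" is assumed to name an already
 * established, writable destination range.
 *
 *	vm_map_address_t	daddr = existing_addr;
 *
 *	kr = vm_map_copyout_kernel_buffer(dst_map, &daddr, copy,
 *	    copy->size,
 *	    TRUE,               // overwrite: skip vm_map_enter()
 *	    TRUE);              // consume the copy on success
 */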
10761 
10762 /*
10763  *	Routine:	vm_map_copy_insert      [internal use only]
10764  *
10765  *	Description:
10766  *		Link a copy chain ("copy") into a map at the
10767  *		specified location (after "where").
10768  *	Side effects:
10769  *		The copy chain is destroyed.
10770  */
10771 static void
10772 vm_map_copy_insert(
10773 	vm_map_t        map,
10774 	vm_map_entry_t  after_where,
10775 	vm_map_copy_t   copy)
10776 {
10777 	vm_map_entry_t  entry;
10778 
10779 	while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10780 		entry = vm_map_copy_first_entry(copy);
10781 		vm_map_copy_entry_unlink(copy, entry);
10782 		vm_map_store_entry_link(map, after_where, entry,
10783 		    VM_MAP_KERNEL_FLAGS_NONE);
10784 		after_where = entry;
10785 	}
10786 	zfree(vm_map_copy_zone, copy);
10787 }
10788 
10789 void
10790 vm_map_copy_remap(
10791 	vm_map_t        map,
10792 	vm_map_entry_t  where,
10793 	vm_map_copy_t   copy,
10794 	vm_map_offset_t adjustment,
10795 	vm_prot_t       cur_prot,
10796 	vm_prot_t       max_prot,
10797 	vm_inherit_t    inheritance)
10798 {
10799 	vm_map_entry_t  copy_entry, new_entry;
10800 
10801 	for (copy_entry = vm_map_copy_first_entry(copy);
10802 	    copy_entry != vm_map_copy_to_entry(copy);
10803 	    copy_entry = copy_entry->vme_next) {
10804 		/* get a new VM map entry for the map */
10805 		new_entry = vm_map_entry_create(map,
10806 		    !map->hdr.entries_pageable);
10807 		/* copy the "copy entry" to the new entry */
10808 		vm_map_entry_copy(map, new_entry, copy_entry);
10809 		/* adjust "start" and "end" */
10810 		new_entry->vme_start += adjustment;
10811 		new_entry->vme_end += adjustment;
10812 		/* clear some attributes */
10813 		new_entry->inheritance = inheritance;
10814 		new_entry->protection = cur_prot;
10815 		new_entry->max_protection = max_prot;
10816 		new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10817 		/* take an extra reference on the entry's "object" */
10818 		if (new_entry->is_sub_map) {
10819 			assert(!new_entry->use_pmap); /* not nested */
10820 			vm_map_lock(VME_SUBMAP(new_entry));
10821 			vm_map_reference(VME_SUBMAP(new_entry));
10822 			vm_map_unlock(VME_SUBMAP(new_entry));
10823 		} else {
10824 			vm_object_reference(VME_OBJECT(new_entry));
10825 		}
10826 		/* insert the new entry in the map */
10827 		vm_map_store_entry_link(map, where, new_entry,
10828 		    VM_MAP_KERNEL_FLAGS_NONE);
10829 		/* continue inserting the "copy entries" after the new entry */
10830 		where = new_entry;
10831 	}
10832 }
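
/*
 * Hedged sketch of how the two linking helpers above divide the work,
 * mirroring their use in vm_map_copyout_internal() below.  "consume",
 * "where" and "adjustment" are placeholders; "dst_map" is assumed to
 * be locked.
 *
 *	if (consume) {
 *		// transfers the entries; "copy" itself is freed
 *		vm_map_copy_insert(dst_map, where, copy);
 *	} else {
 *		// clones the entries; "copy" stays intact and remains
 *		// the caller's responsibility
 *		vm_map_copy_remap(dst_map, where, copy, adjustment,
 *		    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 *	}
 */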
10833 
10834 
10835 /*
10836  * Returns true if *size matches (or is in the range of) copy->size.
10837  * Upon returning true, the *size field is updated with the actual size of the
10838  * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10839  */
10840 boolean_t
10841 vm_map_copy_validate_size(
10842 	vm_map_t                dst_map,
10843 	vm_map_copy_t           copy,
10844 	vm_map_size_t           *size)
10845 {
10846 	if (copy == VM_MAP_COPY_NULL) {
10847 		return FALSE;
10848 	}
10849 	vm_map_size_t copy_sz = copy->size;
10850 	vm_map_size_t sz = *size;
10851 	switch (copy->type) {
10852 	case VM_MAP_COPY_OBJECT:
10853 	case VM_MAP_COPY_KERNEL_BUFFER:
10854 		if (sz == copy_sz) {
10855 			return TRUE;
10856 		}
10857 		break;
10858 	case VM_MAP_COPY_ENTRY_LIST:
10859 		/*
10860 		 * potential page-size rounding prevents us from exactly
10861 		 * validating this flavor of vm_map_copy, but we can at least
10862 		 * assert that it's within a range.
10863 		 */
10864 		if (copy_sz >= sz &&
10865 		    copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10866 			*size = copy_sz;
10867 			return TRUE;
10868 		}
10869 		break;
10870 	default:
10871 		break;
10872 	}
10873 	return FALSE;
10874 }
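
/*
 * Hedged usage sketch: validating a caller-supplied size before taking
 * the prevalidated-size copyout path below.  "msg_size" stands in for
 * an untrusted size received in a message.
 *
 *	vm_map_size_t	sz = msg_size;
 *
 *	if (!vm_map_copy_validate_size(dst_map, copy, &sz)) {
 *		return KERN_FAILURE;	// size disagrees with the copy
 *	}
 *	// "sz" now holds the copy object's actual size
 *	kr = vm_map_copyout_size(dst_map, &dst_addr, copy, sz);
 */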
10875 
10876 /*
10877  *	Routine:	vm_map_copyout_size
10878  *
10879  *	Description:
10880  *		Copy out a copy chain ("copy") into newly-allocated
10881  *		space in the destination map. Uses a prevalidated
10882  *		size for the copy object (vm_map_copy_validate_size).
10883  *
10884  *		If successful, consumes the copy object.
10885  *		Otherwise, the caller is responsible for it.
10886  */
10887 kern_return_t
10888 vm_map_copyout_size(
10889 	vm_map_t                dst_map,
10890 	vm_map_address_t        *dst_addr,      /* OUT */
10891 	vm_map_copy_t           copy,
10892 	vm_map_size_t           copy_size)
10893 {
10894 	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10895 	           TRUE,                     /* consume_on_success */
10896 	           VM_PROT_DEFAULT,
10897 	           VM_PROT_ALL,
10898 	           VM_INHERIT_DEFAULT);
10899 }
10900 
10901 /*
10902  *	Routine:	vm_map_copyout
10903  *
10904  *	Description:
10905  *		Copy out a copy chain ("copy") into newly-allocated
10906  *		space in the destination map.
10907  *
10908  *		If successful, consumes the copy object.
10909  *		Otherwise, the caller is responsible for it.
10910  */
10911 kern_return_t
10912 vm_map_copyout(
10913 	vm_map_t                dst_map,
10914 	vm_map_address_t        *dst_addr,      /* OUT */
10915 	vm_map_copy_t           copy)
10916 {
10917 	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10918 	           TRUE,                     /* consume_on_success */
10919 	           VM_PROT_DEFAULT,
10920 	           VM_PROT_ALL,
10921 	           VM_INHERIT_DEFAULT);
10922 }
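
/*
 * Hedged sketch of the classic copyin/copyout round trip between two
 * maps; "src_map", "dst_map", "src_addr" and "len" are placeholders.
 * Note that vm_map_copyout() consumes the copy only on success.
 *
 *	vm_map_copy_t		copy;
 *	vm_map_address_t	dst_addr;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS) {
 *		return kr;
 *	}
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);	// not consumed on failure
 *	}
 */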
10923 
10924 kern_return_t
10925 vm_map_copyout_internal(
10926 	vm_map_t                dst_map,
10927 	vm_map_address_t        *dst_addr,      /* OUT */
10928 	vm_map_copy_t           copy,
10929 	vm_map_size_t           copy_size,
10930 	boolean_t               consume_on_success,
10931 	vm_prot_t               cur_protection,
10932 	vm_prot_t               max_protection,
10933 	vm_inherit_t            inheritance)
10934 {
10935 	vm_map_size_t           size;
10936 	vm_map_size_t           adjustment;
10937 	vm_map_offset_t         start;
10938 	vm_object_offset_t      vm_copy_start;
10939 	vm_map_entry_t          last;
10940 	vm_map_entry_t          entry;
10941 	vm_map_entry_t          hole_entry;
10942 	vm_map_copy_t           original_copy;
10943 
10944 	/*
10945 	 *	Check for null copy object.
10946 	 */
10947 
10948 	if (copy == VM_MAP_COPY_NULL) {
10949 		*dst_addr = 0;
10950 		return KERN_SUCCESS;
10951 	}
10952 
10953 	/*
10954 	 * Assert that the vm_map_copy is coming from the right
10955 	 * zone and hasn't been forged
10956 	 */
10957 	vm_map_copy_require(copy);
10958 
10959 	if (copy->size != copy_size) {
10960 		*dst_addr = 0;
10961 		return KERN_FAILURE;
10962 	}
10963 
10964 	/*
10965 	 *	Check for special copy object, created
10966 	 *	by vm_map_copyin_object.
10967 	 */
10968 
10969 	if (copy->type == VM_MAP_COPY_OBJECT) {
10970 		vm_object_t             object = copy->cpy_object;
10971 		kern_return_t           kr;
10972 		vm_object_offset_t      offset;
10973 
10974 		offset = vm_object_trunc_page(copy->offset);
10975 		size = vm_map_round_page((copy_size +
10976 		    (vm_map_size_t)(copy->offset -
10977 		    offset)),
10978 		    VM_MAP_PAGE_MASK(dst_map));
10979 		*dst_addr = 0;
10980 		kr = vm_map_enter(dst_map, dst_addr, size,
10981 		    (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10982 		    VM_MAP_KERNEL_FLAGS_NONE,
10983 		    VM_KERN_MEMORY_NONE,
10984 		    object, offset, FALSE,
10985 		    VM_PROT_DEFAULT, VM_PROT_ALL,
10986 		    VM_INHERIT_DEFAULT);
10987 		if (kr != KERN_SUCCESS) {
10988 			return kr;
10989 		}
10990 		/* Account for non-pagealigned copy object */
10991 		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
10992 		if (consume_on_success) {
10993 			zfree(vm_map_copy_zone, copy);
10994 		}
10995 		return KERN_SUCCESS;
10996 	}
10997 
10998 	/*
10999 	 *	Check for special kernel buffer allocated
11000 	 *	by new_ipc_kmsg_copyin.
11001 	 */
11002 
11003 	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
11004 		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
11005 		           copy, copy_size, FALSE,
11006 		           consume_on_success);
11007 	}
11008 
11009 	original_copy = copy;
11010 	if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
11011 		kern_return_t kr;
11012 		vm_map_copy_t target_copy;
11013 		vm_map_offset_t overmap_start, overmap_end, trimmed_start;
11014 
11015 		target_copy = VM_MAP_COPY_NULL;
11016 		DEBUG4K_ADJUST("adjusting...\n");
11017 		kr = vm_map_copy_adjust_to_target(
11018 			copy,
11019 			0, /* offset */
11020 			copy->size, /* size */
11021 			dst_map,
11022 			TRUE, /* copy */
11023 			&target_copy,
11024 			&overmap_start,
11025 			&overmap_end,
11026 			&trimmed_start);
11027 		if (kr != KERN_SUCCESS) {
11028 			DEBUG4K_COPY("adjust failed 0x%x\n", kr);
11029 			return kr;
11030 		}
11031 		DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
11032 		if (target_copy != copy) {
11033 			copy = target_copy;
11034 		}
11035 		copy_size = copy->size;
11036 	}
11037 
11038 	/*
11039 	 *	Find space for the data
11040 	 */
11041 
11042 	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
11043 	    VM_MAP_COPY_PAGE_MASK(copy));
11044 	size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
11045 	    VM_MAP_COPY_PAGE_MASK(copy))
11046 	    - vm_copy_start;
11047 
11048 
11049 StartAgain:;
11050 
11051 	vm_map_lock(dst_map);
11052 	if (dst_map->disable_vmentry_reuse == TRUE) {
11053 		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
11054 		last = entry;
11055 	} else {
11056 		if (dst_map->holelistenabled) {
11057 			hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
11058 
11059 			if (hole_entry == NULL) {
11060 				/*
11061 				 * No more space in the map?
11062 				 */
11063 				vm_map_unlock(dst_map);
11064 				return KERN_NO_SPACE;
11065 			}
11066 
11067 			last = hole_entry;
11068 			start = last->vme_start;
11069 		} else {
11070 			assert(first_free_is_valid(dst_map));
11071 			start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
11072 			    vm_map_min(dst_map) : last->vme_end;
11073 		}
11074 		start = vm_map_round_page(start,
11075 		    VM_MAP_PAGE_MASK(dst_map));
11076 	}
11077 
11078 	while (TRUE) {
11079 		vm_map_entry_t  next = last->vme_next;
11080 		vm_map_offset_t end = start + size;
11081 
11082 		if ((end > dst_map->max_offset) || (end < start)) {
11083 			if (dst_map->wait_for_space) {
11084 				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
11085 					assert_wait((event_t) dst_map,
11086 					    THREAD_INTERRUPTIBLE);
11087 					vm_map_unlock(dst_map);
11088 					thread_block(THREAD_CONTINUE_NULL);
11089 					goto StartAgain;
11090 				}
11091 			}
11092 			vm_map_unlock(dst_map);
11093 			return KERN_NO_SPACE;
11094 		}
11095 
11096 		if (dst_map->holelistenabled) {
11097 			if (last->vme_end >= end) {
11098 				break;
11099 			}
11100 		} else {
11101 			/*
11102 			 *	If there are no more entries, we must win.
11103 			 *
11104 			 *	OR
11105 			 *
11106 			 *	If there is another entry, it must be
11107 			 *	after the end of the potential new region.
11108 			 */
11109 
11110 			if (next == vm_map_to_entry(dst_map)) {
11111 				break;
11112 			}
11113 
11114 			if (next->vme_start >= end) {
11115 				break;
11116 			}
11117 		}
11118 
11119 		last = next;
11120 
11121 		if (dst_map->holelistenabled) {
11122 			if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
11123 				/*
11124 				 * Wrapped around
11125 				 */
11126 				vm_map_unlock(dst_map);
11127 				return KERN_NO_SPACE;
11128 			}
11129 			start = last->vme_start;
11130 		} else {
11131 			start = last->vme_end;
11132 		}
11133 		start = vm_map_round_page(start,
11134 		    VM_MAP_PAGE_MASK(dst_map));
11135 	}
11136 
11137 	if (dst_map->holelistenabled) {
11138 		if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
11139 			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", last, (unsigned long long)last->vme_start);
11140 		}
11141 	}
11142 
11143 
11144 	adjustment = start - vm_copy_start;
11145 	if (!consume_on_success) {
11146 		/*
11147 		 * We're not allowed to consume "copy", so we'll have to
11148 		 * copy its map entries into the destination map below.
11149 		 * No need to re-allocate map entries from the correct
11150 		 * (pageable or not) zone, since we'll get new map entries
11151 		 * during the transfer.
11152 		 * We'll also adjust the map entries' "start" and "end"
11153 		 * during the transfer, to keep "copy"'s entries consistent
11154 		 * with its "offset".
11155 		 */
11156 		goto after_adjustments;
11157 	}
11158 
11159 	/*
11160 	 *	Since we're going to just drop the map
11161 	 *	entries from the copy into the destination
11162 	 *	map, they must come from the same pool.
11163 	 */
11164 
11165 	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
11166 		/*
11167 		 * Mismatches occur when dealing with the default
11168 		 * pager.
11169 		 */
11170 		vm_map_entry_t  next, new;
11171 
11172 		/*
11173 		 * Find the zone that the copies were allocated from
11174 		 */
11175 
11176 		entry = vm_map_copy_first_entry(copy);
11177 
11178 		/*
11179 		 * Reinitialize the copy so that vm_map_copy_entry_link
11180 		 * will work.
11181 		 */
11182 		vm_map_store_copy_reset(copy, entry);
11183 		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
11184 
11185 		/*
11186 		 * Copy each entry.
11187 		 */
11188 		while (entry != vm_map_copy_to_entry(copy)) {
11189 			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11190 			vm_map_entry_copy_full(new, entry);
11191 			new->vme_no_copy_on_read = FALSE;
11192 			assert(!new->iokit_acct);
11193 			if (new->is_sub_map) {
11194 				/* clr address space specifics */
11195 				new->use_pmap = FALSE;
11196 			}
11197 			vm_map_copy_entry_link(copy,
11198 			    vm_map_copy_last_entry(copy),
11199 			    new);
11200 			next = entry->vme_next;
11201 			_vm_map_entry_dispose(NULL, entry);
11202 			entry = next;
11203 		}
11204 	}
11205 
11206 	/*
11207 	 *	Adjust the addresses in the copy chain, and
11208 	 *	reset the region attributes.
11209 	 */
11210 
11211 	for (entry = vm_map_copy_first_entry(copy);
11212 	    entry != vm_map_copy_to_entry(copy);
11213 	    entry = entry->vme_next) {
11214 		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
11215 			/*
11216 			 * We're injecting this copy entry into a map that
11217 			 * has the standard page alignment, so clear
11218 			 * "map_aligned" (which might have been inherited
11219 			 * from the original map entry).
11220 			 */
11221 			entry->map_aligned = FALSE;
11222 		}
11223 
11224 		entry->vme_start += adjustment;
11225 		entry->vme_end += adjustment;
11226 
11227 		if (entry->map_aligned) {
11228 			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
11229 			    VM_MAP_PAGE_MASK(dst_map)));
11230 			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
11231 			    VM_MAP_PAGE_MASK(dst_map)));
11232 		}
11233 
11234 		entry->inheritance = VM_INHERIT_DEFAULT;
11235 		entry->protection = VM_PROT_DEFAULT;
11236 		entry->max_protection = VM_PROT_ALL;
11237 		entry->behavior = VM_BEHAVIOR_DEFAULT;
11238 
11239 		/*
11240 		 * If the entry is now wired,
11241 		 * map the pages into the destination map.
11242 		 */
11243 		if (entry->wired_count != 0) {
11244 			vm_map_offset_t va;
11245 			vm_object_offset_t       offset;
11246 			vm_object_t object;
11247 			vm_prot_t prot;
11248 			int     type_of_fault;
11249 
11250 			/* TODO4K would need to use actual page size */
11251 			assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
11252 
11253 			object = VME_OBJECT(entry);
11254 			offset = VME_OFFSET(entry);
11255 			va = entry->vme_start;
11256 
11257 			pmap_pageable(dst_map->pmap,
11258 			    entry->vme_start,
11259 			    entry->vme_end,
11260 			    TRUE);
11261 
11262 			while (va < entry->vme_end) {
11263 				vm_page_t       m;
11264 				struct vm_object_fault_info fault_info = {};
11265 
11266 				/*
11267 				 * Look up the page in the object.
11268 				 * Assert that the page will be found in the
11269 				 * top object:
11270 				 * either
11271 				 *	the object was newly created by
11272 				 *	vm_object_copy_slowly, and has
11273 				 *	copies of all of the pages from
11274 				 *	the source object
11275 				 * or
11276 				 *	the object was moved from the old
11277 				 *	map entry; because the old map
11278 				 *	entry was wired, all of the pages
11279 				 *	were in the top-level object.
11280 				 *	(XXX not true if we wire pages for
11281 				 *	 reading)
11282 				 */
11283 				vm_object_lock(object);
11284 
11285 				m = vm_page_lookup(object, offset);
11286 				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
11287 				    m->vmp_absent) {
11288 					panic("vm_map_copyout: wiring %p", m);
11289 				}
11290 
11291 				prot = entry->protection;
11292 
11293 				if (override_nx(dst_map, VME_ALIAS(entry)) &&
11294 				    prot) {
11295 					prot |= VM_PROT_EXECUTE;
11296 				}
11297 
11298 				type_of_fault = DBG_CACHE_HIT_FAULT;
11299 
11300 				fault_info.user_tag = VME_ALIAS(entry);
11301 				fault_info.pmap_options = 0;
11302 				if (entry->iokit_acct ||
11303 				    (!entry->is_sub_map && !entry->use_pmap)) {
11304 					fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11305 				}
11306 
11307 				vm_fault_enter(m,
11308 				    dst_map->pmap,
11309 				    va,
11310 				    PAGE_SIZE, 0,
11311 				    prot,
11312 				    prot,
11313 				    VM_PAGE_WIRED(m),
11314 				    FALSE,            /* change_wiring */
11315 				    VM_KERN_MEMORY_NONE,            /* tag - not wiring */
11316 				    &fault_info,
11317 				    NULL,             /* need_retry */
11318 				    &type_of_fault);
11319 
11320 				vm_object_unlock(object);
11321 
11322 				offset += PAGE_SIZE_64;
11323 				va += PAGE_SIZE;
11324 			}
11325 		}
11326 	}
11327 
11328 after_adjustments:
11329 
11330 	/*
11331 	 *	Correct the page alignment for the result
11332 	 */
11333 
11334 	*dst_addr = start + (copy->offset - vm_copy_start);
11335 
11336 #if KASAN
11337 	kasan_notify_address(*dst_addr, size);
11338 #endif
11339 
11340 	/*
11341 	 *	Update the hints and the map size
11342 	 */
11343 
11344 	if (consume_on_success) {
11345 		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
11346 	} else {
11347 		SAVE_HINT_MAP_WRITE(dst_map, last);
11348 	}
11349 
11350 	dst_map->size += size;
11351 
11352 	/*
11353 	 *	Link in the copy
11354 	 */
11355 
11356 	if (consume_on_success) {
11357 		vm_map_copy_insert(dst_map, last, copy);
11358 		if (copy != original_copy) {
11359 			vm_map_copy_discard(original_copy);
11360 			original_copy = VM_MAP_COPY_NULL;
11361 		}
11362 	} else {
11363 		vm_map_copy_remap(dst_map, last, copy, adjustment,
11364 		    cur_protection, max_protection,
11365 		    inheritance);
11366 		if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
11367 			vm_map_copy_discard(copy);
11368 			copy = original_copy;
11369 		}
11370 	}
11371 
11372 
11373 	vm_map_unlock(dst_map);
11374 
11375 	/*
11376 	 * XXX	If wiring_required, call vm_map_pageable
11377 	 */
11378 
11379 	return KERN_SUCCESS;
11380 }
11381 
11382 /*
11383  *	Routine:	vm_map_copyin
11384  *
11385  *	Description:
11386  *		see vm_map_copyin_common.  Exported via Unsupported.exports.
11387  *
11388  */
11389 
11390 #undef vm_map_copyin
11391 
11392 kern_return_t
11393 vm_map_copyin(
11394 	vm_map_t                        src_map,
11395 	vm_map_address_t        src_addr,
11396 	vm_map_size_t           len,
11397 	boolean_t                       src_destroy,
11398 	vm_map_copy_t           *copy_result)   /* OUT */
11399 {
11400 	return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11401 	           FALSE, copy_result, FALSE);
11402 }
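
/*
 * Hedged variant of the call above: passing src_destroy == TRUE turns
 * the copy into a Mach-style "move", removing the source region once
 * it has been captured in the copy object ("copy" as in the previous
 * sketch):
 *
 *	kr = vm_map_copyin(src_map, src_addr, len,
 *	    TRUE,               // src_destroy
 *	    &copy);
 */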
11403 
11404 /*
11405  *	Routine:	vm_map_copyin_common
11406  *
11407  *	Description:
11408  *		Copy the specified region (src_addr, len) from the
11409  *		source address space (src_map), possibly removing
11410  *		the region from the source address space (src_destroy).
11411  *
11412  *	Returns:
11413  *		A vm_map_copy_t object (copy_result), suitable for
11414  *		insertion into another address space (using vm_map_copyout),
11415  *		copying over another address space region (using
11416  *		vm_map_copy_overwrite).  If the copy is unused, it
11417  *		should be destroyed (using vm_map_copy_discard).
11418  *
11419  *	In/out conditions:
11420  *		The source map should not be locked on entry.
11421  */
11422 
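/*
 * Bookkeeping for the submap traversal in vm_map_copyin_internal():
 * one submap_map_t is pushed onto the local "parent_maps" stack each
 * time the copy descends into a submap, recording the parent map and
 * the base-map range, so that the walk can resume in the parent once
 * the submap portion has been copied.
 */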
11423 typedef struct submap_map {
11424 	vm_map_t        parent_map;
11425 	vm_map_offset_t base_start;
11426 	vm_map_offset_t base_end;
11427 	vm_map_size_t   base_len;
11428 	struct submap_map *next;
11429 } submap_map_t;
11430 
11431 kern_return_t
11432 vm_map_copyin_common(
11433 	vm_map_t        src_map,
11434 	vm_map_address_t src_addr,
11435 	vm_map_size_t   len,
11436 	boolean_t       src_destroy,
11437 	__unused boolean_t      src_volatile,
11438 	vm_map_copy_t   *copy_result,   /* OUT */
11439 	boolean_t       use_maxprot)
11440 {
11441 	int flags;
11442 
11443 	flags = 0;
11444 	if (src_destroy) {
11445 		flags |= VM_MAP_COPYIN_SRC_DESTROY;
11446 	}
11447 	if (use_maxprot) {
11448 		flags |= VM_MAP_COPYIN_USE_MAXPROT;
11449 	}
11450 	return vm_map_copyin_internal(src_map,
11451 	           src_addr,
11452 	           len,
11453 	           flags,
11454 	           copy_result);
11455 }
11456 kern_return_t
11457 vm_map_copyin_internal(
11458 	vm_map_t        src_map,
11459 	vm_map_address_t src_addr,
11460 	vm_map_size_t   len,
11461 	int             flags,
11462 	vm_map_copy_t   *copy_result)   /* OUT */
11463 {
11464 	vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
11465 	                                 * in multi-level lookup, this
11466 	                                 * entry contains the actual
11467 	                                 * vm_object/offset.
11468 	                                 */
11469 	vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
11470 
11471 	vm_map_offset_t src_start;      /* Start of current entry --
11472 	                                 * where copy is taking place now
11473 	                                 */
11474 	vm_map_offset_t src_end;        /* End of entire region to be
11475 	                                 * copied */
11476 	vm_map_offset_t src_base;
11477 	vm_map_t        base_map = src_map;
11478 	boolean_t       map_share = FALSE;
11479 	submap_map_t    *parent_maps = NULL;
11480 
11481 	vm_map_copy_t   copy;           /* Resulting copy */
11482 	vm_map_address_t copy_addr;
11483 	vm_map_size_t   copy_size;
11484 	boolean_t       src_destroy;
11485 	boolean_t       use_maxprot;
11486 	boolean_t       preserve_purgeable;
11487 	boolean_t       entry_was_shared;
11488 	vm_map_entry_t  saved_src_entry;
11489 
11490 	if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11491 		return KERN_INVALID_ARGUMENT;
11492 	}
11493 
11494 	src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11495 	use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11496 	preserve_purgeable =
11497 	    (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11498 
11499 	/*
11500 	 *	Check for copies of zero bytes.
11501 	 */
11502 
11503 	if (len == 0) {
11504 		*copy_result = VM_MAP_COPY_NULL;
11505 		return KERN_SUCCESS;
11506 	}
11507 
11508 	/*
11509 	 *	Check that the end address doesn't overflow
11510 	 */
11511 	src_end = src_addr + len;
11512 	if (src_end < src_addr) {
11513 		return KERN_INVALID_ADDRESS;
11514 	}
11515 
11516 	/*
11517 	 *	Compute (page aligned) start and end of region
11518 	 */
11519 	src_start = vm_map_trunc_page(src_addr,
11520 	    VM_MAP_PAGE_MASK(src_map));
11521 	src_end = vm_map_round_page(src_end,
11522 	    VM_MAP_PAGE_MASK(src_map));
11523 
11524 	/*
11525 	 * If the copy is sufficiently small, use a kernel buffer instead
11526 	 * of making a virtual copy.  The theory being that the cost of
11527 	 * setting up VM (and taking C-O-W faults) dominates the copy costs
11528 	 * for small regions.
11529 	 */
11530 	if ((len < msg_ool_size_small) &&
11531 	    !use_maxprot &&
11532 	    !preserve_purgeable &&
11533 	    !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11534 	    /*
11535 	     * Since the "msg_ool_size_small" threshold was increased and
11536 	     * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11537 	     * address space limits, we revert to doing a virtual copy if the
11538 	     * copied range goes beyond those limits.  Otherwise, mach_vm_read()
11539 	     * of the commpage would now fail when it used to work.
11540 	     */
11541 	    (src_start >= vm_map_min(src_map) &&
11542 	    src_start < vm_map_max(src_map) &&
11543 	    src_end >= vm_map_min(src_map) &&
11544 	    src_end < vm_map_max(src_map))) {
11545 		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11546 		           src_destroy, copy_result);
11547 	}
11548 
11549 	/*
11550 	 *	Allocate a header element for the list.
11551 	 *
11552 	 *	Use the start and end in the header to
11553 	 *	remember the endpoints prior to rounding.
11554 	 */
11555 
11556 	copy = vm_map_copy_allocate();
11557 	copy->type = VM_MAP_COPY_ENTRY_LIST;
11558 	copy->cpy_hdr.entries_pageable = TRUE;
11559 	copy->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(src_map);
11560 
11561 	vm_map_store_init( &(copy->cpy_hdr));
11562 
11563 	copy->offset = src_addr;
11564 	copy->size = len;
11565 
11566 	new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11567 
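/*
 * RETURN(x): common error-exit path for the copyin loop below.  It
 * unlocks (and, if it is a submap, releases) the current source map,
 * disposes of any pending new entry and of the partially built copy,
 * and unwinds the parent_maps stack before returning "x".
 */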
11568 #define RETURN(x)                                               \
11569 	MACRO_BEGIN                                             \
11570 	vm_map_unlock(src_map);                                 \
11571 	if(src_map != base_map)                                 \
11572 	        vm_map_deallocate(src_map);                     \
11573 	if (new_entry != VM_MAP_ENTRY_NULL)                     \
11574 	        vm_map_copy_entry_dispose(copy,new_entry);      \
11575 	vm_map_copy_discard(copy);                              \
11576 	{                                                       \
11577 	        submap_map_t	*_ptr;                          \
11578                                                                 \
11579 	        for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11580 	                parent_maps=parent_maps->next;          \
11581 	                if (_ptr->parent_map != base_map)       \
11582 	                        vm_map_deallocate(_ptr->parent_map);    \
11583 	                kfree_type(submap_map_t, _ptr);         \
11584 	        }                                               \
11585 	}                                                       \
11586 	MACRO_RETURN(x);                                        \
11587 	MACRO_END
11588 
11589 	/*
11590 	 *	Find the beginning of the region.
11591 	 */
11592 
11593 	vm_map_lock(src_map);
11594 
11595 	/*
11596 	 * Lookup the original "src_addr" rather than the truncated
11597 	 * "src_start", in case "src_start" falls in a non-map-aligned
11598 	 * map entry *before* the map entry that contains "src_addr"...
11599 	 */
11600 	if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11601 		RETURN(KERN_INVALID_ADDRESS);
11602 	}
11603 	if (!tmp_entry->is_sub_map) {
11604 		/*
11605 		 * ... but clip to the map-rounded "src_start" rather than
11606 		 * "src_addr" to preserve map-alignment.  We'll adjust the
11607 		 * first copy entry at the end, if needed.
11608 		 */
11609 		vm_map_clip_start(src_map, tmp_entry, src_start);
11610 	}
11611 	if (src_start < tmp_entry->vme_start) {
11612 		/*
11613 		 * Move "src_start" up to the start of the
11614 		 * first map entry to copy.
11615 		 */
11616 		src_start = tmp_entry->vme_start;
11617 	}
11618 	/* set for later submap fix-up */
11619 	copy_addr = src_start;
11620 
11621 	/*
11622 	 *	Go through entries until we get to the end.
11623 	 */
11624 
11625 	while (TRUE) {
11626 		vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
11627 		vm_map_size_t   src_size;               /* Size of source
11628 		                                         * map entry (in both
11629 		                                         * maps)
11630 		                                         */
11631 
11632 		vm_object_t             src_object;     /* Object to copy */
11633 		vm_object_offset_t      src_offset;
11634 
11635 		vm_object_t             new_copy_object;/* vm_object_copy_* result */
11636 
11637 		boolean_t       src_needs_copy;         /* Should source map
11638 		                                         * be made read-only
11639 		                                         * for copy-on-write?
11640 		                                         */
11641 
11642 		boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
11643 
11644 		boolean_t       was_wired;              /* Was source wired? */
11645 		boolean_t       saved_used_for_jit;     /* Saved used_for_jit. */
11646 		vm_map_version_t version;               /* Version before locks
11647 		                                         * dropped to make copy
11648 		                                         */
11649 		kern_return_t   result;                 /* Return value from
11650 		                                         * copy_strategically.
11651 		                                         */
11652 		while (tmp_entry->is_sub_map) {
11653 			vm_map_size_t submap_len;
11654 			submap_map_t *ptr;
11655 
11656 			ptr = kalloc_type(submap_map_t, Z_WAITOK);
11657 			ptr->next = parent_maps;
11658 			parent_maps = ptr;
11659 			ptr->parent_map = src_map;
11660 			ptr->base_start = src_start;
11661 			ptr->base_end = src_end;
11662 			submap_len = tmp_entry->vme_end - src_start;
11663 			if (submap_len > (src_end - src_start)) {
11664 				submap_len = src_end - src_start;
11665 			}
11666 			ptr->base_len = submap_len;
11667 
11668 			src_start -= tmp_entry->vme_start;
11669 			src_start += VME_OFFSET(tmp_entry);
11670 			src_end = src_start + submap_len;
11671 			src_map = VME_SUBMAP(tmp_entry);
11672 			vm_map_lock(src_map);
11673 			/* keep an outstanding reference for all maps in */
11674 			/* the chain of parent maps, except the base map */
11675 			vm_map_reference(src_map);
11676 			vm_map_unlock(ptr->parent_map);
11677 			if (!vm_map_lookup_entry(
11678 				    src_map, src_start, &tmp_entry)) {
11679 				RETURN(KERN_INVALID_ADDRESS);
11680 			}
11681 			map_share = TRUE;
11682 			if (!tmp_entry->is_sub_map) {
11683 				vm_map_clip_start(src_map, tmp_entry, src_start);
11684 			}
11685 			src_entry = tmp_entry;
11686 		}
11687 		/* we are now in the lowest level submap... */
11688 
11689 		if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11690 		    (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11691 			/* This is not supported for now. In the     */
11692 			/* future we will need to detect the         */
11693 			/* phys_contig condition and then upgrade    */
11694 			/* copy_slowly to do a physical copy from    */
11695 			/* the device-memory-based object. We can    */
11696 			/* piggy-back off of the was_wired boolean   */
11697 			/* to set up the proper handling.            */
11698 			RETURN(KERN_PROTECTION_FAILURE);
11699 		}
11700 		/*
11701 		 *	Create a new address map entry to hold the result.
11702 		 *	Fill in the fields from the appropriate source entries.
11703 		 *	We must unlock the source map to do this if we need
11704 		 *	to allocate a map entry.
11705 		 */
11706 		if (new_entry == VM_MAP_ENTRY_NULL) {
11707 			version.main_timestamp = src_map->timestamp;
11708 			vm_map_unlock(src_map);
11709 
11710 			new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11711 
11712 			vm_map_lock(src_map);
11713 			if ((version.main_timestamp + 1) != src_map->timestamp) {
11714 				if (!vm_map_lookup_entry(src_map, src_start,
11715 				    &tmp_entry)) {
11716 					RETURN(KERN_INVALID_ADDRESS);
11717 				}
11718 				if (!tmp_entry->is_sub_map) {
11719 					vm_map_clip_start(src_map, tmp_entry, src_start);
11720 				}
11721 				continue; /* restart w/ new tmp_entry */
11722 			}
11723 		}
11724 
11725 		/*
11726 		 *	Verify that the region can be read.
11727 		 */
11728 		if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11729 		    !use_maxprot) ||
11730 		    (src_entry->max_protection & VM_PROT_READ) == 0) {
11731 			RETURN(KERN_PROTECTION_FAILURE);
11732 		}
11733 
11734 		/*
11735 		 *	Clip against the endpoints of the entire region.
11736 		 */
11737 
11738 		vm_map_clip_end(src_map, src_entry, src_end);
11739 
11740 		src_size = src_entry->vme_end - src_start;
11741 		src_object = VME_OBJECT(src_entry);
11742 		src_offset = VME_OFFSET(src_entry);
11743 		was_wired = (src_entry->wired_count != 0);
11744 
11745 		vm_map_entry_copy(src_map, new_entry, src_entry);
11746 		if (new_entry->is_sub_map) {
11747 			/* clr address space specifics */
11748 			new_entry->use_pmap = FALSE;
11749 		} else {
11750 			/*
11751 			 * We're dealing with a copy-on-write operation,
11752 			 * so the resulting mapping should not inherit the
11753 			 * original mapping's accounting settings.
11754 			 * "iokit_acct" should have been cleared in
11755 			 * vm_map_entry_copy().
11756 			 * "use_pmap" should be reset to its default (TRUE)
11757 			 * so that the new mapping gets accounted for in
11758 			 * the task's memory footprint.
11759 			 */
11760 			assert(!new_entry->iokit_acct);
11761 			new_entry->use_pmap = TRUE;
11762 		}
11763 
11764 		/*
11765 		 *	Attempt non-blocking copy-on-write optimizations.
11766 		 */
11767 
11768 		/*
11769 		 * If we are destroying the source, and the object
11770 		 * is internal, we could move the object reference
11771 		 * from the source to the copy.  The copy is
11772 		 * copy-on-write only if the source is.
11773 		 * We make another reference to the object, because
11774 		 * destroying the source entry will deallocate it.
11775 		 *
11776 		 * This memory transfer has to be atomic (to prevent
11777 		 * the VM object from being shared or copied while
11778 		 * it's being moved here), so we could only do this
11779 		 * if we won't have to unlock the VM map until the
11780 		 * original mapping has been fully removed.
11781 		 */
11782 
11783 RestartCopy:
11784 		if ((src_object == VM_OBJECT_NULL ||
11785 		    (!was_wired && !map_share && !tmp_entry->is_shared
11786 		    && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11787 		    vm_object_copy_quickly(
11788 			    VME_OBJECT(new_entry),
11789 			    src_offset,
11790 			    src_size,
11791 			    &src_needs_copy,
11792 			    &new_entry_needs_copy)) {
11793 			new_entry->needs_copy = new_entry_needs_copy;
11794 
11795 			/*
11796 			 *	Handle copy-on-write obligations
11797 			 */
11798 
11799 			if (src_needs_copy && !tmp_entry->needs_copy) {
11800 				vm_prot_t prot;
11801 
11802 				prot = src_entry->protection & ~VM_PROT_WRITE;
11803 
11804 				if (override_nx(src_map, VME_ALIAS(src_entry))
11805 				    && prot) {
11806 					prot |= VM_PROT_EXECUTE;
11807 				}
11808 
11809 				vm_object_pmap_protect(
11810 					src_object,
11811 					src_offset,
11812 					src_size,
11813 					(src_entry->is_shared ?
11814 					PMAP_NULL
11815 					: src_map->pmap),
11816 					VM_MAP_PAGE_SIZE(src_map),
11817 					src_entry->vme_start,
11818 					prot);
11819 
11820 				assert(tmp_entry->wired_count == 0);
11821 				tmp_entry->needs_copy = TRUE;
11822 			}
11823 
11824 			/*
11825 			 *	The map has never been unlocked, so it's safe
11826 			 *	to move to the next entry rather than doing
11827 			 *	another lookup.
11828 			 */
11829 
11830 			goto CopySuccessful;
11831 		}
11832 
11833 		entry_was_shared = tmp_entry->is_shared;
11834 
11835 		/*
11836 		 *	Take an object reference, so that we may
11837 		 *	release the map lock(s).
11838 		 */
11839 
11840 		assert(src_object != VM_OBJECT_NULL);
11841 		vm_object_reference(src_object);
11842 
11843 		/*
11844 		 *	Record the timestamp for later verification.
11845 		 *	Unlock the map.
11846 		 */
11847 
11848 		version.main_timestamp = src_map->timestamp;
11849 		vm_map_unlock(src_map); /* Increments timestamp once! */
11850 		saved_src_entry = src_entry;
11851 		tmp_entry = VM_MAP_ENTRY_NULL;
11852 		src_entry = VM_MAP_ENTRY_NULL;
11853 
11854 		/*
11855 		 *	Perform the copy
11856 		 */
11857 
11858 		if (was_wired ||
11859 		    (debug4k_no_cow_copyin &&
11860 		    VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11861 CopySlowly:
11862 			vm_object_lock(src_object);
11863 			result = vm_object_copy_slowly(
11864 				src_object,
11865 				src_offset,
11866 				src_size,
11867 				THREAD_UNINT,
11868 				&new_copy_object);
11869 			/* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
11870 			saved_used_for_jit = new_entry->used_for_jit;
11871 			VME_OBJECT_SET(new_entry, new_copy_object);
11872 			new_entry->used_for_jit = saved_used_for_jit;
11873 			VME_OFFSET_SET(new_entry,
11874 			    src_offset - vm_object_trunc_page(src_offset));
11875 			new_entry->needs_copy = FALSE;
11876 		} else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11877 		    (entry_was_shared || map_share)) {
11878 			vm_object_t new_object;
11879 
11880 			vm_object_lock_shared(src_object);
11881 			new_object = vm_object_copy_delayed(
11882 				src_object,
11883 				src_offset,
11884 				src_size,
11885 				TRUE);
11886 			if (new_object == VM_OBJECT_NULL) {
11887 				goto CopySlowly;
11888 			}
11889 
11890 			VME_OBJECT_SET(new_entry, new_object);
11891 			assert(new_entry->wired_count == 0);
11892 			new_entry->needs_copy = TRUE;
11893 			assert(!new_entry->iokit_acct);
11894 			assert(new_object->purgable == VM_PURGABLE_DENY);
11895 			assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11896 			result = KERN_SUCCESS;
11897 		} else {
11898 			vm_object_offset_t new_offset;
11899 			new_offset = VME_OFFSET(new_entry);
11900 			result = vm_object_copy_strategically(src_object,
11901 			    src_offset,
11902 			    src_size,
11903 			    &new_copy_object,
11904 			    &new_offset,
11905 			    &new_entry_needs_copy);
11906 			/* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
11907 			saved_used_for_jit = new_entry->used_for_jit;
11908 			VME_OBJECT_SET(new_entry, new_copy_object);
11909 			new_entry->used_for_jit = saved_used_for_jit;
11910 			if (new_offset != VME_OFFSET(new_entry)) {
11911 				VME_OFFSET_SET(new_entry, new_offset);
11912 			}
11913 
11914 			new_entry->needs_copy = new_entry_needs_copy;
11915 		}
11916 
11917 		if (result == KERN_SUCCESS &&
11918 		    ((preserve_purgeable &&
11919 		    src_object->purgable != VM_PURGABLE_DENY) ||
11920 		    new_entry->used_for_jit)) {
11921 			/*
11922 			 * Purgeable objects should be COPY_NONE, true share;
11923 			 * this should be propagated to the copy.
11924 			 *
11925 			 * Also force mappings the pmap specially protects to
11926 			 * be COPY_NONE; trying to COW these mappings would
11927 			 * change the effective protections, which could have
11928 			 * side effects if the pmap layer relies on the
11929 			 * specified protections.
11930 			 */
11931 
11932 			vm_object_t     new_object;
11933 
11934 			new_object = VME_OBJECT(new_entry);
11935 			assert(new_object != src_object);
11936 			vm_object_lock(new_object);
11937 			assert(new_object->ref_count == 1);
11938 			assert(new_object->shadow == VM_OBJECT_NULL);
11939 			assert(new_object->copy == VM_OBJECT_NULL);
11940 			assert(new_object->vo_owner == NULL);
11941 
11942 			new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11943 
11944 			if (preserve_purgeable &&
11945 			    src_object->purgable != VM_PURGABLE_DENY) {
11946 				new_object->true_share = TRUE;
11947 
11948 				/* start as non-volatile with no owner... */
11949 				new_object->purgable = VM_PURGABLE_NONVOLATILE;
11950 				vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11951 				/* ... and move to src_object's purgeable state */
11952 				if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11953 					int state;
11954 					state = src_object->purgable;
11955 					vm_object_purgable_control(
11956 						new_object,
11957 						VM_PURGABLE_SET_STATE_FROM_KERNEL,
11958 						&state);
11959 				}
11960 				/* no pmap accounting for purgeable objects */
11961 				new_entry->use_pmap = FALSE;
11962 			}
11963 
11964 			vm_object_unlock(new_object);
11965 			new_object = VM_OBJECT_NULL;
11966 		}
11967 
11968 		if (result != KERN_SUCCESS &&
11969 		    result != KERN_MEMORY_RESTART_COPY) {
11970 			vm_map_lock(src_map);
11971 			RETURN(result);
11972 		}
11973 
11974 		/*
11975 		 *	Throw away the extra reference
11976 		 */
11977 
11978 		vm_object_deallocate(src_object);
11979 
11980 		/*
11981 		 *	Verify that the map has not substantially
11982 		 *	changed while the copy was being made.
11983 		 */
11984 
11985 		vm_map_lock(src_map);
11986 
11987 		if ((version.main_timestamp + 1) == src_map->timestamp) {
11988 			/* src_map hasn't changed: src_entry is still valid */
11989 			src_entry = saved_src_entry;
11990 			goto VerificationSuccessful;
11991 		}
11992 
11993 		/*
11994 		 *	Simple version comparison failed.
11995 		 *
11996 		 *	Retry the lookup and verify that the
11997 		 *	same object/offset are still present.
11998 		 *
11999 		 *	[Note: a memory manager that colludes with
12000 		 *	the calling task can detect that we have
12001 		 *	cheated.  While the map was unlocked, the
12002 		 *	mapping could have been changed and restored.]
12003 		 */
12004 
12005 		if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12006 			if (result != KERN_MEMORY_RESTART_COPY) {
12007 				vm_object_deallocate(VME_OBJECT(new_entry));
12008 				VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
12009 				/* reset accounting state */
12010 				new_entry->iokit_acct = FALSE;
12011 				new_entry->use_pmap = TRUE;
12012 			}
12013 			RETURN(KERN_INVALID_ADDRESS);
12014 		}
12015 
12016 		src_entry = tmp_entry;
12017 		vm_map_clip_start(src_map, src_entry, src_start);
12018 
12019 		if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12020 		    !use_maxprot) ||
12021 		    ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12022 			goto VerificationFailed;
12023 		}
12024 
12025 		if (src_entry->vme_end < new_entry->vme_end) {
12026 			/*
12027 			 * This entry might have been shortened
12028 			 * (vm_map_clip_end) or been replaced with
12029 			 * an entry that ends closer to "src_start"
12030 			 * than before.
12031 			 * Adjust "new_entry" accordingly; copying
12032 			 * less memory would be correct but we also
12033 			 * redo the copy (see below) if the new entry
12034 			 * no longer points at the same object/offset.
12035 			 */
12036 			assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12037 			    VM_MAP_COPY_PAGE_MASK(copy)));
12038 			new_entry->vme_end = src_entry->vme_end;
12039 			src_size = new_entry->vme_end - src_start;
12040 		} else if (src_entry->vme_end > new_entry->vme_end) {
12041 			/*
12042 			 * This entry might have been extended
12043 			 * (vm_map_entry_simplify() or coalesce)
12044 			 * or been replaced with an entry that ends farther
12045 			 * from "src_start" than before.
12046 			 *
12047 			 * We've called vm_object_copy_*() only on
12048 			 * the previous <start:end> range, so we can't
12049 			 * just extend new_entry.  We have to re-do
12050 			 * the copy based on the new entry as if it was
12051 			 * pointing at a different object/offset (see
12052 			 * "Verification failed" below).
12053 			 */
12054 		}
12055 
12056 		if ((VME_OBJECT(src_entry) != src_object) ||
12057 		    (VME_OFFSET(src_entry) != src_offset) ||
12058 		    (src_entry->vme_end > new_entry->vme_end)) {
12059 			/*
12060 			 *	Verification failed.
12061 			 *
12062 			 *	Start over with this top-level entry.
12063 			 */
12064 
12065 VerificationFailed:     ;
12066 
12067 			vm_object_deallocate(VME_OBJECT(new_entry));
12068 			tmp_entry = src_entry;
12069 			continue;
12070 		}
12071 
12072 		/*
12073 		 *	Verification succeeded.
12074 		 */
12075 
12076 VerificationSuccessful:;
12077 
12078 		if (result == KERN_MEMORY_RESTART_COPY) {
12079 			goto RestartCopy;
12080 		}
12081 
12082 		/*
12083 		 *	Copy succeeded.
12084 		 */
12085 
12086 CopySuccessful: ;
12087 
12088 		/*
12089 		 *	Link in the new copy entry.
12090 		 */
12091 
12092 		vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12093 		    new_entry);
12094 
12095 		/*
12096 		 *	Determine whether the entire region
12097 		 *	has been copied.
12098 		 */
12099 		src_base = src_start;
12100 		src_start = new_entry->vme_end;
12101 		new_entry = VM_MAP_ENTRY_NULL;
12102 		while ((src_start >= src_end) && (src_end != 0)) {
12103 			submap_map_t    *ptr;
12104 
12105 			if (src_map == base_map) {
12106 				/* back to the top */
12107 				break;
12108 			}
12109 
12110 			ptr = parent_maps;
12111 			assert(ptr != NULL);
12112 			parent_maps = parent_maps->next;
12113 
12114 			/* fix up the damage we did in that submap */
12115 			vm_map_simplify_range(src_map,
12116 			    src_base,
12117 			    src_end);
12118 
12119 			vm_map_unlock(src_map);
12120 			vm_map_deallocate(src_map);
12121 			vm_map_lock(ptr->parent_map);
12122 			src_map = ptr->parent_map;
12123 			src_base = ptr->base_start;
12124 			src_start = ptr->base_start + ptr->base_len;
12125 			src_end = ptr->base_end;
12126 			if (!vm_map_lookup_entry(src_map,
12127 			    src_start,
12128 			    &tmp_entry) &&
12129 			    (src_end > src_start)) {
12130 				RETURN(KERN_INVALID_ADDRESS);
12131 			}
12132 			kfree_type(submap_map_t, ptr);
12133 			if (parent_maps == NULL) {
12134 				map_share = FALSE;
12135 			}
12136 			src_entry = tmp_entry->vme_prev;
12137 		}
12138 
12139 		if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12140 		    (src_start >= src_addr + len) &&
12141 		    (src_addr + len != 0)) {
12142 			/*
12143 			 * Stop copying now, even though we haven't reached
12144 			 * "src_end".  We'll adjust the end of the last copy
12145 			 * entry at the end, if needed.
12146 			 *
12147 		 * If src_map's alignment is different from the
12148 			 * system's page-alignment, there could be
12149 			 * extra non-map-aligned map entries between
12150 			 * the original (non-rounded) "src_addr + len"
12151 			 * and the rounded "src_end".
12152 			 * We do not want to copy those map entries since
12153 			 * they're not part of the copied range.
12154 			 */
12155 			break;
12156 		}
12157 
12158 		if ((src_start >= src_end) && (src_end != 0)) {
12159 			break;
12160 		}
12161 
12162 		/*
12163 		 *	Verify that there are no gaps in the region
12164 		 */
12165 
12166 		tmp_entry = src_entry->vme_next;
12167 		if ((tmp_entry->vme_start != src_start) ||
12168 		    (tmp_entry == vm_map_to_entry(src_map))) {
12169 			RETURN(KERN_INVALID_ADDRESS);
12170 		}
12171 	}
12172 
12173 	/*
12174 	 * If the source should be destroyed, do it now, since the
12175 	 * copy was successful.
12176 	 */
12177 	if (src_destroy) {
12178 		(void) vm_map_delete(
12179 			src_map,
12180 			vm_map_trunc_page(src_addr,
12181 			VM_MAP_PAGE_MASK(src_map)),
12182 			src_end,
12183 			((src_map == kernel_map) ?
12184 			VM_MAP_REMOVE_KUNWIRE :
12185 			VM_MAP_REMOVE_NO_FLAGS),
12186 			VM_MAP_NULL);
12187 	} else {
12188 		/* fix up the damage we did in the base map */
12189 		vm_map_simplify_range(
12190 			src_map,
12191 			vm_map_trunc_page(src_addr,
12192 			VM_MAP_PAGE_MASK(src_map)),
12193 			vm_map_round_page(src_end,
12194 			VM_MAP_PAGE_MASK(src_map)));
12195 	}
12196 
12197 	vm_map_unlock(src_map);
12198 	tmp_entry = VM_MAP_ENTRY_NULL;
12199 
12200 	if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12201 	    VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12202 		vm_map_offset_t original_start, original_offset, original_end;
12203 
12204 		assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12205 
12206 		/* adjust alignment of first copy_entry's "vme_start" */
12207 		tmp_entry = vm_map_copy_first_entry(copy);
12208 		if (tmp_entry != vm_map_copy_to_entry(copy)) {
12209 			vm_map_offset_t adjustment;
12210 
12211 			original_start = tmp_entry->vme_start;
12212 			original_offset = VME_OFFSET(tmp_entry);
12213 
12214 			/* map-align the start of the first copy entry... */
12215 			adjustment = (tmp_entry->vme_start -
12216 			    vm_map_trunc_page(
12217 				    tmp_entry->vme_start,
12218 				    VM_MAP_PAGE_MASK(src_map)));
12219 			tmp_entry->vme_start -= adjustment;
12220 			VME_OFFSET_SET(tmp_entry,
12221 			    VME_OFFSET(tmp_entry) - adjustment);
12222 			copy_addr -= adjustment;
12223 			assert(tmp_entry->vme_start < tmp_entry->vme_end);
12224 			/* ... adjust for mis-aligned start of copy range */
12225 			adjustment =
12226 			    (vm_map_trunc_page(copy->offset,
12227 			    PAGE_MASK) -
12228 			    vm_map_trunc_page(copy->offset,
12229 			    VM_MAP_PAGE_MASK(src_map)));
12230 			if (adjustment) {
12231 				assert(page_aligned(adjustment));
12232 				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12233 				tmp_entry->vme_start += adjustment;
12234 				VME_OFFSET_SET(tmp_entry,
12235 				    (VME_OFFSET(tmp_entry) +
12236 				    adjustment));
12237 				copy_addr += adjustment;
12238 				assert(tmp_entry->vme_start < tmp_entry->vme_end);
12239 			}
12240 
12241 			/*
12242 			 * Assert that the adjustments haven't exposed
12243 			 * more than was originally copied...
12244 			 */
12245 			assert(tmp_entry->vme_start >= original_start);
12246 			assert(VME_OFFSET(tmp_entry) >= original_offset);
12247 			/*
12248 			 * ... and that it did not adjust outside of
12249 			 * a single 16K page.
12250 			 */
12251 			assert(vm_map_trunc_page(tmp_entry->vme_start,
12252 			    VM_MAP_PAGE_MASK(src_map)) ==
12253 			    vm_map_trunc_page(original_start,
12254 			    VM_MAP_PAGE_MASK(src_map)));
12255 		}
12256 
12257 		/* adjust alignment of last copy_entry's "vme_end" */
12258 		tmp_entry = vm_map_copy_last_entry(copy);
12259 		if (tmp_entry != vm_map_copy_to_entry(copy)) {
12260 			vm_map_offset_t adjustment;
12261 
12262 			original_end = tmp_entry->vme_end;
12263 
12264 			/* map-align the end of the last copy entry... */
12265 			tmp_entry->vme_end =
12266 			    vm_map_round_page(tmp_entry->vme_end,
12267 			    VM_MAP_PAGE_MASK(src_map));
12268 			/* ... adjust for mis-aligned end of copy range */
12269 			adjustment =
12270 			    (vm_map_round_page((copy->offset +
12271 			    copy->size),
12272 			    VM_MAP_PAGE_MASK(src_map)) -
12273 			    vm_map_round_page((copy->offset +
12274 			    copy->size),
12275 			    PAGE_MASK));
12276 			if (adjustment) {
12277 				assert(page_aligned(adjustment));
12278 				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12279 				tmp_entry->vme_end -= adjustment;
12280 				assert(tmp_entry->vme_start < tmp_entry->vme_end);
12281 			}
12282 
12283 			/*
12284 			 * Assert that the adjustments haven't exposed
12285 			 * more than was originally copied...
12286 			 */
12287 			assert(tmp_entry->vme_end <= original_end);
12288 			/*
12289 			 * ... and that it did not adjust outside of
12290 			 * a single 16K page.
12291 			 */
12292 			assert(vm_map_round_page(tmp_entry->vme_end,
12293 			    VM_MAP_PAGE_MASK(src_map)) ==
12294 			    vm_map_round_page(original_end,
12295 			    VM_MAP_PAGE_MASK(src_map)));
12296 		}
12297 	}
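	/*
	 * Worked example of the adjustments above (illustrative only, not
	 * from the original source; all numbers are hypothetical).  Assume
	 * a 16K src_map (VM_MAP_PAGE_MASK(src_map) == 0x3fff), a 4K system
	 * page (PAGE_MASK == 0xfff), a first copy entry with
	 * vme_start == 0x5000, and copy->offset == 0x2000:
	 *
	 *	step 1: adjustment = 0x5000 - trunc(0x5000, 16K) = 0x1000
	 *	        vme_start: 0x5000 -> 0x4000    (now map-aligned)
	 *	step 2: adjustment = trunc(0x2000, 4K) - trunc(0x2000, 16K)
	 *	                   = 0x2000 - 0x0 = 0x2000
	 *	        vme_start: 0x4000 -> 0x6000
	 *
	 * The entry ends up 4K-aligned and still within the same 16K page
	 * [0x4000, 0x8000), which is exactly what the assertions check.
	 */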
12298 
12299 	/* Fix-up start and end points in copy.  This is necessary */
12300 	/* when the various entries in the copy object were picked */
12301 	/* up from different sub-maps */
12302 
12303 	tmp_entry = vm_map_copy_first_entry(copy);
12304 	copy_size = 0; /* compute actual size */
12305 	while (tmp_entry != vm_map_copy_to_entry(copy)) {
12306 		assert(VM_MAP_PAGE_ALIGNED(
12307 			    copy_addr + (tmp_entry->vme_end -
12308 			    tmp_entry->vme_start),
12309 			    MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12310 		assert(VM_MAP_PAGE_ALIGNED(
12311 			    copy_addr,
12312 			    MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12313 
12314 		/*
12315 		 * The copy_entries will be injected directly into the
12316 		 * destination map and might not be "map aligned" there...
12317 		 */
12318 		tmp_entry->map_aligned = FALSE;
12319 
12320 		tmp_entry->vme_end = copy_addr +
12321 		    (tmp_entry->vme_end - tmp_entry->vme_start);
12322 		tmp_entry->vme_start = copy_addr;
12323 		assert(tmp_entry->vme_start < tmp_entry->vme_end);
12324 		copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12325 		copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12326 		tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12327 	}
12328 
12329 	if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12330 	    copy_size < copy->size) {
12331 		/*
12332 		 * The actual size of the VM map copy is smaller than what
12333 		 * was requested by the caller.  This must be because some
12334 		 * PAGE_SIZE-sized pages are missing at the end of the last
12335 		 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12336 		 * The caller might not have been aware of those missing
12337 		 * pages and might not want to be aware of them, which is
12338 		 * fine as long as they don't try to access (and crash on)
12339 		 * those missing pages.
12340 		 * Let's adjust the size of the "copy", to avoid failing
12341 		 * in vm_map_copyout() or vm_map_copy_overwrite().
12342 		 */
12343 		assert(vm_map_round_page(copy_size,
12344 		    VM_MAP_PAGE_MASK(src_map)) ==
12345 		    vm_map_round_page(copy->size,
12346 		    VM_MAP_PAGE_MASK(src_map)));
12347 		copy->size = copy_size;
12348 	}
12349 
12350 	*copy_result = copy;
12351 	return KERN_SUCCESS;
12352 
12353 #undef  RETURN
12354 }
12355 
12356 kern_return_t
12357 vm_map_copy_extract(
12358 	vm_map_t                src_map,
12359 	vm_map_address_t        src_addr,
12360 	vm_map_size_t           len,
12361 	boolean_t               do_copy,
12362 	vm_map_copy_t           *copy_result,   /* OUT */
12363 	vm_prot_t               *cur_prot,      /* IN/OUT */
12364 	vm_prot_t               *max_prot,      /* IN/OUT */
12365 	vm_inherit_t            inheritance,
12366 	vm_map_kernel_flags_t   vmk_flags)
12367 {
12368 	vm_map_copy_t   copy;
12369 	kern_return_t   kr;
12370 	vm_prot_t required_cur_prot, required_max_prot;
12371 
12372 	/*
12373 	 *	Check for copies of zero bytes.
12374 	 */
12375 
12376 	if (len == 0) {
12377 		*copy_result = VM_MAP_COPY_NULL;
12378 		return KERN_SUCCESS;
12379 	}
12380 
12381 	/*
12382 	 *	Check that the end address doesn't overflow
12383 	 */
12384 	if (src_addr + len < src_addr) {
12385 		return KERN_INVALID_ADDRESS;
12386 	}
12387 
12388 	if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12389 		DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12390 	}
12391 
12392 	required_cur_prot = *cur_prot;
12393 	required_max_prot = *max_prot;
12394 
12395 	/*
12396 	 *	Allocate a header element for the list.
12397 	 *
12398 	 *	Use the start and end in the header to
12399 	 *	remember the endpoints prior to rounding.
12400 	 */
12401 
12402 	copy = vm_map_copy_allocate();
12403 	copy->type = VM_MAP_COPY_ENTRY_LIST;
12404 	copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12405 
12406 	vm_map_store_init(&copy->cpy_hdr);
12407 
12408 	copy->offset = 0;
12409 	copy->size = len;
12410 
12411 	kr = vm_map_remap_extract(src_map,
12412 	    src_addr,
12413 	    len,
12414 	    do_copy,             /* copy */
12415 	    &copy->cpy_hdr,
12416 	    cur_prot,            /* IN/OUT */
12417 	    max_prot,            /* IN/OUT */
12418 	    inheritance,
12419 	    vmk_flags);
12420 	if (kr != KERN_SUCCESS) {
12421 		vm_map_copy_discard(copy);
12422 		return kr;
12423 	}
12424 	if (required_cur_prot != VM_PROT_NONE) {
12425 		assert((*cur_prot & required_cur_prot) == required_cur_prot);
12426 		assert((*max_prot & required_max_prot) == required_max_prot);
12427 	}
12428 
12429 	*copy_result = copy;
12430 	return KERN_SUCCESS;
12431 }
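/*
 * Illustrative sketch (not part of the original source): a caller that
 * wants a pageable, copied (rather than shared) extract and requires at
 * least read access could look like this; "map", "addr", "size" and
 * "kr" are hypothetical:
 *
 *	vm_map_copy_t copy;
 *	vm_prot_t cur = VM_PROT_READ;	(IN: required, OUT: actual)
 *	vm_prot_t max = VM_PROT_READ;	(IN: required, OUT: actual)
 *	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
 *	vmk_flags.vmkf_copy_pageable = TRUE;
 *	kr = vm_map_copy_extract(map, addr, size, TRUE, &copy,
 *	    &cur, &max, VM_INHERIT_DEFAULT, vmk_flags);
 *
 * Passing VM_PROT_NONE in "cur"/"max" means "no requirement"; in that
 * case only the returned (OUT) protections are meaningful.
 */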
12432 
12433 /*
12434  *	vm_map_copyin_object:
12435  *
12436  *	Create a copy object from an object.
12437  *	Our caller donates an object reference.
12438  */
12439 
12440 kern_return_t
12441 vm_map_copyin_object(
12442 	vm_object_t             object,
12443 	vm_object_offset_t      offset, /* offset of region in object */
12444 	vm_object_size_t        size,   /* size of region in object */
12445 	vm_map_copy_t   *copy_result)   /* OUT */
12446 {
12447 	vm_map_copy_t   copy;           /* Resulting copy */
12448 
12449 	/*
12450 	 *	We drop the object into a special copy object
12451 	 *	that contains the object directly.
12452 	 */
12453 
12454 	copy = vm_map_copy_allocate();
12455 	copy->type = VM_MAP_COPY_OBJECT;
12456 	copy->cpy_object = object;
12457 	copy->offset = offset;
12458 	copy->size = size;
12459 
12460 	*copy_result = copy;
12461 	return KERN_SUCCESS;
12462 }
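/*
 * Illustrative sketch (not part of the original source): because the
 * caller donates an object reference, a hypothetical caller first takes
 * a reference it is willing to give away:
 *
 *	vm_map_copy_t copy;
 *	vm_object_reference(object);	(donated to the copy below)
 *	kr = vm_map_copyin_object(object, 0, size, &copy);
 *
 * On success "copy" owns that reference; if the copy is never consumed
 * elsewhere, release it with vm_map_copy_discard(copy).
 */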
12463 
12464 static void
12465 vm_map_fork_share(
12466 	vm_map_t        old_map,
12467 	vm_map_entry_t  old_entry,
12468 	vm_map_t        new_map)
12469 {
12470 	vm_object_t     object;
12471 	vm_map_entry_t  new_entry;
12472 
12473 	/*
12474 	 *	New sharing code.  New map entry
12475 	 *	references original object.  Internal
12476 	 *	objects use asynchronous copy algorithm for
12477 	 *	future copies.  First make sure we have
12478 	 *	the right object.  If we need a shadow,
12479 	 *	or someone else already has one, then
12480 	 *	make a new shadow and share it.
12481 	 */
12482 
12483 	object = VME_OBJECT(old_entry);
12484 	if (old_entry->is_sub_map) {
12485 		assert(old_entry->wired_count == 0);
12486 #ifndef NO_NESTED_PMAP
12487 		if (old_entry->use_pmap) {
12488 			kern_return_t   result;
12489 
12490 			result = pmap_nest(new_map->pmap,
12491 			    (VME_SUBMAP(old_entry))->pmap,
12492 			    (addr64_t)old_entry->vme_start,
12493 			    (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12494 			if (result) {
12495 				panic("vm_map_fork_share: pmap_nest failed!");
12496 			}
12497 		}
12498 #endif  /* NO_NESTED_PMAP */
12499 	} else if (object == VM_OBJECT_NULL) {
12500 		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12501 		    old_entry->vme_start));
12502 		VME_OFFSET_SET(old_entry, 0);
12503 		VME_OBJECT_SET(old_entry, object);
12504 		old_entry->use_pmap = TRUE;
12505 //		assert(!old_entry->needs_copy);
12506 	} else if (object->copy_strategy !=
12507 	    MEMORY_OBJECT_COPY_SYMMETRIC) {
12508 		/*
12509 		 *	We are already using an asymmetric
12510 		 *	copy, and therefore we already have
12511 		 *	the right object.
12512 		 */
12513 
12514 		assert(!old_entry->needs_copy);
12515 	} else if (old_entry->needs_copy ||       /* case 1 */
12516 	    object->shadowed ||                 /* case 2 */
12517 	    (!object->true_share &&             /* case 3 */
12518 	    !old_entry->is_shared &&
12519 	    (object->vo_size >
12520 	    (vm_map_size_t)(old_entry->vme_end -
12521 	    old_entry->vme_start)))) {
12522 		/*
12523 		 *	We need to create a shadow.
12524 		 *	There are three cases here.
12525 		 *	In the first case, we need to
12526 		 *	complete a deferred symmetrical
12527 		 *	copy that we participated in.
12528 		 *	In the second and third cases,
12529 		 *	we need to create the shadow so
12530 		 *	that changes that we make to the
12531 		 *	object do not interfere with
12532 		 *	any symmetrical copies which
12533 		 *	have occurred (case 2) or which
12534 		 *	might occur (case 3).
12535 		 *
12536 		 *	The first case is when we had
12537 		 *	deferred shadow object creation
12538 		 *	via the entry->needs_copy mechanism.
12539 		 *	This mechanism only works when
12540 		 *	only one entry points to the source
12541 		 *	object, and we are about to create
12542 		 *	a second entry pointing to the
12543 		 *	same object. The problem is that
12544 		 *	there is no way of mapping from
12545 		 *	an object to the entries pointing
12546 		 *	to it. (Deferred shadow creation
12547 		 *	works with one entry because it occurs
12548 		 *	at fault time, and we walk from the
12549 		 *	entry to the object when handling
12550 		 *	the fault.)
12551 		 *
12552 		 *	The second case is when the object
12553 		 *	to be shared has already been copied
12554 		 *	with a symmetric copy, but we point
12555 		 *	directly to the object without
12556 		 *	needs_copy set in our entry. (This
12557 		 *	can happen because different ranges
12558 		 *	of an object can be pointed to by
12559 		 *	different entries. In particular,
12560 		 *	a single entry pointing to an object
12561 		 *	can be split by a call to vm_inherit,
12562 		 *	which, combined with task_create, can
12563 		 *	result in the different entries
12564 		 *	having different needs_copy values.)
12565 		 *	The shadowed flag in the object allows
12566 		 *	us to detect this case. The problem
12567 		 *	with this case is that if this object
12568 		 *	has or will have shadows, then we
12569 		 *	must not perform an asymmetric copy
12570 		 *	of this object, since such a copy
12571 		 *	allows the object to be changed, which
12572 		 *	will break the previous symmetrical
12573 		 *	copies (which rely upon the object
12574 		 *	not changing). In a sense, the shadowed
12575 		 *	flag says "don't change this object".
12576 		 *	We fix this by creating a shadow
12577 		 *	object for this object, and sharing
12578 		 *	that. This works because we are free
12579 		 *	to change the shadow object (and thus
12580 		 *	to use an asymmetric copy strategy);
12581 		 *	this is also semantically correct,
12582 		 *	since this object is temporary, and
12583 		 *	therefore a copy of the object is
12584 		 *	as good as the object itself. (This
12585 		 *	is not true for permanent objects,
12586 		 *	since the pager needs to see changes,
12587 		 *	which won't happen if the changes
12588 		 *	are made to a copy.)
12589 		 *
12590 		 *	The third case is when the object
12591 		 *	to be shared has parts sticking
12592 		 *	outside of the entry we're working
12593 		 *	with, and thus may in the future
12594 		 *	be subject to a symmetrical copy.
12595 		 *	(This is a preemptive version of
12596 		 *	case 2.)
12597 		 */
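		/*
		 * Concrete instance of case 1 (illustrative, not from the
		 * original source): a symmetric copy left a single entry E
		 * pointing at object O with E->needs_copy set.  We are now
		 * creating a second entry for O (the child's), but O keeps
		 * no back-pointers to its entries, so the deferred copy can
		 * no longer be resolved at fault time; we must shadow O now.
		 */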
12598 		VME_OBJECT_SHADOW(old_entry,
12599 		    (vm_map_size_t) (old_entry->vme_end -
12600 		    old_entry->vme_start));
12601 
12602 		/*
12603 		 *	If we're making a shadow for other than
12604 		 *	copy on write reasons, then we have
12605 		 *	to remove write permission.
12606 		 */
12607 
12608 		if (!old_entry->needs_copy &&
12609 		    (old_entry->protection & VM_PROT_WRITE)) {
12610 			vm_prot_t prot;
12611 
12612 			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
12613 
12614 			prot = old_entry->protection & ~VM_PROT_WRITE;
12615 
12616 			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
12617 
12618 			if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12619 				prot |= VM_PROT_EXECUTE;
12620 			}
12621 
12622 
12623 			if (old_map->mapped_in_other_pmaps) {
12624 				vm_object_pmap_protect(
12625 					VME_OBJECT(old_entry),
12626 					VME_OFFSET(old_entry),
12627 					(old_entry->vme_end -
12628 					old_entry->vme_start),
12629 					PMAP_NULL,
12630 					PAGE_SIZE,
12631 					old_entry->vme_start,
12632 					prot);
12633 			} else {
12634 				pmap_protect(old_map->pmap,
12635 				    old_entry->vme_start,
12636 				    old_entry->vme_end,
12637 				    prot);
12638 			}
12639 		}
12640 
12641 		old_entry->needs_copy = FALSE;
12642 		object = VME_OBJECT(old_entry);
12643 	}
12644 
12645 
12646 	/*
12647 	 *	If object was using a symmetric copy strategy,
12648 	 *	change its copy strategy to the default
12649 	 *	asymmetric copy strategy, which is copy_delay
12650 	 *	in the non-norma case and copy_call in the
12651 	 *	norma case. Bump the reference count for the
12652 	 *	new entry.
12653 	 */
12654 
12655 	if (old_entry->is_sub_map) {
12656 		vm_map_lock(VME_SUBMAP(old_entry));
12657 		vm_map_reference(VME_SUBMAP(old_entry));
12658 		vm_map_unlock(VME_SUBMAP(old_entry));
12659 	} else {
12660 		vm_object_lock(object);
12661 		vm_object_reference_locked(object);
12662 		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12663 			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12664 		}
12665 		vm_object_unlock(object);
12666 	}
12667 
12668 	/*
12669 	 *	Clone the entry, using object ref from above.
12670 	 *	Mark both entries as shared.
12671 	 */
12672 
12673 	new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12674 	                                                  * map or descendants */
12675 	vm_map_entry_copy(old_map, new_entry, old_entry);
12676 	old_entry->is_shared = TRUE;
12677 	new_entry->is_shared = TRUE;
12678 
12679 	/*
12680 	 * We're dealing with a shared mapping, so the resulting mapping
12681 	 * should inherit some of the original mapping's accounting settings.
12682 	 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12683 	 * "use_pmap" should stay the same as before (if it hasn't been reset
12684 	 * to TRUE when we cleared "iokit_acct").
12685 	 */
12686 	assert(!new_entry->iokit_acct);
12687 
12688 	/*
12689 	 *	If the old entry's inheritance is VM_INHERIT_NONE,
12690 	 *	the new entry is for a corpse fork; remove the
12691 	 *	write permission from the new entry.
12692 	 */
12693 	if (old_entry->inheritance == VM_INHERIT_NONE) {
12694 		new_entry->protection &= ~VM_PROT_WRITE;
12695 		new_entry->max_protection &= ~VM_PROT_WRITE;
12696 	}
12697 
12698 	/*
12699 	 *	Insert the entry into the new map -- we
12700 	 *	know we're inserting at the end of the new
12701 	 *	map.
12702 	 */
12703 
12704 	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12705 	    VM_MAP_KERNEL_FLAGS_NONE);
12706 
12707 	/*
12708 	 *	Update the physical map
12709 	 */
12710 
12711 	if (old_entry->is_sub_map) {
12712 		/* Bill Angell pmap support goes here */
12713 	} else {
12714 		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12715 		    old_entry->vme_end - old_entry->vme_start,
12716 		    old_entry->vme_start);
12717 	}
12718 }
12719 
12720 static boolean_t
12721 vm_map_fork_copy(
12722 	vm_map_t        old_map,
12723 	vm_map_entry_t  *old_entry_p,
12724 	vm_map_t        new_map,
12725 	int             vm_map_copyin_flags)
12726 {
12727 	vm_map_entry_t old_entry = *old_entry_p;
12728 	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12729 	vm_map_offset_t start = old_entry->vme_start;
12730 	vm_map_copy_t copy;
12731 	vm_map_entry_t last = vm_map_last_entry(new_map);
12732 
12733 	vm_map_unlock(old_map);
12734 	/*
12735 	 *	Use maxprot version of copyin because we
12736 	 *	care about whether this memory can ever
12737 	 *	be accessed, not just whether it's accessible
12738 	 *	right now.
12739 	 */
12740 	vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12741 	if (vm_map_copyin_internal(old_map, start, entry_size,
12742 	    vm_map_copyin_flags, &copy)
12743 	    != KERN_SUCCESS) {
12744 		/*
12745 		 *	The map might have changed while it
12746 		 *	was unlocked, check it again.  Skip
12747 		 *	any blank space or permanently
12748 		 *	unreadable region.
12749 		 */
12750 		vm_map_lock(old_map);
12751 		if (!vm_map_lookup_entry(old_map, start, &last) ||
12752 		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12753 			last = last->vme_next;
12754 		}
12755 		*old_entry_p = last;
12756 
12757 		/*
12758 		 * XXX	For some error returns, want to
12759 		 * XXX	skip to the next element.  Note
12760 		 *	that INVALID_ADDRESS and
12761 		 *	PROTECTION_FAILURE are handled above.
12762 		 */
12763 
12764 		return FALSE;
12765 	}
12766 
12767 	/*
12768 	 * Assert that the vm_map_copy is coming from the right
12769 	 * zone and hasn't been forged
12770 	 */
12771 	vm_map_copy_require(copy);
12772 
12773 	/*
12774 	 *	Insert the copy into the new map
12775 	 */
12776 	vm_map_copy_insert(new_map, last, copy);
12777 
12778 	/*
12779 	 *	Pick up the traversal at the end of
12780 	 *	the copied region.
12781 	 */
12782 
12783 	vm_map_lock(old_map);
12784 	start += entry_size;
12785 	if (!vm_map_lookup_entry(old_map, start, &last)) {
12786 		last = last->vme_next;
12787 	} else {
12788 		if (last->vme_start == start) {
12789 			/*
12790 			 * No need to clip here and we don't
12791 			 * want to cause any unnecessary
12792 			 * unnesting...
12793 			 */
12794 		} else {
12795 			vm_map_clip_start(old_map, last, start);
12796 		}
12797 	}
12798 	*old_entry_p = last;
12799 
12800 	return TRUE;
12801 }
12802 
12803 /*
12804  *	vm_map_fork:
12805  *
12806  *	Create and return a new map based on the old
12807  *	map, according to the inheritance values on the
12808  *	regions in that map and the options.
12809  *
12810  *	The source map must not be locked.
12811  */
12812 vm_map_t
12813 vm_map_fork(
12814 	ledger_t        ledger,
12815 	vm_map_t        old_map,
12816 	int             options)
12817 {
12818 	pmap_t          new_pmap;
12819 	vm_map_t        new_map;
12820 	vm_map_entry_t  old_entry;
12821 	vm_map_size_t   new_size = 0, entry_size;
12822 	vm_map_entry_t  new_entry;
12823 	boolean_t       src_needs_copy;
12824 	boolean_t       new_entry_needs_copy;
12825 	boolean_t       pmap_is64bit;
12826 	int             vm_map_copyin_flags;
12827 	vm_inherit_t    old_entry_inheritance;
12828 	int             map_create_options;
12829 	kern_return_t   footprint_collect_kr;
12830 
12831 	if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12832 	    VM_MAP_FORK_PRESERVE_PURGEABLE |
12833 	    VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12834 		/* unsupported option */
12835 		return VM_MAP_NULL;
12836 	}
12837 
12838 	pmap_is64bit =
12839 #if defined(__i386__) || defined(__x86_64__)
12840 	    old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12841 #elif defined(__arm64__)
12842 	    old_map->pmap->is_64bit;
12843 #elif defined(__arm__)
12844 	    FALSE;
12845 #else
12846 #error Unknown architecture.
12847 #endif
12848 
12849 	unsigned int pmap_flags = 0;
12850 	pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12851 #if defined(HAS_APPLE_PAC)
12852 	pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12853 #endif
12854 #if PMAP_CREATE_FORCE_4K_PAGES
12855 	if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
12856 	    PAGE_SIZE != FOURK_PAGE_SIZE) {
12857 		pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
12858 	}
12859 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12860 	new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12861 	if (new_pmap == NULL) {
12862 		return VM_MAP_NULL;
12863 	}
12864 
12865 	vm_map_reference(old_map);
12866 	vm_map_lock(old_map);
12867 
12868 	map_create_options = 0;
12869 	if (old_map->hdr.entries_pageable) {
12870 		map_create_options |= VM_MAP_CREATE_PAGEABLE;
12871 	}
12872 	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12873 		map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12874 		footprint_collect_kr = KERN_SUCCESS;
12875 	}
12876 	new_map = vm_map_create_options(new_pmap,
12877 	    old_map->min_offset,
12878 	    old_map->max_offset,
12879 	    map_create_options);
12880 	/* inherit cs_enforcement */
12881 	vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
12882 	vm_map_lock(new_map);
12883 	vm_commit_pagezero_status(new_map);
12884 	/* inherit the parent map's page size */
12885 	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12886 
12887 	/* ensure PMAP_CS structures are prepared for the fork */
12888 	pmap_cs_fork_prepare(old_map->pmap, new_pmap);
12889 
12890 	for (old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map);) {
12891 		/*
12892 		 * Abort any corpse collection if the system is shutting down.
12893 		 */
12894 		if ((options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12895 		    get_system_inshutdown()) {
12896 			vm_map_corpse_footprint_collect_done(new_map);
12897 			vm_map_unlock(new_map);
12898 			vm_map_unlock(old_map);
12899 			vm_map_deallocate(new_map);
12900 			vm_map_deallocate(old_map);
12901 			printf("Aborting corpse map due to system shutdown\n");
12902 			return VM_MAP_NULL;
12903 		}
12904 
12905 		entry_size = old_entry->vme_end - old_entry->vme_start;
12906 
12907 		old_entry_inheritance = old_entry->inheritance;
12908 		/*
12909 		 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12910 		 * share VM_INHERIT_NONE entries that are not backed by a
12911 		 * device pager.
12912 		 */
12913 		if (old_entry_inheritance == VM_INHERIT_NONE &&
12914 		    (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12915 		    (old_entry->protection & VM_PROT_READ) &&
12916 		    !(!old_entry->is_sub_map &&
12917 		    VME_OBJECT(old_entry) != NULL &&
12918 		    VME_OBJECT(old_entry)->pager != NULL &&
12919 		    is_device_pager_ops(
12920 			    VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12921 			old_entry_inheritance = VM_INHERIT_SHARE;
12922 		}
12923 
12924 		if (old_entry_inheritance != VM_INHERIT_NONE &&
12925 		    (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12926 		    footprint_collect_kr == KERN_SUCCESS) {
12927 			/*
12928 			 * The corpse won't have old_map->pmap to query
12929 			 * footprint information, so collect that data now
12930 			 * and store it in new_map->vmmap_corpse_footprint
12931 			 * for later autopsy.
12932 			 */
12933 			footprint_collect_kr =
12934 			    vm_map_corpse_footprint_collect(old_map,
12935 			    old_entry,
12936 			    new_map);
12937 		}
12938 
12939 		switch (old_entry_inheritance) {
12940 		case VM_INHERIT_NONE:
12941 			break;
12942 
12943 		case VM_INHERIT_SHARE:
12944 			vm_map_fork_share(old_map, old_entry, new_map);
12945 			new_size += entry_size;
12946 			break;
12947 
12948 		case VM_INHERIT_COPY:
12949 
12950 			/*
12951 			 *	Inline the copy_quickly case;
12952 			 *	upon failure, fall back on call
12953 			 *	to vm_map_fork_copy.
12954 			 */
12955 
12956 			if (old_entry->is_sub_map) {
12957 				break;
12958 			}
12959 			if ((old_entry->wired_count != 0) ||
12960 			    ((VME_OBJECT(old_entry) != NULL) &&
12961 			    (VME_OBJECT(old_entry)->true_share))) {
12962 				goto slow_vm_map_fork_copy;
12963 			}
12964 
12965 			new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12966 			vm_map_entry_copy(old_map, new_entry, old_entry);
12967 			if (old_entry->permanent) {
12968 				/* inherit "permanent" on fork() */
12969 				new_entry->permanent = TRUE;
12970 			}
12971 
12972 			if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
12973 				new_map->jit_entry_exists = TRUE;
12974 			}
12975 
12976 			if (new_entry->is_sub_map) {
12977 				/* clear address space specifics */
12978 				new_entry->use_pmap = FALSE;
12979 			} else {
12980 				/*
12981 				 * We're dealing with a copy-on-write operation,
12982 				 * so the resulting mapping should not inherit
12983 				 * the original mapping's accounting settings.
12984 				 * "iokit_acct" should have been cleared in
12985 				 * vm_map_entry_copy().
12986 				 * "use_pmap" should be reset to its default
12987 				 * (TRUE) so that the new mapping gets
12988 				 * accounted for in the task's memory footprint.
12989 				 */
12990 				assert(!new_entry->iokit_acct);
12991 				new_entry->use_pmap = TRUE;
12992 			}
12993 
12994 			if (!vm_object_copy_quickly(
12995 				    VME_OBJECT(new_entry),
12996 				    VME_OFFSET(old_entry),
12997 				    (old_entry->vme_end -
12998 				    old_entry->vme_start),
12999 				    &src_needs_copy,
13000 				    &new_entry_needs_copy)) {
13001 				vm_map_entry_dispose(new_map, new_entry);
13002 				goto slow_vm_map_fork_copy;
13003 			}
13004 
13005 			/*
13006 			 *	Handle copy-on-write obligations
13007 			 */
13008 
13009 			if (src_needs_copy && !old_entry->needs_copy) {
13010 				vm_prot_t prot;
13011 
13012 				assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
13013 
13014 				prot = old_entry->protection & ~VM_PROT_WRITE;
13015 
13016 				if (override_nx(old_map, VME_ALIAS(old_entry))
13017 				    && prot) {
13018 					prot |= VM_PROT_EXECUTE;
13019 				}
13020 
13021 				assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
13022 
13023 				vm_object_pmap_protect(
13024 					VME_OBJECT(old_entry),
13025 					VME_OFFSET(old_entry),
13026 					(old_entry->vme_end -
13027 					old_entry->vme_start),
13028 					((old_entry->is_shared
13029 					|| old_map->mapped_in_other_pmaps)
13030 					? PMAP_NULL :
13031 					old_map->pmap),
13032 					VM_MAP_PAGE_SIZE(old_map),
13033 					old_entry->vme_start,
13034 					prot);
13035 
13036 				assert(old_entry->wired_count == 0);
13037 				old_entry->needs_copy = TRUE;
13038 			}
13039 			new_entry->needs_copy = new_entry_needs_copy;
13040 
13041 			/*
13042 			 *	Insert the entry at the end
13043 			 *	of the map.
13044 			 */
13045 
13046 			vm_map_store_entry_link(new_map,
13047 			    vm_map_last_entry(new_map),
13048 			    new_entry,
13049 			    VM_MAP_KERNEL_FLAGS_NONE);
13050 			new_size += entry_size;
13051 			break;
13052 
13053 slow_vm_map_fork_copy:
13054 			vm_map_copyin_flags = 0;
13055 			if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
13056 				vm_map_copyin_flags |=
13057 				    VM_MAP_COPYIN_PRESERVE_PURGEABLE;
13058 			}
13059 			if (vm_map_fork_copy(old_map,
13060 			    &old_entry,
13061 			    new_map,
13062 			    vm_map_copyin_flags)) {
13063 				new_size += entry_size;
13064 			}
13065 			continue;
13066 		}
13067 		old_entry = old_entry->vme_next;
13068 	}
13069 
13070 #if defined(__arm64__)
13071 	pmap_insert_sharedpage(new_map->pmap);
13072 #endif /* __arm64__ */
13073 
13074 	new_map->size = new_size;
13075 
13076 	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13077 		vm_map_corpse_footprint_collect_done(new_map);
13078 	}
13079 
13080 	/* Propagate JIT entitlement for the pmap layer. */
13081 	if (pmap_get_jit_entitled(old_map->pmap)) {
13082 		/* Tell the pmap that it supports JIT. */
13083 		pmap_set_jit_entitled(new_map->pmap);
13084 	}
13085 
13086 	vm_map_unlock(new_map);
13087 	vm_map_unlock(old_map);
13088 	vm_map_deallocate(old_map);
13089 
13090 	return new_map;
13091 }
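/*
 * Illustrative sketch (not part of the original source): a fork-like
 * caller, with "ledger" and "parent_map" hypothetical:
 *
 *	vm_map_t child_map;
 *	child_map = vm_map_fork(ledger, parent_map, 0);
 *	if (child_map == VM_MAP_NULL) {
 *		(with options == 0, this means pmap creation failed)
 *	}
 *
 * Corpse creation would instead pass VM_MAP_FORK_CORPSE_FOOTPRINT,
 * optionally combined with VM_MAP_FORK_SHARE_IF_INHERIT_NONE and
 * VM_MAP_FORK_PRESERVE_PURGEABLE.
 */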
13092 
13093 /*
13094  * vm_map_exec:
13095  *
13096  *      Setup the "new_map" with the proper execution environment according
13097  *	to the type of executable (platform, 64bit, chroot environment).
13098  *	Map the comm page and shared region, etc...
13099  */
13100 kern_return_t
13101 vm_map_exec(
13102 	vm_map_t        new_map,
13103 	task_t          task,
13104 	boolean_t       is64bit,
13105 	void            *fsroot,
13106 	cpu_type_t      cpu,
13107 	cpu_subtype_t   cpu_subtype,
13108 	boolean_t       reslide,
13109 	boolean_t       is_driverkit)
13110 {
13111 	SHARED_REGION_TRACE_DEBUG(
13112 		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13113 		(void *)VM_KERNEL_ADDRPERM(current_task()),
13114 		(void *)VM_KERNEL_ADDRPERM(new_map),
13115 		(void *)VM_KERNEL_ADDRPERM(task),
13116 		(void *)VM_KERNEL_ADDRPERM(fsroot),
13117 		cpu,
13118 		cpu_subtype));
13119 	(void) vm_commpage_enter(new_map, task, is64bit);
13120 
13121 	(void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide, is_driverkit);
13122 
13123 	SHARED_REGION_TRACE_DEBUG(
13124 		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13125 		(void *)VM_KERNEL_ADDRPERM(current_task()),
13126 		(void *)VM_KERNEL_ADDRPERM(new_map),
13127 		(void *)VM_KERNEL_ADDRPERM(task),
13128 		(void *)VM_KERNEL_ADDRPERM(fsroot),
13129 		cpu,
13130 		cpu_subtype));
13131 
13132 	/*
13133 	 * Some devices have region(s) of memory that shouldn't get allocated by
13134 	 * user processes. The following code creates dummy vm_map_entry_t's for each
13135 	 * of the regions that need to be reserved to prevent any allocations in
13136 	 * those regions.
13137 	 */
13138 	kern_return_t kr = KERN_FAILURE;
13139 	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13140 	vmk_flags.vmkf_permanent = TRUE;
13141 	vmk_flags.vmkf_beyond_max = TRUE;
13142 
13143 	struct vm_reserved_region *regions = NULL;
13144 	size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
13145 	assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13146 
13147 	for (size_t i = 0; i < num_regions; ++i) {
13148 		kr = vm_map_enter(
13149 			new_map,
13150 			&regions[i].vmrr_addr,
13151 			regions[i].vmrr_size,
13152 			(vm_map_offset_t)0,
13153 			VM_FLAGS_FIXED,
13154 			vmk_flags,
13155 			VM_KERN_MEMORY_NONE,
13156 			VM_OBJECT_NULL,
13157 			(vm_object_offset_t)0,
13158 			FALSE,
13159 			VM_PROT_NONE,
13160 			VM_PROT_NONE,
13161 			VM_INHERIT_COPY);
13162 
13163 		if (kr != KERN_SUCCESS) {
13164 			panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13165 		}
13166 	}
13167 
13168 	new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13169 
13170 	return KERN_SUCCESS;
13171 }
13172 
13173 uint64_t vm_map_lookup_locked_copy_slowly_count = 0;
13174 uint64_t vm_map_lookup_locked_copy_slowly_size = 0;
13175 uint64_t vm_map_lookup_locked_copy_slowly_max = 0;
13176 uint64_t vm_map_lookup_locked_copy_slowly_restart = 0;
13177 uint64_t vm_map_lookup_locked_copy_slowly_error = 0;
13178 uint64_t vm_map_lookup_locked_copy_strategically_count = 0;
13179 uint64_t vm_map_lookup_locked_copy_strategically_size = 0;
13180 uint64_t vm_map_lookup_locked_copy_strategically_max = 0;
13181 uint64_t vm_map_lookup_locked_copy_strategically_restart = 0;
13182 uint64_t vm_map_lookup_locked_copy_strategically_error = 0;
13183 uint64_t vm_map_lookup_locked_copy_shadow_count = 0;
13184 uint64_t vm_map_lookup_locked_copy_shadow_size = 0;
13185 uint64_t vm_map_lookup_locked_copy_shadow_max = 0;
13186 /*
13187  *	vm_map_lookup_locked:
13188  *
13189  *	Finds the VM object, offset, and
13190  *	protection for a given virtual address in the
13191  *	specified map, assuming a page fault of the
13192  *	type specified.
13193  *
13194  *	Returns the (object, offset, protection) for
13195  *	this address, whether it is wired down, and whether
13196  *	this map has the only reference to the data in question.
13197  *	In order to later verify this lookup, a "version"
13198  *	is returned.
13199  *	If contended != NULL, *contended will be set to
13200  *	true iff the thread had to spin or block to acquire
13201  *	an exclusive lock.
13202  *
13203  *	The map MUST be locked by the caller and WILL be
13204  *	locked on exit.  In order to guarantee the
13205  *	existence of the returned object, it is returned
13206  *	locked.
13207  *
13208  *	If a lookup is requested with "write protection"
13209  *	specified, the map may be changed to perform virtual
13210  *	copying operations, although the data referenced will
13211  *	remain the same.
13212  */
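/*
 * Illustrative sketch of the intended calling pattern (not part of the
 * original source; error handling elided and the fault step only
 * hinted at):
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
 *	    object_lock_type, &version, &object, &offset, &prot,
 *	    &wired, &fault_info, &real_map, NULL);
 *	(... drop the map lock, fault on object/offset ...)
 *	(... then verify "version" against the map, e.g. with
 *	 vm_map_verify(), before trusting the earlier lookup.)
 */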
13213 kern_return_t
13214 vm_map_lookup_locked(
13215 	vm_map_t                *var_map,       /* IN/OUT */
13216 	vm_map_offset_t         vaddr,
13217 	vm_prot_t               fault_type,
13218 	int                     object_lock_type,
13219 	vm_map_version_t        *out_version,   /* OUT */
13220 	vm_object_t             *object,        /* OUT */
13221 	vm_object_offset_t      *offset,        /* OUT */
13222 	vm_prot_t               *out_prot,      /* OUT */
13223 	boolean_t               *wired,         /* OUT */
13224 	vm_object_fault_info_t  fault_info,     /* OUT */
13225 	vm_map_t                *real_map,      /* OUT */
13226 	bool                    *contended)     /* OUT */
13227 {
13228 	vm_map_entry_t                  entry;
13229 	vm_map_t                        map = *var_map;
13230 	vm_map_t                        old_map = *var_map;
13231 	vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
13232 	vm_map_offset_t                 cow_parent_vaddr = 0;
13233 	vm_map_offset_t                 old_start = 0;
13234 	vm_map_offset_t                 old_end = 0;
13235 	vm_prot_t                       prot;
13236 	boolean_t                       mask_protections;
13237 	boolean_t                       force_copy;
13238 	boolean_t                       no_force_copy_if_executable;
13239 	boolean_t                       submap_needed_copy;
13240 	vm_prot_t                       original_fault_type;
13241 	vm_map_size_t                   fault_page_mask;
13242 
13243 	/*
13244 	 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13245 	 * as a mask against the mapping's actual protections, not as an
13246 	 * absolute value.
13247 	 */
13248 	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13249 	force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13250 	no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13251 	fault_type &= VM_PROT_ALL;
13252 	original_fault_type = fault_type;
13253 	if (contended) {
13254 		*contended = false;
13255 	}
13256 
13257 	*real_map = map;
13258 
13259 	fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13260 	vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13261 
13262 RetryLookup:
13263 	fault_type = original_fault_type;
13264 
13265 	/*
13266 	 *	If the map has an interesting hint, try it before calling
13267 	 *	full blown lookup routine.
13268 	 */
13269 	entry = map->hint;
13270 
13271 	if ((entry == vm_map_to_entry(map)) ||
13272 	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13273 		vm_map_entry_t  tmp_entry;
13274 
13275 		/*
13276 		 *	Entry was either not a valid hint, or the vaddr
13277 		 *	was not contained in the entry, so do a full lookup.
13278 		 */
13279 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13280 			if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13281 				vm_map_unlock(cow_sub_map_parent);
13282 			}
13283 			if ((*real_map != map)
13284 			    && (*real_map != cow_sub_map_parent)) {
13285 				vm_map_unlock(*real_map);
13286 			}
13287 			return KERN_INVALID_ADDRESS;
13288 		}
13289 
13290 		entry = tmp_entry;
13291 	}
13292 	if (map == old_map) {
13293 		old_start = entry->vme_start;
13294 		old_end = entry->vme_end;
13295 	}
13296 
13297 	/*
13298 	 *	Handle submaps.  Drop lock on upper map, submap is
13299 	 *	returned locked.
13300 	 */
13301 
13302 	submap_needed_copy = FALSE;
13303 submap_recurse:
13304 	if (entry->is_sub_map) {
13305 		vm_map_offset_t         local_vaddr;
13306 		vm_map_offset_t         end_delta;
13307 		vm_map_offset_t         start_delta;
13308 		vm_map_entry_t          submap_entry, saved_submap_entry;
13309 		vm_object_offset_t      submap_entry_offset;
13310 		vm_object_size_t        submap_entry_size;
13311 		vm_prot_t               subentry_protection;
13312 		vm_prot_t               subentry_max_protection;
13313 		boolean_t               subentry_no_copy_on_read;
13314 		boolean_t               mapped_needs_copy = FALSE;
13315 		vm_map_version_t        version;
13316 
13317 		assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13318 		    "map %p (%d) entry %p submap %p (%d)\n",
13319 		    map, VM_MAP_PAGE_SHIFT(map), entry,
13320 		    VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13321 
13322 		local_vaddr = vaddr;
13323 
13324 		if ((entry->use_pmap &&
13325 		    !((fault_type & VM_PROT_WRITE) ||
13326 		    force_copy))) {
13327 			/* if real_map equals map we unlock below */
13328 			if ((*real_map != map) &&
13329 			    (*real_map != cow_sub_map_parent)) {
13330 				vm_map_unlock(*real_map);
13331 			}
13332 			*real_map = VME_SUBMAP(entry);
13333 		}
13334 
13335 		if (entry->needs_copy &&
13336 		    ((fault_type & VM_PROT_WRITE) ||
13337 		    force_copy)) {
13338 			if (!mapped_needs_copy) {
13339 				if (vm_map_lock_read_to_write(map)) {
13340 					vm_map_lock_read(map);
13341 					*real_map = map;
13342 					goto RetryLookup;
13343 				}
13344 				vm_map_lock_read(VME_SUBMAP(entry));
13345 				*var_map = VME_SUBMAP(entry);
13346 				cow_sub_map_parent = map;
13347 				/* reset base to map before cow object */
13348 				/* this is the map which will accept   */
13349 				/* the new cow object */
13350 				old_start = entry->vme_start;
13351 				old_end = entry->vme_end;
13352 				cow_parent_vaddr = vaddr;
13353 				mapped_needs_copy = TRUE;
13354 			} else {
13355 				vm_map_lock_read(VME_SUBMAP(entry));
13356 				*var_map = VME_SUBMAP(entry);
13357 				if ((cow_sub_map_parent != map) &&
13358 				    (*real_map != map)) {
13359 					vm_map_unlock(map);
13360 				}
13361 			}
13362 		} else {
13363 			if (entry->needs_copy) {
13364 				submap_needed_copy = TRUE;
13365 			}
13366 			vm_map_lock_read(VME_SUBMAP(entry));
13367 			*var_map = VME_SUBMAP(entry);
13368 			/* leave map locked if it is a target */
13369 			/* cow sub_map above otherwise, just  */
13370 			/* follow the maps down to the object */
13371 			/* here we unlock knowing we are not  */
13372 			/* revisiting the map.  */
13373 			if ((*real_map != map) && (map != cow_sub_map_parent)) {
13374 				vm_map_unlock_read(map);
13375 			}
13376 		}
13377 
13378 		map = *var_map;
13379 
13380 		/* calculate the offset in the submap for vaddr */
13381 		local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13382 		assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13383 		    "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13384 		    (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13385 
13386 RetrySubMap:
13387 		if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13388 			if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13389 				vm_map_unlock(cow_sub_map_parent);
13390 			}
13391 			if ((*real_map != map)
13392 			    && (*real_map != cow_sub_map_parent)) {
13393 				vm_map_unlock(*real_map);
13394 			}
13395 			*real_map = map;
13396 			return KERN_INVALID_ADDRESS;
13397 		}
13398 
13399 		/* find the attenuated shadow of the underlying object */
13400 		/* on our target map */
13401 
13402 		/* In English: the submap object may extend beyond the    */
13403 		/* region mapped by the entry, or may only fill a portion */
13404 		/* of it.  For our purposes, we only care if the object   */
13405 		/* doesn't fill it.  In that case the area which will     */
13406 		/* ultimately be clipped in the top map only needs        */
13407 		/* to be as big as the portion of the underlying entry    */
13408 		/* which is mapped. */
13409 		start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13410 		    submap_entry->vme_start - VME_OFFSET(entry) : 0;
13411 
13412 		end_delta =
13413 		    (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13414 		    submap_entry->vme_end ?
13415 		    0 : (VME_OFFSET(entry) +
13416 		    (old_end - old_start))
13417 		    - submap_entry->vme_end;
13418 
13419 		old_start += start_delta;
13420 		old_end -= end_delta;
13421 
13422 		if (submap_entry->is_sub_map) {
13423 			entry = submap_entry;
13424 			vaddr = local_vaddr;
13425 			goto submap_recurse;
13426 		}
13427 
13428 		if (((fault_type & VM_PROT_WRITE) ||
13429 		    force_copy)
13430 		    && cow_sub_map_parent) {
13431 			vm_object_t     sub_object, copy_object;
13432 			vm_object_offset_t copy_offset;
13433 			vm_map_offset_t local_start;
13434 			vm_map_offset_t local_end;
13435 			boolean_t       object_copied = FALSE;
13436 			vm_object_offset_t object_copied_offset = 0;
13437 			boolean_t       object_copied_needs_copy = FALSE;
13438 			kern_return_t   kr = KERN_SUCCESS;
13439 
13440 			if (vm_map_lock_read_to_write(map)) {
13441 				vm_map_lock_read(map);
13442 				old_start -= start_delta;
13443 				old_end += end_delta;
13444 				goto RetrySubMap;
13445 			}
13446 
13447 
13448 			sub_object = VME_OBJECT(submap_entry);
13449 			if (sub_object == VM_OBJECT_NULL) {
13450 				sub_object =
13451 				    vm_object_allocate(
13452 					(vm_map_size_t)
13453 					(submap_entry->vme_end -
13454 					submap_entry->vme_start));
13455 				VME_OBJECT_SET(submap_entry, sub_object);
13456 				VME_OFFSET_SET(submap_entry, 0);
13457 				assert(!submap_entry->is_sub_map);
13458 				assert(submap_entry->use_pmap);
13459 			}
13460 			local_start =  local_vaddr -
13461 			    (cow_parent_vaddr - old_start);
13462 			local_end = local_vaddr +
13463 			    (old_end - cow_parent_vaddr);
13464 			vm_map_clip_start(map, submap_entry, local_start);
13465 			vm_map_clip_end(map, submap_entry, local_end);
13466 			if (submap_entry->is_sub_map) {
13467 				/* unnesting was done when clipping */
13468 				assert(!submap_entry->use_pmap);
13469 			}
13470 
13471 			/* This is the COW case, let's connect */
13472 			/* an entry in our space to the underlying */
13473 			/* object in the submap, bypassing the  */
13474 			/* submap. */
13475 			submap_entry_offset = VME_OFFSET(submap_entry);
13476 			submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13477 
13478 			if ((submap_entry->wired_count != 0 ||
13479 			    sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
13480 			    (submap_entry->protection & VM_PROT_EXECUTE) &&
13481 			    no_force_copy_if_executable) {
13482 //				printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13483 				if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13484 					vm_map_unlock(cow_sub_map_parent);
13485 				}
13486 				if ((*real_map != map)
13487 				    && (*real_map != cow_sub_map_parent)) {
13488 					vm_map_unlock(*real_map);
13489 				}
13490 				*real_map = map;
13491 				kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_NO_COW_ON_EXECUTABLE), 0 /* arg */);
13492 				vm_map_lock_write_to_read(map);
13493 				kr = KERN_PROTECTION_FAILURE;
13494 				DTRACE_VM4(submap_no_copy_executable,
13495 				    vm_map_t, map,
13496 				    vm_object_offset_t, submap_entry_offset,
13497 				    vm_object_size_t, submap_entry_size,
13498 				    int, kr);
13499 				return kr;
13500 			}
13501 
13502 			if (submap_entry->wired_count != 0) {
13503 				vm_object_reference(sub_object);
13504 
13505 				assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13506 				    "submap_entry %p offset 0x%llx\n",
13507 				    submap_entry, VME_OFFSET(submap_entry));
13508 
13509 				DTRACE_VM6(submap_copy_slowly,
13510 				    vm_map_t, cow_sub_map_parent,
13511 				    vm_map_offset_t, vaddr,
13512 				    vm_map_t, map,
13513 				    vm_object_size_t, submap_entry_size,
13514 				    int, submap_entry->wired_count,
13515 				    int, sub_object->copy_strategy);
13516 
13517 				saved_submap_entry = submap_entry;
13518 				version.main_timestamp = map->timestamp;
13519 				vm_map_unlock(map); /* Increments timestamp by 1 */
13520 				submap_entry = VM_MAP_ENTRY_NULL;
13521 
13522 				vm_object_lock(sub_object);
13523 				kr = vm_object_copy_slowly(sub_object,
13524 				    submap_entry_offset,
13525 				    submap_entry_size,
13526 				    FALSE,
13527 				    &copy_object);
13528 				object_copied = TRUE;
13529 				object_copied_offset = 0;
13530 				/* 4k: account for extra offset in physical page */
13531 				object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13532 				object_copied_needs_copy = FALSE;
13533 				vm_object_deallocate(sub_object);
13534 
13535 				vm_map_lock(map);
13536 
13537 				if (kr != KERN_SUCCESS &&
13538 				    kr != KERN_MEMORY_RESTART_COPY) {
13539 					if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13540 						vm_map_unlock(cow_sub_map_parent);
13541 					}
13542 					if ((*real_map != map)
13543 					    && (*real_map != cow_sub_map_parent)) {
13544 						vm_map_unlock(*real_map);
13545 					}
13546 					*real_map = map;
13547 					vm_object_deallocate(copy_object);
13548 					copy_object = VM_OBJECT_NULL;
13549 					kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_SLOWLY_FAILED), 0 /* arg */);
13550 					vm_map_lock_write_to_read(map);
13551 					DTRACE_VM4(submap_copy_error_slowly,
13552 					    vm_object_t, sub_object,
13553 					    vm_object_offset_t, submap_entry_offset,
13554 					    vm_object_size_t, submap_entry_size,
13555 					    int, kr);
13556 					vm_map_lookup_locked_copy_slowly_error++;
13557 					return kr;
13558 				}
13559 
13560 				if ((kr == KERN_SUCCESS) &&
13561 				    (version.main_timestamp + 1) == map->timestamp) {
13562 					submap_entry = saved_submap_entry;
13563 				} else {
13564 					saved_submap_entry = NULL;
13565 					old_start -= start_delta;
13566 					old_end += end_delta;
13567 					vm_object_deallocate(copy_object);
13568 					copy_object = VM_OBJECT_NULL;
13569 					vm_map_lock_write_to_read(map);
13570 					vm_map_lookup_locked_copy_slowly_restart++;
13571 					goto RetrySubMap;
13572 				}
13573 				vm_map_lookup_locked_copy_slowly_count++;
13574 				vm_map_lookup_locked_copy_slowly_size += submap_entry_size;
13575 				if (submap_entry_size > vm_map_lookup_locked_copy_slowly_max) {
13576 					vm_map_lookup_locked_copy_slowly_max = submap_entry_size;
13577 				}
13578 			} else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13579 				submap_entry_offset = VME_OFFSET(submap_entry);
13580 				copy_object = VM_OBJECT_NULL;
13581 				object_copied_offset = submap_entry_offset;
13582 				object_copied_needs_copy = FALSE;
13583 				DTRACE_VM6(submap_copy_strategically,
13584 				    vm_map_t, cow_sub_map_parent,
13585 				    vm_map_offset_t, vaddr,
13586 				    vm_map_t, map,
13587 				    vm_object_size_t, submap_entry_size,
13588 				    int, submap_entry->wired_count,
13589 				    int, sub_object->copy_strategy);
13590 				kr = vm_object_copy_strategically(
13591 					sub_object,
13592 					submap_entry_offset,
13593 					submap_entry->vme_end - submap_entry->vme_start,
13594 					&copy_object,
13595 					&object_copied_offset,
13596 					&object_copied_needs_copy);
13597 				if (kr == KERN_MEMORY_RESTART_COPY) {
13598 					old_start -= start_delta;
13599 					old_end += end_delta;
13600 					vm_object_deallocate(copy_object);
13601 					copy_object = VM_OBJECT_NULL;
13602 					vm_map_lock_write_to_read(map);
13603 					vm_map_lookup_locked_copy_strategically_restart++;
13604 					goto RetrySubMap;
13605 				}
13606 				if (kr != KERN_SUCCESS) {
13607 					if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13608 						vm_map_unlock(cow_sub_map_parent);
13609 					}
13610 					if ((*real_map != map)
13611 					    && (*real_map != cow_sub_map_parent)) {
13612 						vm_map_unlock(*real_map);
13613 					}
13614 					*real_map = map;
13615 					vm_object_deallocate(copy_object);
13616 					copy_object = VM_OBJECT_NULL;
13617 					kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_STRAT_FAILED), 0 /* arg */);
13618 					vm_map_lock_write_to_read(map);
13619 					DTRACE_VM4(submap_copy_error_strategically,
13620 					    vm_object_t, sub_object,
13621 					    vm_object_offset_t, submap_entry_offset,
13622 					    vm_object_size_t, submap_entry_size,
13623 					    int, kr);
13624 					vm_map_lookup_locked_copy_strategically_error++;
13625 					return kr;
13626 				}
13627 				assert(copy_object != VM_OBJECT_NULL);
13628 				assert(copy_object != sub_object);
13629 				object_copied = TRUE;
13630 				vm_map_lookup_locked_copy_strategically_count++;
13631 				vm_map_lookup_locked_copy_strategically_size += submap_entry_size;
13632 				if (submap_entry_size > vm_map_lookup_locked_copy_strategically_max) {
13633 					vm_map_lookup_locked_copy_strategically_max = submap_entry_size;
13634 				}
13635 			} else {
13636 				/* set up shadow object */
13637 				object_copied = FALSE;
13638 				copy_object = sub_object;
13639 				vm_object_lock(sub_object);
13640 				vm_object_reference_locked(sub_object);
13641 				sub_object->shadowed = TRUE;
13642 				vm_object_unlock(sub_object);
13643 
13644 				assert(submap_entry->wired_count == 0);
13645 				submap_entry->needs_copy = TRUE;
13646 
13647 				prot = submap_entry->protection;
13648 				assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13649 				prot = prot & ~VM_PROT_WRITE;
13650 				assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13651 
13652 				if (override_nx(old_map,
13653 				    VME_ALIAS(submap_entry))
13654 				    && prot) {
13655 					prot |= VM_PROT_EXECUTE;
13656 				}
13657 
13658 				vm_object_pmap_protect(
13659 					sub_object,
13660 					VME_OFFSET(submap_entry),
13661 					submap_entry->vme_end -
13662 					submap_entry->vme_start,
13663 					(submap_entry->is_shared
13664 					|| map->mapped_in_other_pmaps) ?
13665 					PMAP_NULL : map->pmap,
13666 					VM_MAP_PAGE_SIZE(map),
13667 					submap_entry->vme_start,
13668 					prot);
13669 				vm_map_lookup_locked_copy_shadow_count++;
13670 				vm_map_lookup_locked_copy_shadow_size += submap_entry_size;
13671 				if (submap_entry_size > vm_map_lookup_locked_copy_shadow_max) {
13672 					vm_map_lookup_locked_copy_shadow_max = submap_entry_size;
13673 				}
13674 			}
13675 
13676 			/*
13677 			 * Adjust the fault offset to the submap entry.
13678 			 */
13679 			copy_offset = (local_vaddr -
13680 			    submap_entry->vme_start +
13681 			    VME_OFFSET(submap_entry));
13682 
13683 			/*
13684 			 * This works differently than the normal submap
13685 			 * case: we go back to the parent of the COW map
13686 			 * and clip out the target portion of the sub_map,
13687 			 * substituting the new copy object.
13688 			 */
13689 
13690 			subentry_protection = submap_entry->protection;
13691 			subentry_max_protection = submap_entry->max_protection;
13692 			subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13693 			vm_map_unlock(map);
13694 			submap_entry = NULL; /* not valid after map unlock */
13695 
13696 			local_start = old_start;
13697 			local_end = old_end;
13698 			map = cow_sub_map_parent;
13699 			*var_map = cow_sub_map_parent;
13700 			vaddr = cow_parent_vaddr;
13701 			cow_sub_map_parent = NULL;
13702 
13703 			if (!vm_map_lookup_entry(map,
13704 			    vaddr, &entry)) {
13705 				if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13706 					vm_map_unlock(cow_sub_map_parent);
13707 				}
13708 				if ((*real_map != map)
13709 				    && (*real_map != cow_sub_map_parent)) {
13710 					vm_map_unlock(*real_map);
13711 				}
13712 				*real_map = map;
13713 				vm_object_deallocate(
13714 					copy_object);
13715 				copy_object = VM_OBJECT_NULL;
13716 				vm_map_lock_write_to_read(map);
13717 				DTRACE_VM4(submap_lookup_post_unlock,
13718 				    uint64_t, (uint64_t)entry->vme_start,
13719 				    uint64_t, (uint64_t)entry->vme_end,
13720 				    vm_map_offset_t, vaddr,
13721 				    int, object_copied);
13722 				return KERN_INVALID_ADDRESS;
13723 			}
13724 
13725 			/* clip out the portion of space */
13726 			/* mapped by the sub map which   */
13727 			/* corresponds to the underlying */
13728 			/* object */
13729 
13730 			/*
13731 			 * Clip (and unnest) the smallest nested chunk
13732 			 * possible around the faulting address...
13733 			 */
13734 			local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
13735 			local_end = local_start + pmap_shared_region_size_min(map->pmap);
13736 			/*
13737 			 * ... but don't go beyond the "old_start" to "old_end"
13738 			 * range, to avoid spanning over another VM region
13739 			 * with a possibly different VM object and/or offset.
13740 			 */
13741 			if (local_start < old_start) {
13742 				local_start = old_start;
13743 			}
13744 			if (local_end > old_end) {
13745 				local_end = old_end;
13746 			}
13747 			/*
13748 			 * Adjust copy_offset to the start of the range.
13749 			 */
13750 			copy_offset -= (vaddr - local_start);
13751 
13752 			vm_map_clip_start(map, entry, local_start);
13753 			vm_map_clip_end(map, entry, local_end);
13754 			if (entry->is_sub_map) {
13755 				/* unnesting was done when clipping */
13756 				assert(!entry->use_pmap);
13757 			}
13758 
13759 			/* substitute copy object for */
13760 			/* shared map entry           */
13761 			vm_map_deallocate(VME_SUBMAP(entry));
13762 			assert(!entry->iokit_acct);
13763 			entry->is_sub_map = FALSE;
13764 			entry->use_pmap = TRUE;
13765 			VME_OBJECT_SET(entry, copy_object);
13766 
13767 			/* propagate the submap entry's protections */
13768 			if (entry->protection != VM_PROT_READ) {
13769 				/*
13770 				 * Someone has already altered the top entry's
13771 				 * protections via vm_protect(VM_PROT_COPY).
13772 				 * Respect these new values and ignore the
13773 				 * submap entry's protections.
13774 				 */
13775 			} else {
13776 				/*
13777 				 * Regular copy-on-write: propagate the submap
13778 				 * entry's protections to the top map entry.
13779 				 */
13780 				entry->protection |= subentry_protection;
13781 			}
13782 			entry->max_protection |= subentry_max_protection;
13783 			/* propagate no_copy_on_read */
13784 			entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13785 
13786 			if ((entry->protection & VM_PROT_WRITE) &&
13787 			    (entry->protection & VM_PROT_EXECUTE) &&
13788 #if XNU_TARGET_OS_OSX
13789 			    map->pmap != kernel_pmap &&
13790 			    (vm_map_cs_enforcement(map)
13791 #if __arm64__
13792 			    || !VM_MAP_IS_EXOTIC(map)
13793 #endif /* __arm64__ */
13794 			    ) &&
13795 #endif /* XNU_TARGET_OS_OSX */
13796 			    !(entry->used_for_jit) &&
13797 			    VM_MAP_POLICY_WX_STRIP_X(map)) {
13798 				DTRACE_VM3(cs_wx,
13799 				    uint64_t, (uint64_t)entry->vme_start,
13800 				    uint64_t, (uint64_t)entry->vme_end,
13801 				    vm_prot_t, entry->protection);
13802 				printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13803 				    proc_selfpid(),
13804 				    (current_task()->bsd_info
13805 				    ? proc_name_address(current_task()->bsd_info)
13806 				    : "?"),
13807 				    __FUNCTION__);
13808 				entry->protection &= ~VM_PROT_EXECUTE;
13809 			}
13810 
13811 			if (object_copied) {
13812 				VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
13813 				entry->needs_copy = object_copied_needs_copy;
13814 				entry->is_shared = FALSE;
13815 			} else {
13816 				assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
13817 				assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
13818 				assert(entry->wired_count == 0);
13819 				VME_OFFSET_SET(entry, copy_offset);
13820 				entry->needs_copy = TRUE;
13821 				if (map != old_map) {
13822 					entry->is_shared = TRUE;
13823 				}
13824 			}
13825 			if (entry->inheritance == VM_INHERIT_SHARE) {
13826 				entry->inheritance = VM_INHERIT_COPY;
13827 			}
13828 
13829 			vm_map_lock_write_to_read(map);
13830 		} else {
13831 			if ((cow_sub_map_parent)
13832 			    && (cow_sub_map_parent != *real_map)
13833 			    && (cow_sub_map_parent != map)) {
13834 				vm_map_unlock(cow_sub_map_parent);
13835 			}
13836 			entry = submap_entry;
13837 			vaddr = local_vaddr;
13838 		}
13839 	}
13840 
13841 	/*
13842 	 *	Check whether this task is allowed to have
13843 	 *	this page.
13844 	 */
13845 
13846 	prot = entry->protection;
13847 
13848 	if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13849 		/*
13850 		 * HACK -- if not a stack, then allow execution
13851 		 */
13852 		prot |= VM_PROT_EXECUTE;
13853 	}
13854 
13855 	if (mask_protections) {
13856 		fault_type &= prot;
13857 		if (fault_type == VM_PROT_NONE) {
13858 			goto protection_failure;
13859 		}
13860 	}
13861 	if (((fault_type & prot) != fault_type)
13862 #if __arm64__
13863 	    /* prefetch abort in execute-only page */
13864 	    && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13865 #elif defined(__x86_64__)
13866 	    /* Consider the UEXEC bit when handling an EXECUTE fault */
13867 	    && !((fault_type & VM_PROT_EXECUTE) && !(prot & VM_PROT_EXECUTE) && (prot & VM_PROT_UEXEC))
13868 #endif
13869 	    ) {
13870 protection_failure:
13871 		if (*real_map != map) {
13872 			vm_map_unlock(*real_map);
13873 		}
13874 		*real_map = map;
13875 
13876 		if ((fault_type & VM_PROT_EXECUTE) && prot) {
13877 			log_stack_execution_failure((addr64_t)vaddr, prot);
13878 		}
13879 
13880 		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13881 		DTRACE_VM3(prot_fault_detailed, vm_prot_t, fault_type, vm_prot_t, prot, void *, vaddr);
13882 		/*
13883 		 * Noisy (esp. internally) and can be inferred from CrashReports. So OFF for now.
13884 		 *
13885 		 * kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_PROTECTION_FAILURE), 0);
13886 		 */
13887 		return KERN_PROTECTION_FAILURE;
13888 	}
13889 
13890 	/*
13891 	 *	If this page is not pageable, we have to get
13892 	 *	it for all possible accesses.
13893 	 */
13894 
13895 	*wired = (entry->wired_count != 0);
13896 	if (*wired) {
13897 		fault_type = prot;
13898 	}
13899 
13900 	/*
13901 	 *	If the entry was copy-on-write, handle the copy or demote the permissions here.
13902 	 */
13903 
13904 	if (entry->needs_copy) {
13905 		/*
13906 		 *	If we want to write the page, we may as well
13907 		 *	handle that now since we've got the map locked.
13908 		 *
13909 		 *	If we don't need to write the page, we just
13910 		 *	demote the permissions allowed.
13911 		 */
13912 
13913 		if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13914 			/*
13915 			 *	Make a new object, and place it in the
13916 			 *	object chain.  Note that no new references
13917 			 *	have appeared -- one just moved from the
13918 			 *	map to the new object.
13919 			 */
13920 
13921 			if (vm_map_lock_read_to_write(map)) {
13922 				vm_map_lock_read(map);
13923 				goto RetryLookup;
13924 			}
13925 
13926 			if (VME_OBJECT(entry)->shadowed == FALSE) {
13927 				vm_object_lock(VME_OBJECT(entry));
13928 				VME_OBJECT(entry)->shadowed = TRUE;
13929 				vm_object_unlock(VME_OBJECT(entry));
13930 			}
13931 			VME_OBJECT_SHADOW(entry,
13932 			    (vm_map_size_t) (entry->vme_end -
13933 			    entry->vme_start));
13934 			entry->needs_copy = FALSE;
13935 
13936 			vm_map_lock_write_to_read(map);
13937 		}
13938 		if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13939 			/*
13940 			 *	We're attempting to read a copy-on-write
13941 			 *	page -- don't allow writes.
13942 			 */
13943 
13944 			prot &= (~VM_PROT_WRITE);
13945 		}
13946 	}
13947 
13948 	if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
13949 		/*
13950 		 * We went through a "needs_copy" submap without triggering
13951 		 * a copy, so granting write access to the page would bypass
13952 		 * that submap's "needs_copy".
13953 		 */
13954 		assert(!(fault_type & VM_PROT_WRITE));
13955 		assert(!*wired);
13956 		assert(!force_copy);
13957 		// printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
13958 		prot &= ~VM_PROT_WRITE;
13959 	}
13960 
13961 	/*
13962 	 *	Create an object if necessary.
13963 	 */
13964 	if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13965 		if (vm_map_lock_read_to_write(map)) {
13966 			vm_map_lock_read(map);
13967 			goto RetryLookup;
13968 		}
13969 
13970 		VME_OBJECT_SET(entry,
13971 		    vm_object_allocate(
13972 			    (vm_map_size_t)(entry->vme_end -
13973 			    entry->vme_start)));
13974 		VME_OFFSET_SET(entry, 0);
13975 		assert(entry->use_pmap);
13976 		vm_map_lock_write_to_read(map);
13977 	}
13978 
13979 	/*
13980 	 *	Return the object/offset from this entry.  If the entry
13981 	 *	was copy-on-write or empty, it has been fixed up.  Also
13982 	 *	return the protection.
13983 	 */
13984 
13985 	*offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13986 	*object = VME_OBJECT(entry);
13987 	*out_prot = prot;
13988 	KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
13989 
13990 	if (fault_info) {
13991 		fault_info->interruptible = THREAD_UNINT; /* for now... */
13992 		/* ... the caller will change "interruptible" if needed */
13993 		fault_info->cluster_size = 0;
13994 		fault_info->user_tag = VME_ALIAS(entry);
13995 		fault_info->pmap_options = 0;
13996 		if (entry->iokit_acct ||
13997 		    (!entry->is_sub_map && !entry->use_pmap)) {
13998 			fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13999 		}
14000 		fault_info->behavior = entry->behavior;
14001 		fault_info->lo_offset = VME_OFFSET(entry);
14002 		fault_info->hi_offset =
14003 		    (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
14004 		fault_info->no_cache  = entry->no_cache;
14005 		fault_info->stealth = FALSE;
14006 		fault_info->io_sync = FALSE;
14007 		if (entry->used_for_jit ||
14008 		    entry->vme_resilient_codesign) {
14009 			fault_info->cs_bypass = TRUE;
14010 		} else {
14011 			fault_info->cs_bypass = FALSE;
14012 		}
14013 		fault_info->pmap_cs_associated = FALSE;
14014 #if CONFIG_PMAP_CS
14015 		if (entry->pmap_cs_associated) {
14016 			/*
14017 			 * The pmap layer will validate this page
14018 			 * before allowing it to be executed from.
14019 			 */
14020 			fault_info->pmap_cs_associated = TRUE;
14021 		}
14022 #endif /* CONFIG_PMAP_CS */
14023 		fault_info->mark_zf_absent = FALSE;
14024 		fault_info->batch_pmap_op = FALSE;
14025 		fault_info->resilient_media = entry->vme_resilient_media;
14026 		fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
14027 		if (entry->translated_allow_execute) {
14028 			fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14029 		}
14030 	}
14031 
14032 	/*
14033 	 *	Lock the object to prevent it from disappearing
14034 	 */
14035 	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14036 		if (contended == NULL) {
14037 			vm_object_lock(*object);
14038 		} else {
14039 			*contended = vm_object_lock_check_contended(*object);
14040 		}
14041 	} else {
14042 		vm_object_lock_shared(*object);
14043 	}
14044 
14045 	/*
14046 	 *	Save the version number
14047 	 */
14048 
14049 	out_version->main_timestamp = map->timestamp;
14050 
14051 	return KERN_SUCCESS;
14052 }
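
/*
 * Note: on KERN_SUCCESS, the (possibly substituted) map is returned
 * read-locked, "*object" is returned locked (shared or exclusive per
 * "object_lock_type"), and "out_version" snapshots the map timestamp
 * so the caller can revalidate with vm_map_verify() after dropping
 * the map lock -- see the sketch after vm_map_verify() below.
 */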
14053 
14054 
14055 /*
14056  *	vm_map_verify:
14057  *
14058  *	Verifies that the map in question has not changed
14059  *	since the given version. The map has to be locked
14060  *	("shared" mode is fine) before calling this function
14061  *	and it will be returned locked too.
14062  */
14063 boolean_t
14064 vm_map_verify(
14065 	vm_map_t                map,
14066 	vm_map_version_t        *version)       /* REF */
14067 {
14068 	boolean_t       result;
14069 
14070 	vm_map_lock_assert_held(map);
14071 	result = (map->timestamp == version->main_timestamp);
14072 
14073 	return result;
14074 }
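
/*
 * A minimal sketch of the intended lookup/verify pattern (control flow,
 * locals and the "RetryFault" label are illustrative, not lifted from
 * the actual fault path):
 *
 *	vm_map_version_t version;
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type, ...,
 *	    &version, &object, &offset, &prot, &wired, ...);
 *	vm_map_unlock_read(map);
 *	// ... resolve the fault with the map unlocked ...
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed while it was unlocked: redo the lookup
 *		vm_map_unlock_read(map);
 *		goto RetryFault;
 *	}
 */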
14075 
14076 /*
14077  *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14078  *	Goes away after regular vm_region_recurse function migrates to
14079  *	64 bits
14080  *	vm_region_recurse: A form of vm_region which follows the
14081  *	submaps in a target map
14082  *
14083  */
14084 
14085 kern_return_t
14086 vm_map_region_recurse_64(
14087 	vm_map_t                 map,
14088 	vm_map_offset_t *address,               /* IN/OUT */
14089 	vm_map_size_t           *size,                  /* OUT */
14090 	natural_t               *nesting_depth, /* IN/OUT */
14091 	vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
14092 	mach_msg_type_number_t  *count) /* IN/OUT */
14093 {
14094 	mach_msg_type_number_t  original_count;
14095 	vm_region_extended_info_data_t  extended;
14096 	vm_map_entry_t                  tmp_entry;
14097 	vm_map_offset_t                 user_address;
14098 	unsigned int                    user_max_depth;
14099 
14100 	/*
14101 	 * "curr_entry" is the VM map entry preceding or including the
14102 	 * address we're looking for.
14103 	 * "curr_map" is the map or sub-map containing "curr_entry".
14104 	 * "curr_address" is the equivalent of the top map's "user_address"
14105 	 * in the current map.
14106 	 * "curr_offset" is the cumulated offset of "curr_map" in the
14107 	 * target task's address space.
14108 	 * "curr_depth" is the depth of "curr_map" in the chain of
14109 	 * sub-maps.
14110 	 *
14111 	 * "curr_max_below" and "curr_max_above" limit the range (around
14112 	 * "curr_address") we should take into account in the current (sub)map.
14113 	 * They limit the range to what's visible through the map entries
14114 	 * we've traversed from the top map to the current map.
14115 	 *
14116 	 */
14117 	vm_map_entry_t                  curr_entry;
14118 	vm_map_address_t                curr_address;
14119 	vm_map_offset_t                 curr_offset;
14120 	vm_map_t                        curr_map;
14121 	unsigned int                    curr_depth;
14122 	vm_map_offset_t                 curr_max_below, curr_max_above;
14123 	vm_map_offset_t                 curr_skip;
14124 
14125 	/*
14126 	 * "next_" is the same as "curr_" but for the VM region immediately
14127 	 * after the address we're looking for.  We need to keep track of this
14128 	 * too because we want to return info about that region if the
14129 	 * address we're looking for is not mapped.
14130 	 */
14131 	vm_map_entry_t                  next_entry;
14132 	vm_map_offset_t                 next_offset;
14133 	vm_map_offset_t                 next_address;
14134 	vm_map_t                        next_map;
14135 	unsigned int                    next_depth;
14136 	vm_map_offset_t                 next_max_below, next_max_above;
14137 	vm_map_offset_t                 next_skip;
14138 
14139 	boolean_t                       look_for_pages;
14140 	vm_region_submap_short_info_64_t short_info;
14141 	boolean_t                       do_region_footprint;
14142 	int                             effective_page_size, effective_page_shift;
14143 	boolean_t                       submap_needed_copy;
14144 
14145 	if (map == VM_MAP_NULL) {
14146 		/* no address space to work on */
14147 		return KERN_INVALID_ARGUMENT;
14148 	}
14149 
14150 	effective_page_shift = vm_self_region_page_shift(map);
14151 	effective_page_size = (1 << effective_page_shift);
14152 
14153 	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14154 		/*
14155 		 * "info" structure is not big enough and
14156 		 * would overflow
14157 		 */
14158 		return KERN_INVALID_ARGUMENT;
14159 	}
14160 
14161 	do_region_footprint = task_self_region_footprint();
14162 	original_count = *count;
14163 
14164 	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14165 		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14166 		look_for_pages = FALSE;
14167 		short_info = (vm_region_submap_short_info_64_t) submap_info;
14168 		submap_info = NULL;
14169 	} else {
14170 		look_for_pages = TRUE;
14171 		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14172 		short_info = NULL;
14173 
14174 		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14175 			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14176 		}
14177 		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14178 			*count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14179 		}
14180 	}
14181 
14182 	user_address = *address;
14183 	user_max_depth = *nesting_depth;
14184 	submap_needed_copy = FALSE;
14185 
14186 	if (not_in_kdp) {
14187 		vm_map_lock_read(map);
14188 	}
14189 
14190 recurse_again:
14191 	curr_entry = NULL;
14192 	curr_map = map;
14193 	curr_address = user_address;
14194 	curr_offset = 0;
14195 	curr_skip = 0;
14196 	curr_depth = 0;
14197 	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14198 	curr_max_below = curr_address;
14199 
14200 	next_entry = NULL;
14201 	next_map = NULL;
14202 	next_address = 0;
14203 	next_offset = 0;
14204 	next_skip = 0;
14205 	next_depth = 0;
14206 	next_max_above = (vm_map_offset_t) -1;
14207 	next_max_below = (vm_map_offset_t) -1;
14208 
14209 	for (;;) {
14210 		if (vm_map_lookup_entry(curr_map,
14211 		    curr_address,
14212 		    &tmp_entry)) {
14213 			/* tmp_entry contains the address we're looking for */
14214 			curr_entry = tmp_entry;
14215 		} else {
14216 			vm_map_offset_t skip;
14217 			/*
14218 			 * The address is not mapped.  "tmp_entry" is the
14219 			 * map entry preceding the address.  We want the next
14220 			 * one, if it exists.
14221 			 */
14222 			curr_entry = tmp_entry->vme_next;
14223 
14224 			if (curr_entry == vm_map_to_entry(curr_map) ||
14225 			    (curr_entry->vme_start >=
14226 			    curr_address + curr_max_above)) {
14227 				/* no next entry at this level: stop looking */
14228 				if (not_in_kdp) {
14229 					vm_map_unlock_read(curr_map);
14230 				}
14231 				curr_entry = NULL;
14232 				curr_map = NULL;
14233 				curr_skip = 0;
14234 				curr_offset = 0;
14235 				curr_depth = 0;
14236 				curr_max_above = 0;
14237 				curr_max_below = 0;
14238 				break;
14239 			}
14240 
14241 			/* adjust current address and offset */
14242 			skip = curr_entry->vme_start - curr_address;
14243 			curr_address = curr_entry->vme_start;
14244 			curr_skip += skip;
14245 			curr_offset += skip;
14246 			curr_max_above -= skip;
14247 			curr_max_below = 0;
14248 		}
14249 
14250 		/*
14251 		 * Is the next entry at this level closer to the address (or
14252 		 * deeper in the submap chain) than the one we had
14253 		 * so far?
14254 		 */
14255 		tmp_entry = curr_entry->vme_next;
14256 		if (tmp_entry == vm_map_to_entry(curr_map)) {
14257 			/* no next entry at this level */
14258 		} else if (tmp_entry->vme_start >=
14259 		    curr_address + curr_max_above) {
14260 			/*
14261 			 * tmp_entry is beyond the scope of what we mapped of
14262 			 * this submap in the upper level: ignore it.
14263 			 */
14264 		} else if ((next_entry == NULL) ||
14265 		    (tmp_entry->vme_start + curr_offset <=
14266 		    next_entry->vme_start + next_offset)) {
14267 			/*
14268 			 * We didn't have a "next_entry" or this one is
14269 			 * closer to the address we're looking for:
14270 			 * use this "tmp_entry" as the new "next_entry".
14271 			 */
14272 			if (next_entry != NULL) {
14273 				/* unlock the last "next_map" */
14274 				if (next_map != curr_map && not_in_kdp) {
14275 					vm_map_unlock_read(next_map);
14276 				}
14277 			}
14278 			next_entry = tmp_entry;
14279 			next_map = curr_map;
14280 			next_depth = curr_depth;
14281 			next_address = next_entry->vme_start;
14282 			next_skip = curr_skip;
14283 			next_skip += (next_address - curr_address);
14284 			next_offset = curr_offset;
14285 			next_offset += (next_address - curr_address);
14286 			next_max_above = MIN(next_max_above, curr_max_above);
14287 			next_max_above = MIN(next_max_above,
14288 			    next_entry->vme_end - next_address);
14289 			next_max_below = MIN(next_max_below, curr_max_below);
14290 			next_max_below = MIN(next_max_below,
14291 			    next_address - next_entry->vme_start);
14292 		}
14293 
14294 		/*
14295 		 * "curr_max_{above,below}" allow us to keep track of the
14296 		 * portion of the submap that is actually mapped at this level:
14297 		 * the rest of that submap is irrelevant to us, since it's not
14298 		 * mapped here.
14299 		 * The relevant portion of the map starts at
14300 		 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14301 		 */
14302 		curr_max_above = MIN(curr_max_above,
14303 		    curr_entry->vme_end - curr_address);
14304 		curr_max_below = MIN(curr_max_below,
14305 		    curr_address - curr_entry->vme_start);
14306 
14307 		if (!curr_entry->is_sub_map ||
14308 		    curr_depth >= user_max_depth) {
14309 			/*
14310 			 * We hit a leaf map or we reached the maximum depth
14311 			 * we could, so stop looking.  Keep the current map
14312 			 * locked.
14313 			 */
14314 			break;
14315 		}
14316 
14317 		/*
14318 		 * Get down to the next submap level.
14319 		 */
14320 
14321 		if (curr_entry->needs_copy) {
14322 			/* everything below this is effectively copy-on-write */
14323 			submap_needed_copy = TRUE;
14324 		}
14325 
14326 		/*
14327 		 * Lock the next level and unlock the current level,
14328 		 * unless we need to keep it locked to access the "next_entry"
14329 		 * later.
14330 		 */
14331 		if (not_in_kdp) {
14332 			vm_map_lock_read(VME_SUBMAP(curr_entry));
14333 		}
14334 		if (curr_map == next_map) {
14335 			/* keep "next_map" locked in case we need it */
14336 		} else {
14337 			/* release this map */
14338 			if (not_in_kdp) {
14339 				vm_map_unlock_read(curr_map);
14340 			}
14341 		}
14342 
14343 		/*
14344 		 * Adjust the offset.  "curr_entry" maps the submap
14345 		 * at relative address "curr_entry->vme_start" in the
14346 		 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14347 		 * bytes of the submap.
14348 		 * "curr_offset" always represents the offset of a virtual
14349 		 * address in the curr_map relative to the absolute address
14350 		 * space (i.e. the top-level VM map).
14351 		 */
14352 		curr_offset +=
14353 		    (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14354 		curr_address = user_address + curr_offset;
14355 		/* switch to the submap */
14356 		curr_map = VME_SUBMAP(curr_entry);
14357 		curr_depth++;
14358 		curr_entry = NULL;
14359 	}
14360 
14361 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14362 // so probably should be a real 32b ID vs. ptr.
14363 // Current users just check for equality
14364 
14365 	if (curr_entry == NULL) {
14366 		/* no VM region contains the address... */
14367 
14368 		if (do_region_footprint && /* we want footprint numbers */
14369 		    next_entry == NULL && /* & there are no more regions */
14370 		    /* & we haven't already provided our fake region: */
14371 		    user_address <= vm_map_last_entry(map)->vme_end) {
14372 			ledger_amount_t ledger_resident, ledger_compressed;
14373 
14374 			/*
14375 			 * Add a fake memory region to account for
14376 			 * purgeable and/or ledger-tagged memory that
14377 			 * counts towards this task's memory footprint,
14378 			 * i.e. the resident/compressed pages of non-volatile
14379 			 * objects owned by that task.
14380 			 */
14381 			task_ledgers_footprint(map->pmap->ledger,
14382 			    &ledger_resident,
14383 			    &ledger_compressed);
14384 			if (ledger_resident + ledger_compressed == 0) {
14385 				/* no purgeable memory usage to report */
14386 				return KERN_INVALID_ADDRESS;
14387 			}
14388 			/* fake region to show nonvolatile footprint */
14389 			if (look_for_pages) {
14390 				submap_info->protection = VM_PROT_DEFAULT;
14391 				submap_info->max_protection = VM_PROT_DEFAULT;
14392 				submap_info->inheritance = VM_INHERIT_DEFAULT;
14393 				submap_info->offset = 0;
14394 				submap_info->user_tag = -1;
14395 				submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14396 				submap_info->pages_shared_now_private = 0;
14397 				submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14398 				submap_info->pages_dirtied = submap_info->pages_resident;
14399 				submap_info->ref_count = 1;
14400 				submap_info->shadow_depth = 0;
14401 				submap_info->external_pager = 0;
14402 				submap_info->share_mode = SM_PRIVATE;
14403 				if (submap_needed_copy) {
14404 					submap_info->share_mode = SM_COW;
14405 				}
14406 				submap_info->is_submap = 0;
14407 				submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14408 				submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14409 				submap_info->user_wired_count = 0;
14410 				submap_info->pages_reusable = 0;
14411 			} else {
14412 				short_info->user_tag = -1;
14413 				short_info->offset = 0;
14414 				short_info->protection = VM_PROT_DEFAULT;
14415 				short_info->inheritance = VM_INHERIT_DEFAULT;
14416 				short_info->max_protection = VM_PROT_DEFAULT;
14417 				short_info->behavior = VM_BEHAVIOR_DEFAULT;
14418 				short_info->user_wired_count = 0;
14419 				short_info->is_submap = 0;
14420 				short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14421 				short_info->external_pager = 0;
14422 				short_info->shadow_depth = 0;
14423 				short_info->share_mode = SM_PRIVATE;
14424 				if (submap_needed_copy) {
14425 					short_info->share_mode = SM_COW;
14426 				}
14427 				short_info->ref_count = 1;
14428 			}
14429 			*nesting_depth = 0;
14430 			*size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14431 //			*address = user_address;
14432 			*address = vm_map_last_entry(map)->vme_end;
14433 			return KERN_SUCCESS;
14434 		}
14435 
14436 		if (next_entry == NULL) {
14437 			/* ... and no VM region follows it either */
14438 			return KERN_INVALID_ADDRESS;
14439 		}
14440 		/* ... gather info about the next VM region */
14441 		curr_entry = next_entry;
14442 		curr_map = next_map;    /* still locked ... */
14443 		curr_address = next_address;
14444 		curr_skip = next_skip;
14445 		curr_offset = next_offset;
14446 		curr_depth = next_depth;
14447 		curr_max_above = next_max_above;
14448 		curr_max_below = next_max_below;
14449 	} else {
14450 		/* we won't need "next_entry" after all */
14451 		if (next_entry != NULL) {
14452 			/* release "next_map" */
14453 			if (next_map != curr_map && not_in_kdp) {
14454 				vm_map_unlock_read(next_map);
14455 			}
14456 		}
14457 	}
14458 	next_entry = NULL;
14459 	next_map = NULL;
14460 	next_offset = 0;
14461 	next_skip = 0;
14462 	next_depth = 0;
14463 	next_max_below = -1;
14464 	next_max_above = -1;
14465 
14466 	if (curr_entry->is_sub_map &&
14467 	    curr_depth < user_max_depth) {
14468 		/*
14469 		 * We're not as deep as we could be:  we must have
14470 		 * gone back up after not finding anything mapped
14471 		 * below the original top-level map entry.
14472 		 * Let's move "curr_address" forward and recurse again.
14473 		 */
14474 		user_address = curr_address;
14475 		goto recurse_again;
14476 	}
14477 
14478 	*nesting_depth = curr_depth;
14479 	*size = curr_max_above + curr_max_below;
14480 	*address = user_address + curr_skip - curr_max_below;
14481 
14482 	if (look_for_pages) {
14483 		submap_info->user_tag = VME_ALIAS(curr_entry);
14484 		submap_info->offset = VME_OFFSET(curr_entry);
14485 		submap_info->protection = curr_entry->protection;
14486 		submap_info->inheritance = curr_entry->inheritance;
14487 		submap_info->max_protection = curr_entry->max_protection;
14488 		submap_info->behavior = curr_entry->behavior;
14489 		submap_info->user_wired_count = curr_entry->user_wired_count;
14490 		submap_info->is_submap = curr_entry->is_sub_map;
14491 		submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14492 	} else {
14493 		short_info->user_tag = VME_ALIAS(curr_entry);
14494 		short_info->offset = VME_OFFSET(curr_entry);
14495 		short_info->protection = curr_entry->protection;
14496 		short_info->inheritance = curr_entry->inheritance;
14497 		short_info->max_protection = curr_entry->max_protection;
14498 		short_info->behavior = curr_entry->behavior;
14499 		short_info->user_wired_count = curr_entry->user_wired_count;
14500 		short_info->is_submap = curr_entry->is_sub_map;
14501 		short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14502 	}
14503 
14504 	extended.pages_resident = 0;
14505 	extended.pages_swapped_out = 0;
14506 	extended.pages_shared_now_private = 0;
14507 	extended.pages_dirtied = 0;
14508 	extended.pages_reusable = 0;
14509 	extended.external_pager = 0;
14510 	extended.shadow_depth = 0;
14511 	extended.share_mode = SM_EMPTY;
14512 	extended.ref_count = 0;
14513 
14514 	if (not_in_kdp) {
14515 		if (!curr_entry->is_sub_map) {
14516 			vm_map_offset_t range_start, range_end;
14517 			range_start = MAX((curr_address - curr_max_below),
14518 			    curr_entry->vme_start);
14519 			range_end = MIN((curr_address + curr_max_above),
14520 			    curr_entry->vme_end);
14521 			vm_map_region_walk(curr_map,
14522 			    range_start,
14523 			    curr_entry,
14524 			    (VME_OFFSET(curr_entry) +
14525 			    (range_start -
14526 			    curr_entry->vme_start)),
14527 			    range_end - range_start,
14528 			    &extended,
14529 			    look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14530 			if (extended.external_pager &&
14531 			    extended.ref_count == 2 &&
14532 			    extended.share_mode == SM_SHARED) {
14533 				extended.share_mode = SM_PRIVATE;
14534 			}
14535 			if (submap_needed_copy) {
14536 				extended.share_mode = SM_COW;
14537 			}
14538 		} else {
14539 			if (curr_entry->use_pmap) {
14540 				extended.share_mode = SM_TRUESHARED;
14541 			} else {
14542 				extended.share_mode = SM_PRIVATE;
14543 			}
14544 			extended.ref_count = os_ref_get_count_raw(&VME_SUBMAP(curr_entry)->map_refcnt);
14545 		}
14546 	}
14547 
14548 	if (look_for_pages) {
14549 		submap_info->pages_resident = extended.pages_resident;
14550 		submap_info->pages_swapped_out = extended.pages_swapped_out;
14551 		submap_info->pages_shared_now_private =
14552 		    extended.pages_shared_now_private;
14553 		submap_info->pages_dirtied = extended.pages_dirtied;
14554 		submap_info->external_pager = extended.external_pager;
14555 		submap_info->shadow_depth = extended.shadow_depth;
14556 		submap_info->share_mode = extended.share_mode;
14557 		submap_info->ref_count = extended.ref_count;
14558 
14559 		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14560 			submap_info->pages_reusable = extended.pages_reusable;
14561 		}
14562 		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14563 			submap_info->object_id_full = (VME_OBJECT(curr_entry) != NULL) ? (vm_object_id_t) VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
14564 		}
14565 	} else {
14566 		short_info->external_pager = extended.external_pager;
14567 		short_info->shadow_depth = extended.shadow_depth;
14568 		short_info->share_mode = extended.share_mode;
14569 		short_info->ref_count = extended.ref_count;
14570 	}
14571 
14572 	if (not_in_kdp) {
14573 		vm_map_unlock_read(curr_map);
14574 	}
14575 
14576 	return KERN_SUCCESS;
14577 }
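
/*
 * User-space counterpart (an illustrative sketch, not part of this
 * file): walk a task's regions with mach_vm_region_recurse(), bumping
 * the nesting depth to descend into submaps.
 *
 *	#include <stdio.h>
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	static void
 *	dump_regions(task_t task)
 *	{
 *		mach_vm_address_t addr = 0;
 *		mach_vm_size_t size = 0;
 *		natural_t depth = 0;
 *
 *		for (;;) {
 *			vm_region_submap_info_data_64_t info;
 *			mach_msg_type_number_t count =
 *			    VM_REGION_SUBMAP_INFO_COUNT_64;
 *
 *			if (mach_vm_region_recurse(task, &addr, &size, &depth,
 *			    (vm_region_recurse_info_t)&info,
 *			    &count) != KERN_SUCCESS) {
 *				break;
 *			}
 *			if (info.is_submap) {
 *				depth++;	// same address, one level deeper
 *				continue;
 *			}
 *			printf("0x%llx-0x%llx depth=%u\n",
 *			    addr, addr + size, depth);
 *			addr += size;
 *		}
 *	}
 */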
14578 
14579 /*
14580  *	vm_region:
14581  *
14582  *	User call to obtain information about a region in
14583  *	a task's address map. Currently, only one flavor is
14584  *	supported.
14585  *
14586  *	XXX The reserved and behavior fields cannot be filled
14587  *	    in until the vm merge from the IK is completed, and
14588  *	    vm_reserve is implemented.
14589  */
14590 
14591 kern_return_t
14592 vm_map_region(
14593 	vm_map_t                 map,
14594 	vm_map_offset_t *address,               /* IN/OUT */
14595 	vm_map_size_t           *size,                  /* OUT */
14596 	vm_region_flavor_t       flavor,                /* IN */
14597 	vm_region_info_t         info,                  /* OUT */
14598 	mach_msg_type_number_t  *count, /* IN/OUT */
14599 	mach_port_t             *object_name)           /* OUT */
14600 {
14601 	vm_map_entry_t          tmp_entry;
14602 	vm_map_entry_t          entry;
14603 	vm_map_offset_t         start;
14604 
14605 	if (map == VM_MAP_NULL) {
14606 		return KERN_INVALID_ARGUMENT;
14607 	}
14608 
14609 	switch (flavor) {
14610 	case VM_REGION_BASIC_INFO:
14611 		/* legacy for old 32-bit objects info */
14612 	{
14613 		vm_region_basic_info_t  basic;
14614 
14615 		if (*count < VM_REGION_BASIC_INFO_COUNT) {
14616 			return KERN_INVALID_ARGUMENT;
14617 		}
14618 
14619 		basic = (vm_region_basic_info_t) info;
14620 		*count = VM_REGION_BASIC_INFO_COUNT;
14621 
14622 		vm_map_lock_read(map);
14623 
14624 		start = *address;
14625 		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14626 			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14627 				vm_map_unlock_read(map);
14628 				return KERN_INVALID_ADDRESS;
14629 			}
14630 		} else {
14631 			entry = tmp_entry;
14632 		}
14633 
14634 		start = entry->vme_start;
14635 
14636 		basic->offset = (uint32_t)VME_OFFSET(entry);
14637 		basic->protection = entry->protection;
14638 		basic->inheritance = entry->inheritance;
14639 		basic->max_protection = entry->max_protection;
14640 		basic->behavior = entry->behavior;
14641 		basic->user_wired_count = entry->user_wired_count;
14642 		basic->reserved = entry->is_sub_map;
14643 		*address = start;
14644 		*size = (entry->vme_end - start);
14645 
14646 		if (object_name) {
14647 			*object_name = IP_NULL;
14648 		}
14649 		if (entry->is_sub_map) {
14650 			basic->shared = FALSE;
14651 		} else {
14652 			basic->shared = entry->is_shared;
14653 		}
14654 
14655 		vm_map_unlock_read(map);
14656 		return KERN_SUCCESS;
14657 	}
14658 
14659 	case VM_REGION_BASIC_INFO_64:
14660 	{
14661 		vm_region_basic_info_64_t       basic;
14662 
14663 		if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
14664 			return KERN_INVALID_ARGUMENT;
14665 		}
14666 
14667 		basic = (vm_region_basic_info_64_t) info;
14668 		*count = VM_REGION_BASIC_INFO_COUNT_64;
14669 
14670 		vm_map_lock_read(map);
14671 
14672 		start = *address;
14673 		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14674 			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14675 				vm_map_unlock_read(map);
14676 				return KERN_INVALID_ADDRESS;
14677 			}
14678 		} else {
14679 			entry = tmp_entry;
14680 		}
14681 
14682 		start = entry->vme_start;
14683 
14684 		basic->offset = VME_OFFSET(entry);
14685 		basic->protection = entry->protection;
14686 		basic->inheritance = entry->inheritance;
14687 		basic->max_protection = entry->max_protection;
14688 		basic->behavior = entry->behavior;
14689 		basic->user_wired_count = entry->user_wired_count;
14690 		basic->reserved = entry->is_sub_map;
14691 		*address = start;
14692 		*size = (entry->vme_end - start);
14693 
14694 		if (object_name) {
14695 			*object_name = IP_NULL;
14696 		}
14697 		if (entry->is_sub_map) {
14698 			basic->shared = FALSE;
14699 		} else {
14700 			basic->shared = entry->is_shared;
14701 		}
14702 
14703 		vm_map_unlock_read(map);
14704 		return KERN_SUCCESS;
14705 	}
14706 	case VM_REGION_EXTENDED_INFO:
14707 		if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
14708 			return KERN_INVALID_ARGUMENT;
14709 		}
14710 		OS_FALLTHROUGH;
14711 	case VM_REGION_EXTENDED_INFO__legacy:
14712 		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
14713 			return KERN_INVALID_ARGUMENT;
14714 		}
14715 
14716 		{
14717 			vm_region_extended_info_t       extended;
14718 			mach_msg_type_number_t original_count;
14719 			int effective_page_size, effective_page_shift;
14720 
14721 			extended = (vm_region_extended_info_t) info;
14722 
14723 			effective_page_shift = vm_self_region_page_shift(map);
14724 			effective_page_size = (1 << effective_page_shift);
14725 
14726 			vm_map_lock_read(map);
14727 
14728 			start = *address;
14729 			if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14730 				if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14731 					vm_map_unlock_read(map);
14732 					return KERN_INVALID_ADDRESS;
14733 				}
14734 			} else {
14735 				entry = tmp_entry;
14736 			}
14737 			start = entry->vme_start;
14738 
14739 			extended->protection = entry->protection;
14740 			extended->user_tag = VME_ALIAS(entry);
14741 			extended->pages_resident = 0;
14742 			extended->pages_swapped_out = 0;
14743 			extended->pages_shared_now_private = 0;
14744 			extended->pages_dirtied = 0;
14745 			extended->external_pager = 0;
14746 			extended->shadow_depth = 0;
14747 
14748 			original_count = *count;
14749 			if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14750 				*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14751 			} else {
14752 				extended->pages_reusable = 0;
14753 				*count = VM_REGION_EXTENDED_INFO_COUNT;
14754 			}
14755 
14756 			vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
14757 
14758 			if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14759 				extended->share_mode = SM_PRIVATE;
14760 			}
14761 
14762 			if (object_name) {
14763 				*object_name = IP_NULL;
14764 			}
14765 			*address = start;
14766 			*size = (entry->vme_end - start);
14767 
14768 			vm_map_unlock_read(map);
14769 			return KERN_SUCCESS;
14770 		}
14771 	case VM_REGION_TOP_INFO:
14772 	{
14773 		vm_region_top_info_t    top;
14774 
14775 		if (*count < VM_REGION_TOP_INFO_COUNT) {
14776 			return KERN_INVALID_ARGUMENT;
14777 		}
14778 
14779 		top = (vm_region_top_info_t) info;
14780 		*count = VM_REGION_TOP_INFO_COUNT;
14781 
14782 		vm_map_lock_read(map);
14783 
14784 		start = *address;
14785 		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14786 			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14787 				vm_map_unlock_read(map);
14788 				return KERN_INVALID_ADDRESS;
14789 			}
14790 		} else {
14791 			entry = tmp_entry;
14792 		}
14793 		start = entry->vme_start;
14794 
14795 		top->private_pages_resident = 0;
14796 		top->shared_pages_resident = 0;
14797 
14798 		vm_map_region_top_walk(entry, top);
14799 
14800 		if (object_name) {
14801 			*object_name = IP_NULL;
14802 		}
14803 		*address = start;
14804 		*size = (entry->vme_end - start);
14805 
14806 		vm_map_unlock_read(map);
14807 		return KERN_SUCCESS;
14808 	}
14809 	default:
14810 		return KERN_INVALID_ARGUMENT;
14811 	}
14812 }
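
/*
 * User-space counterpart (an illustrative sketch, not part of this
 * file): the flavors above are reached through mach_vm_region().
 * For example, VM_REGION_BASIC_INFO_64:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	kern_return_t
 *	basic_info_of(mach_vm_address_t *addr, mach_vm_size_t *size,
 *	    vm_region_basic_info_data_64_t *info)
 *	{
 *		mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *		mach_port_t object_name = MACH_PORT_NULL; // always IP_NULL on return
 *
 *		return mach_vm_region(mach_task_self(), addr, size,
 *		    VM_REGION_BASIC_INFO_64, (vm_region_info_t)info,
 *		    &count, &object_name);
 *	}
 */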
14813 
14814 #define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
14815 	MIN((entry_size),                                               \
14816 	    ((obj)->all_reusable ?                                      \
14817 	     (obj)->wired_page_count :                                  \
14818 	     (obj)->resident_page_count - (obj)->reusable_page_count))
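/*
 * i.e. the number of resident pages credited to this entry: capped at
 * the entry's own size, excluding reusable pages -- for an
 * "all reusable" object, only the wired pages remain.
 */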
14819 
14820 void
14821 vm_map_region_top_walk(
14822 	vm_map_entry_t             entry,
14823 	vm_region_top_info_t       top)
14824 {
14825 	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14826 		top->share_mode = SM_EMPTY;
14827 		top->ref_count = 0;
14828 		top->obj_id = 0;
14829 		return;
14830 	}
14831 
14832 	{
14833 		struct  vm_object *obj, *tmp_obj;
14834 		int             ref_count;
14835 		uint32_t        entry_size;
14836 
14837 		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14838 
14839 		obj = VME_OBJECT(entry);
14840 
14841 		vm_object_lock(obj);
14842 
14843 		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14844 			ref_count--;
14845 		}
14846 
14847 		assert(obj->reusable_page_count <= obj->resident_page_count);
14848 		if (obj->shadow) {
14849 			if (ref_count == 1) {
14850 				top->private_pages_resident =
14851 				    OBJ_RESIDENT_COUNT(obj, entry_size);
14852 			} else {
14853 				top->shared_pages_resident =
14854 				    OBJ_RESIDENT_COUNT(obj, entry_size);
14855 			}
14856 			top->ref_count  = ref_count;
14857 			top->share_mode = SM_COW;
14858 
14859 			while ((tmp_obj = obj->shadow)) {
14860 				vm_object_lock(tmp_obj);
14861 				vm_object_unlock(obj);
14862 				obj = tmp_obj;
14863 
14864 				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14865 					ref_count--;
14866 				}
14867 
14868 				assert(obj->reusable_page_count <= obj->resident_page_count);
14869 				top->shared_pages_resident +=
14870 				    OBJ_RESIDENT_COUNT(obj, entry_size);
14871 				top->ref_count += ref_count - 1;
14872 			}
14873 		} else {
14874 			if (entry->superpage_size) {
14875 				top->share_mode = SM_LARGE_PAGE;
14876 				top->shared_pages_resident = 0;
14877 				top->private_pages_resident = entry_size;
14878 			} else if (entry->needs_copy) {
14879 				top->share_mode = SM_COW;
14880 				top->shared_pages_resident =
14881 				    OBJ_RESIDENT_COUNT(obj, entry_size);
14882 			} else {
14883 				if (ref_count == 1 ||
14884 				    (ref_count == 2 && obj->named)) {
14885 					top->share_mode = SM_PRIVATE;
14886 					top->private_pages_resident =
14887 					    OBJ_RESIDENT_COUNT(obj,
14888 					    entry_size);
14889 				} else {
14890 					top->share_mode = SM_SHARED;
14891 					top->shared_pages_resident =
14892 					    OBJ_RESIDENT_COUNT(obj,
14893 					    entry_size);
14894 				}
14895 			}
14896 			top->ref_count = ref_count;
14897 		}
14898 		/* XXX K64: obj_id will be truncated */
14899 		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14900 
14901 		vm_object_unlock(obj);
14902 	}
14903 }
14904 
14905 void
14906 vm_map_region_walk(
14907 	vm_map_t                        map,
14908 	vm_map_offset_t                 va,
14909 	vm_map_entry_t                  entry,
14910 	vm_object_offset_t              offset,
14911 	vm_object_size_t                range,
14912 	vm_region_extended_info_t       extended,
14913 	boolean_t                       look_for_pages,
14914 	mach_msg_type_number_t count)
14915 {
14916 	struct vm_object *obj, *tmp_obj;
14917 	vm_map_offset_t       last_offset;
14918 	int               i;
14919 	int               ref_count;
14920 	struct vm_object        *shadow_object;
14921 	unsigned short          shadow_depth;
14922 	boolean_t         do_region_footprint;
14923 	int                     effective_page_size, effective_page_shift;
14924 	vm_map_offset_t         effective_page_mask;
14925 
14926 	do_region_footprint = task_self_region_footprint();
14927 
14928 	if ((VME_OBJECT(entry) == 0) ||
14929 	    (entry->is_sub_map) ||
14930 	    (VME_OBJECT(entry)->phys_contiguous &&
14931 	    !entry->superpage_size)) {
14932 		extended->share_mode = SM_EMPTY;
14933 		extended->ref_count = 0;
14934 		return;
14935 	}
14936 
14937 	if (entry->superpage_size) {
14938 		extended->shadow_depth = 0;
14939 		extended->share_mode = SM_LARGE_PAGE;
14940 		extended->ref_count = 1;
14941 		extended->external_pager = 0;
14942 
14943 		/* TODO4K: Superpage in 4k mode? */
14944 		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14945 		extended->shadow_depth = 0;
14946 		return;
14947 	}
14948 
14949 	effective_page_shift = vm_self_region_page_shift(map);
14950 	effective_page_size = (1 << effective_page_shift);
14951 	effective_page_mask = effective_page_size - 1;
14952 
14953 	offset = vm_map_trunc_page(offset, effective_page_mask);
14954 
14955 	obj = VME_OBJECT(entry);
14956 
14957 	vm_object_lock(obj);
14958 
14959 	if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14960 		ref_count--;
14961 	}
14962 
14963 	if (look_for_pages) {
14964 		for (last_offset = offset + range;
14965 		    offset < last_offset;
14966 		    offset += effective_page_size, va += effective_page_size) {
14967 			if (do_region_footprint) {
14968 				int disp;
14969 
14970 				disp = 0;
14971 				if (map->has_corpse_footprint) {
14972 					/*
14973 					 * Query the page info data we saved
14974 					 * while forking the corpse.
14975 					 */
14976 					vm_map_corpse_footprint_query_page_info(
14977 						map,
14978 						va,
14979 						&disp);
14980 				} else {
14981 					/*
14982 					 * Query the pmap.
14983 					 */
14984 					vm_map_footprint_query_page_info(
14985 						map,
14986 						entry,
14987 						va,
14988 						&disp);
14989 				}
14990 				if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
14991 					extended->pages_resident++;
14992 				}
14993 				if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
14994 					extended->pages_reusable++;
14995 				}
14996 				if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
14997 					extended->pages_dirtied++;
14998 				}
14999 				if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
15000 					extended->pages_swapped_out++;
15001 				}
15002 				continue;
15003 			}
15004 
15005 			vm_map_region_look_for_page(map, va, obj,
15006 			    vm_object_trunc_page(offset), ref_count,
15007 			    0, extended, count);
15008 		}
15009 
15010 		if (do_region_footprint) {
15011 			goto collect_object_info;
15012 		}
15013 	} else {
15014 collect_object_info:
15015 		shadow_object = obj->shadow;
15016 		shadow_depth = 0;
15017 
15018 		if (!(obj->internal)) {
15019 			extended->external_pager = 1;
15020 		}
15021 
15022 		if (shadow_object != VM_OBJECT_NULL) {
15023 			vm_object_lock(shadow_object);
15024 			for (;
15025 			    shadow_object != VM_OBJECT_NULL;
15026 			    shadow_depth++) {
15027 				vm_object_t     next_shadow;
15028 
15029 				if (!(shadow_object->internal)) {
15030 					extended->external_pager = 1;
15031 				}
15032 
15033 				next_shadow = shadow_object->shadow;
15034 				if (next_shadow) {
15035 					vm_object_lock(next_shadow);
15036 				}
15037 				vm_object_unlock(shadow_object);
15038 				shadow_object = next_shadow;
15039 			}
15040 		}
15041 		extended->shadow_depth = shadow_depth;
15042 	}
15043 
15044 	if (extended->shadow_depth || entry->needs_copy) {
15045 		extended->share_mode = SM_COW;
15046 	} else {
15047 		if (ref_count == 1) {
15048 			extended->share_mode = SM_PRIVATE;
15049 		} else {
15050 			if (obj->true_share) {
15051 				extended->share_mode = SM_TRUESHARED;
15052 			} else {
15053 				extended->share_mode = SM_SHARED;
15054 			}
15055 		}
15056 	}
15057 	extended->ref_count = ref_count - extended->shadow_depth;
15058 
15059 	for (i = 0; i < extended->shadow_depth; i++) {
15060 		if ((tmp_obj = obj->shadow) == 0) {
15061 			break;
15062 		}
15063 		vm_object_lock(tmp_obj);
15064 		vm_object_unlock(obj);
15065 
15066 		if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
15067 			ref_count--;
15068 		}
15069 
15070 		extended->ref_count += ref_count;
15071 		obj = tmp_obj;
15072 	}
15073 	vm_object_unlock(obj);
15074 
15075 	if (extended->share_mode == SM_SHARED) {
15076 		vm_map_entry_t       cur;
15077 		vm_map_entry_t       last;
15078 		int      my_refs;
15079 
15080 		obj = VME_OBJECT(entry);
15081 		last = vm_map_to_entry(map);
15082 		my_refs = 0;
15083 
15084 		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15085 			ref_count--;
15086 		}
15087 		for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
15088 			my_refs += vm_map_region_count_obj_refs(cur, obj);
15089 		}
15090 
15091 		if (my_refs == ref_count) {
15092 			extended->share_mode = SM_PRIVATE_ALIASED;
15093 		} else if (my_refs > 1) {
15094 			extended->share_mode = SM_SHARED_ALIASED;
15095 		}
15096 	}
15097 }
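
/*
 * Share-mode summary, as computed above:
 *	SM_EMPTY            no object, a submap, or phys_contiguous
 *	                    without a superpage
 *	SM_LARGE_PAGE       superpage mapping
 *	SM_COW              shadow chain present, or entry->needs_copy
 *	SM_PRIVATE          single reference on the object
 *	SM_TRUESHARED       multiple references, object->true_share
 *	SM_SHARED           multiple references
 *	SM_PRIVATE_ALIASED  all of those references come from this map
 *	SM_SHARED_ALIASED   several references from this map, plus others
 */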
15098 
15099 
15100 /* object is locked on entry and locked on return */
15101 
15102 
15103 static void
15104 vm_map_region_look_for_page(
15105 	__unused vm_map_t               map,
15106 	__unused vm_map_offset_t        va,
15107 	vm_object_t                     object,
15108 	vm_object_offset_t              offset,
15109 	int                             max_refcnt,
15110 	unsigned short                  depth,
15111 	vm_region_extended_info_t       extended,
15112 	mach_msg_type_number_t count)
15113 {
15114 	vm_page_t       p;
15115 	vm_object_t     shadow;
15116 	int             ref_count;
15117 	vm_object_t     caller_object;
15118 
15119 	shadow = object->shadow;
15120 	caller_object = object;
15121 
15122 
15123 	while (TRUE) {
15124 		if (!(object->internal)) {
15125 			extended->external_pager = 1;
15126 		}
15127 
15128 		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
15129 			if (shadow && (max_refcnt == 1)) {
15130 				extended->pages_shared_now_private++;
15131 			}
15132 
15133 			if (!p->vmp_fictitious &&
15134 			    (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
15135 				extended->pages_dirtied++;
15136 			} else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
15137 				if (p->vmp_reusable || object->all_reusable) {
15138 					extended->pages_reusable++;
15139 				}
15140 			}
15141 
15142 			extended->pages_resident++;
15143 
15144 			if (object != caller_object) {
15145 				vm_object_unlock(object);
15146 			}
15147 
15148 			return;
15149 		}
15150 		if (object->internal &&
15151 		    object->alive &&
15152 		    !object->terminating &&
15153 		    object->pager_ready) {
15154 			if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
15155 			    == VM_EXTERNAL_STATE_EXISTS) {
15156 				/* the pager has that page */
15157 				extended->pages_swapped_out++;
15158 				if (object != caller_object) {
15159 					vm_object_unlock(object);
15160 				}
15161 				return;
15162 			}
15163 		}
15164 
15165 		if (shadow) {
15166 			vm_object_lock(shadow);
15167 
15168 			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
15169 				ref_count--;
15170 			}
15171 
15172 			if (++depth > extended->shadow_depth) {
15173 				extended->shadow_depth = depth;
15174 			}
15175 
15176 			if (ref_count > max_refcnt) {
15177 				max_refcnt = ref_count;
15178 			}
15179 
15180 			if (object != caller_object) {
15181 				vm_object_unlock(object);
15182 			}
15183 
15184 			offset = offset + object->vo_shadow_offset;
15185 			object = shadow;
15186 			shadow = object->shadow;
15187 			continue;
15188 		}
15189 		if (object != caller_object) {
15190 			vm_object_unlock(object);
15191 		}
15192 		break;
15193 	}
15194 }
15195 
15196 static int
15197 vm_map_region_count_obj_refs(
15198 	vm_map_entry_t    entry,
15199 	vm_object_t       object)
15200 {
15201 	int ref_count;
15202 	vm_object_t chk_obj;
15203 	vm_object_t tmp_obj;
15204 
15205 	if (VME_OBJECT(entry) == 0) {
15206 		return 0;
15207 	}
15208 
15209 	if (entry->is_sub_map) {
15210 		return 0;
15211 	} else {
15212 		ref_count = 0;
15213 
15214 		chk_obj = VME_OBJECT(entry);
15215 		vm_object_lock(chk_obj);
15216 
15217 		while (chk_obj) {
15218 			if (chk_obj == object) {
15219 				ref_count++;
15220 			}
15221 			tmp_obj = chk_obj->shadow;
15222 			if (tmp_obj) {
15223 				vm_object_lock(tmp_obj);
15224 			}
15225 			vm_object_unlock(chk_obj);
15226 
15227 			chk_obj = tmp_obj;
15228 		}
15229 	}
15230 	return ref_count;
15231 }
15232 
15233 
15234 /*
15235  *	Routine:	vm_map_simplify
15236  *
15237  *	Description:
15238  *		Attempt to simplify the map representation in
15239  *		the vicinity of the given starting address.
15240  *	Note:
15241  *		This routine is intended primarily to keep the
15242  *		kernel maps more compact -- they generally don't
15243  *		benefit from the "expand a map entry" technology
15244  *		at allocation time because the adjacent entry
15245  *		is often wired down.
15246  */
15247 void
15248 vm_map_simplify_entry(
15249 	vm_map_t        map,
15250 	vm_map_entry_t  this_entry)
15251 {
15252 	vm_map_entry_t  prev_entry;
15253 
15254 	prev_entry = this_entry->vme_prev;
15255 
15256 	if ((this_entry != vm_map_to_entry(map)) &&
15257 	    (prev_entry != vm_map_to_entry(map)) &&
15258 
15259 	    (prev_entry->vme_end == this_entry->vme_start) &&
15260 
15261 	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
15262 	    (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
15263 	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
15264 	    prev_entry->vme_start))
15265 	    == VME_OFFSET(this_entry)) &&
15266 
15267 	    (prev_entry->behavior == this_entry->behavior) &&
15268 	    (prev_entry->needs_copy == this_entry->needs_copy) &&
15269 	    (prev_entry->protection == this_entry->protection) &&
15270 	    (prev_entry->max_protection == this_entry->max_protection) &&
15271 	    (prev_entry->inheritance == this_entry->inheritance) &&
15272 	    (prev_entry->use_pmap == this_entry->use_pmap) &&
15273 	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
15274 	    (prev_entry->no_cache == this_entry->no_cache) &&
15275 	    (prev_entry->permanent == this_entry->permanent) &&
15276 	    (prev_entry->map_aligned == this_entry->map_aligned) &&
15277 	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
15278 	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
15279 	    (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
15280 	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
15281 	    (prev_entry->vme_resilient_codesign ==
15282 	    this_entry->vme_resilient_codesign) &&
15283 	    (prev_entry->vme_resilient_media ==
15284 	    this_entry->vme_resilient_media) &&
15285 	    (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
15286 
15287 	    (prev_entry->wired_count == this_entry->wired_count) &&
15288 	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&
15289 
15290 	    ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
15291 	    (prev_entry->in_transition == FALSE) &&
15292 	    (this_entry->in_transition == FALSE) &&
15293 	    (prev_entry->needs_wakeup == FALSE) &&
15294 	    (this_entry->needs_wakeup == FALSE) &&
15295 	    (prev_entry->is_shared == this_entry->is_shared) &&
15296 	    (prev_entry->superpage_size == FALSE) &&
15297 	    (this_entry->superpage_size == FALSE)
15298 	    ) {
15299 		vm_map_store_entry_unlink(map, prev_entry);
15300 		assert(prev_entry->vme_start < this_entry->vme_end);
15301 		if (prev_entry->map_aligned) {
15302 			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
15303 			    VM_MAP_PAGE_MASK(map)));
15304 		}
15305 		this_entry->vme_start = prev_entry->vme_start;
15306 		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
15307 
15308 		if (map->holelistenabled) {
15309 			vm_map_store_update_first_free(map, this_entry, TRUE);
15310 		}
15311 
15312 		if (prev_entry->is_sub_map) {
15313 			vm_map_deallocate(VME_SUBMAP(prev_entry));
15314 		} else {
15315 			vm_object_deallocate(VME_OBJECT(prev_entry));
15316 		}
15317 		vm_map_entry_dispose(map, prev_entry);
15318 		SAVE_HINT_MAP_WRITE(map, this_entry);
15319 	}
15320 }
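
/*
 * Worked example (editor's addition): two entries coalesce only if they
 * are virtually adjacent AND their object offsets line up, i.e.
 * VME_OFFSET(prev) + (prev->vme_end - prev->vme_start) == VME_OFFSET(this),
 * on top of all the flag equality checks above.  With hypothetical values:
 *
 *	prev: [0x1000, 0x3000)  object O, offset 0x0
 *	this: [0x3000, 0x5000)  object O, offset 0x2000
 *
 * 0x0 + (0x3000 - 0x1000) == 0x2000, so the pair merges into a single
 * entry [0x1000, 0x5000) at offset 0x0: "prev" is unlinked and disposed,
 * one of O's references is released, and the lookup hint is updated.
 */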
15321 
15322 void
15323 vm_map_simplify(
15324 	vm_map_t        map,
15325 	vm_map_offset_t start)
15326 {
15327 	vm_map_entry_t  this_entry;
15328 
15329 	vm_map_lock(map);
15330 	if (vm_map_lookup_entry(map, start, &this_entry)) {
15331 		vm_map_simplify_entry(map, this_entry);
15332 		vm_map_simplify_entry(map, this_entry->vme_next);
15333 	}
15334 	vm_map_unlock(map);
15335 }
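
/*
 * Usage sketch (editor's addition; a hedged illustration, not a
 * prescribed call sequence): a caller typically simplifies around an
 * address it just finished manipulating.  Note that vm_map_simplify()
 * takes the map lock itself, so the caller must not already hold it:
 *
 *	vm_map_offset_t addr = ...;  // an address inside the touched region
 *	vm_map_simplify(map, addr);  // coalesce entry at addr with neighbors
 */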
15336 
15337 static void
15338 vm_map_simplify_range(
15339 	vm_map_t        map,
15340 	vm_map_offset_t start,
15341 	vm_map_offset_t end)
15342 {
15343 	vm_map_entry_t  entry;
15344 
15345 	/*
15346 	 * The map should be locked (for "write") by the caller.
15347 	 */
15348 
15349 	if (start >= end) {
15350 		/* invalid address range */
15351 		return;
15352 	}
15353 
15354 	start = vm_map_trunc_page(start,
15355 	    VM_MAP_PAGE_MASK(map));
15356 	end = vm_map_round_page(end,
15357 	    VM_MAP_PAGE_MASK(map));
15358 
15359 	if (!vm_map_lookup_entry(map, start, &entry)) {
15360 		/* "start" is not mapped and "entry" ends before "start" */
15361 		if (entry == vm_map_to_entry(map)) {
15362 			/* start with first entry in the map */
15363 			entry = vm_map_first_entry(map);
15364 		} else {
15365 			/* start with next entry */
15366 			entry = entry->vme_next;
15367 		}
15368 	}
15369 
15370 	while (entry != vm_map_to_entry(map) &&
15371 	    entry->vme_start <= end) {
15372 		/* try and coalesce "entry" with its previous entry */
15373 		vm_map_simplify_entry(map, entry);
15374 		entry = entry->vme_next;
15375 	}
15376 }
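
/*
 * Editor's note with a worked example: the range is widened to map page
 * boundaries first, so entries that only partially overlap [start, end)
 * are still considered for coalescing.  For a 16K map page size
 * (VM_MAP_PAGE_MASK(map) == 0x3FFF):
 *
 *	vm_map_trunc_page(0x14500, 0x3FFF)  ->  0x14000
 *	vm_map_round_page(0x18100, 0x3FFF)  ->  0x1C000
 */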
15377 
15378 
15379 /*
15380  *	Routine:	vm_map_machine_attribute
15381  *	Purpose:
15382  *		Provide machine-specific attributes to mappings,
15383  *		such as cacheability etc. for machines that provide
15384  *		them.  NUMA architectures and machines with big/strange
15385  *		caches will use this.
15386  *	Note:
15387  *		Responsibilities for locking and checking are handled here,
15388  *		everything else in the pmap module. If any non-volatile
15389  *		information must be kept, the pmap module should handle
15390  *		it itself. [This assumes that attributes do not
15391  *		need to be inherited, which seems ok to me]
15392  */
15393 kern_return_t
15394 vm_map_machine_attribute(
15395 	vm_map_t                        map,
15396 	vm_map_offset_t         start,
15397 	vm_map_offset_t         end,
15398 	vm_machine_attribute_t  attribute,
15399 	vm_machine_attribute_val_t* value)              /* IN/OUT */
15400 {
15401 	kern_return_t   ret;
15402 	vm_map_size_t sync_size;
15403 	vm_map_entry_t entry;
15404 
15405 	if (start < vm_map_min(map) || end > vm_map_max(map)) {
15406 		return KERN_INVALID_ADDRESS;
15407 	}
15408 
15409 	/* Figure how much memory we need to flush (in page increments) */
15410 	sync_size = end - start;
15411 
15412 	vm_map_lock(map);
15413 
15414 	if (attribute != MATTR_CACHE) {
15415 		/* If we don't have to find physical addresses, we */
15416 		/* don't have to do an explicit traversal here.    */
15417 		ret = pmap_attribute(map->pmap, start, end - start,
15418 		    attribute, value);
15419 		vm_map_unlock(map);
15420 		return ret;
15421 	}
15422 
15423 	ret = KERN_SUCCESS;     /* Assume it all worked */
15424 
15425 	while (sync_size) {
15426 		if (vm_map_lookup_entry(map, start, &entry)) {
15427 			vm_map_size_t   sub_size;
15428 			if ((entry->vme_end - start) > sync_size) {
15429 				sub_size = sync_size;
15430 				sync_size = 0;
15431 			} else {
15432 				sub_size = entry->vme_end - start;
15433 				sync_size -= sub_size;
15434 			}
15435 			if (entry->is_sub_map) {
15436 				vm_map_offset_t sub_start;
15437 				vm_map_offset_t sub_end;
15438 
15439 				sub_start = (start - entry->vme_start)
15440 				    + VME_OFFSET(entry);
15441 				sub_end = sub_start + sub_size;
15442 				vm_map_machine_attribute(
15443 					VME_SUBMAP(entry),
15444 					sub_start,
15445 					sub_end,
15446 					attribute, value);
15447 			} else {
15448 				if (VME_OBJECT(entry)) {
15449 					vm_page_t               m;
15450 					vm_object_t             object;
15451 					vm_object_t             base_object;
15452 					vm_object_t             last_object;
15453 					vm_object_offset_t      offset;
15454 					vm_object_offset_t      base_offset;
15455 					vm_map_size_t           range;
15456 					range = sub_size;
15457 					offset = (start - entry->vme_start)
15458 					    + VME_OFFSET(entry);
15459 					offset = vm_object_trunc_page(offset);
15460 					base_offset = offset;
15461 					object = VME_OBJECT(entry);
15462 					base_object = object;
15463 					last_object = NULL;
15464 
15465 					vm_object_lock(object);
15466 
15467 					while (range) {
15468 						m = vm_page_lookup(
15469 							object, offset);
15470 
15471 						if (m && !m->vmp_fictitious) {
15472 							ret =
15473 							    pmap_attribute_cache_sync(
15474 								VM_PAGE_GET_PHYS_PAGE(m),
15475 								PAGE_SIZE,
15476 								attribute, value);
15477 						} else if (object->shadow) {
15478 							offset = offset + object->vo_shadow_offset;
15479 							last_object = object;
15480 							object = object->shadow;
15481 							vm_object_lock(last_object->shadow);
15482 							vm_object_unlock(last_object);
15483 							continue;
15484 						}
15485 						if (range < PAGE_SIZE) {
15486 							range = 0;
15487 						} else {
15488 							range -= PAGE_SIZE;
15489 						}
15490 
15491 						if (base_object != object) {
15492 							vm_object_unlock(object);
15493 							vm_object_lock(base_object);
15494 							object = base_object;
15495 						}
15496 						/* Bump to the next page */
15497 						base_offset += PAGE_SIZE;
15498 						offset = base_offset;
15499 					}
15500 					vm_object_unlock(object);
15501 				}
15502 			}
15503 			start += sub_size;
15504 		} else {
15505 			vm_map_unlock(map);
15506 			return KERN_FAILURE;
15507 		}
15508 	}
15509 
15510 	vm_map_unlock(map);
15511 
15512 	return ret;
15513 }
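
/*
 * Illustrative caller (editor's addition; MATTR_CACHE and
 * MATTR_VAL_CACHE_FLUSH come from <mach/vm_attributes.h>, but this usage
 * is a sketch rather than a prescribed flow):
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr;
 *
 *	kr = vm_map_machine_attribute(map, start, end, MATTR_CACHE, &val);
 *
 * For MATTR_CACHE the routine walks every resident page in the range and
 * calls pmap_attribute_cache_sync() on its physical page; any other
 * attribute is handed straight to pmap_attribute() without a traversal.
 */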
15514 
15515 /*
15516  *	vm_map_behavior_set:
15517  *
15518  *	Sets the paging reference behavior of the specified address
15519  *	range in the target map.  Paging reference behavior affects
15520  *	how pagein operations resulting from faults on the map will be
15521  *	clustered.
15522  */
15523 kern_return_t
15524 vm_map_behavior_set(
15525 	vm_map_t        map,
15526 	vm_map_offset_t start,
15527 	vm_map_offset_t end,
15528 	vm_behavior_t   new_behavior)
15529 {
15530 	vm_map_entry_t  entry;
15531 	vm_map_entry_t  temp_entry;
15532 
15533 	if (start > end ||
15534 	    start < vm_map_min(map) ||
15535 	    end > vm_map_max(map)) {
15536 		return KERN_NO_SPACE;
15537 	}
15538 
15539 	switch (new_behavior) {
15540 	/*
15541 	 * This first block of behaviors all set a persistent state on the specified
15542 	 * memory range.  All we have to do here is to record the desired behavior
15543 	 * in the vm_map_entry_t's.
15544 	 */
15545 
15546 	case VM_BEHAVIOR_DEFAULT:
15547 	case VM_BEHAVIOR_RANDOM:
15548 	case VM_BEHAVIOR_SEQUENTIAL:
15549 	case VM_BEHAVIOR_RSEQNTL:
15550 	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15551 		vm_map_lock(map);
15552 
15553 		/*
15554 		 *	The entire address range must be valid for the map.
15555 		 *      Note that vm_map_range_check() does a
15556 		 *	vm_map_lookup_entry() internally and returns the
15557 		 *	entry containing the start of the address range if
15558 		 *	the entire range is valid.
15559 		 */
15560 		if (vm_map_range_check(map, start, end, &temp_entry)) {
15561 			entry = temp_entry;
15562 			vm_map_clip_start(map, entry, start);
15563 		} else {
15564 			vm_map_unlock(map);
15565 			return KERN_INVALID_ADDRESS;
15566 		}
15567 
15568 		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15569 			vm_map_clip_end(map, entry, end);
15570 			if (entry->is_sub_map) {
15571 				assert(!entry->use_pmap);
15572 			}
15573 
15574 			if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
15575 				entry->zero_wired_pages = TRUE;
15576 			} else {
15577 				entry->behavior = new_behavior;
15578 			}
15579 			entry = entry->vme_next;
15580 		}
15581 
15582 		vm_map_unlock(map);
15583 		break;
15584 
15585 	/*
15586 	 * The rest of these are different from the above in that they cause
15587 	 * an immediate action to take place as opposed to setting a behavior that
15588 	 * affects future actions.
15589 	 */
15590 
15591 	case VM_BEHAVIOR_WILLNEED:
15592 		return vm_map_willneed(map, start, end);
15593 
15594 	case VM_BEHAVIOR_DONTNEED:
15595 		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
15596 
15597 	case VM_BEHAVIOR_FREE:
15598 		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
15599 
15600 	case VM_BEHAVIOR_REUSABLE:
15601 		return vm_map_reusable_pages(map, start, end);
15602 
15603 	case VM_BEHAVIOR_REUSE:
15604 		return vm_map_reuse_pages(map, start, end);
15605 
15606 	case VM_BEHAVIOR_CAN_REUSE:
15607 		return vm_map_can_reuse(map, start, end);
15608 
15609 #if MACH_ASSERT
15610 	case VM_BEHAVIOR_PAGEOUT:
15611 		return vm_map_pageout(map, start, end);
15612 #endif /* MACH_ASSERT */
15613 
15614 	default:
15615 		return KERN_INVALID_ARGUMENT;
15616 	}
15617 
15618 	return KERN_SUCCESS;
15619 }
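
/*
 * Context (editor's addition): this routine backs the BSD madvise(2)
 * path.  To the best of this editor's knowledge, the translation in
 * bsd/kern/kern_mman.c is roughly:
 *
 *	MADV_NORMAL          -> VM_BEHAVIOR_DEFAULT     (persistent state)
 *	MADV_RANDOM          -> VM_BEHAVIOR_RANDOM      (persistent state)
 *	MADV_SEQUENTIAL      -> VM_BEHAVIOR_SEQUENTIAL  (persistent state)
 *	MADV_WILLNEED        -> VM_BEHAVIOR_WILLNEED    (immediate action)
 *	MADV_DONTNEED        -> VM_BEHAVIOR_DONTNEED    (immediate action)
 *	MADV_FREE            -> VM_BEHAVIOR_FREE        (immediate action)
 *	MADV_FREE_REUSABLE   -> VM_BEHAVIOR_REUSABLE    (immediate action)
 *	MADV_FREE_REUSE      -> VM_BEHAVIOR_REUSE       (immediate action)
 *	MADV_CAN_REUSE       -> VM_BEHAVIOR_CAN_REUSE   (immediate action)
 */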
15620 
15621 
15622 /*
15623  * Internals for madvise(MADV_WILLNEED) system call.
15624  *
15625  * The implementation is to:
15626  * a) read ahead if the mapping corresponds to a mapped regular file,
15627  * b) or fault in the pages (zero-fill, decompress, etc.) if it's an anonymous mapping.
15628  */
15629 
15630 
15631 static kern_return_t
15632 vm_map_willneed(
15633 	vm_map_t        map,
15634 	vm_map_offset_t start,
15635 	vm_map_offset_t end
15636 	)
15637 {
15638 	vm_map_entry_t                  entry;
15639 	vm_object_t                     object;
15640 	memory_object_t                 pager;
15641 	struct vm_object_fault_info     fault_info = {};
15642 	kern_return_t                   kr;
15643 	vm_object_size_t                len;
15644 	vm_object_offset_t              offset;
15645 
15646 	fault_info.interruptible = THREAD_UNINT;        /* ignored value */
15647 	fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
15648 	fault_info.stealth       = TRUE;
15649 
15650 	/*
15651 	 * The MADV_WILLNEED operation doesn't require any changes to the
15652 	 * vm_map_entry_t's, so the read lock is sufficient.
15653 	 */
15654 
15655 	vm_map_lock_read(map);
15656 
15657 	/*
15658 	 * The madvise semantics require that the address range be fully
15659 	 * allocated with no holes.  Otherwise, we're required to return
15660 	 * an error.
15661 	 */
15662 
15663 	if (!vm_map_range_check(map, start, end, &entry)) {
15664 		vm_map_unlock_read(map);
15665 		return KERN_INVALID_ADDRESS;
15666 	}
15667 
15668 	/*
15669 	 * Examine each vm_map_entry_t in the range.
15670 	 */
15671 	for (; entry != vm_map_to_entry(map) && start < end;) {
15672 		/*
15673 		 * The first time through, the start address could be anywhere
15674 		 * within the vm_map_entry we found.  So adjust the offset to
15675 		 * correspond.  After that, the offset will always be zero to
15676 		 * correspond to the beginning of the current vm_map_entry.
15677 		 */
15678 		offset = (start - entry->vme_start) + VME_OFFSET(entry);
15679 
15680 		/*
15681 		 * Set the length so we don't go beyond the end of the
15682 		 * map_entry or beyond the end of the range we were given.
15683 		 * This range could span also multiple map entries all of which
15684 	 * This range could also span multiple map entries, all of which
15685 	 * map different files, so make sure we only do the right amount
15686 		 * to be multiple map entries all referring to the same object
15687 		 * but with different page permissions, but it's not worth
15688 		 * trying to optimize that case.
15689 		 */
15690 		len = MIN(entry->vme_end - start, end - start);
15691 
15692 		if ((vm_size_t) len != len) {
15693 			/* 32-bit overflow */
15694 			len = (vm_size_t) (0 - PAGE_SIZE);
15695 		}
15696 		fault_info.cluster_size = (vm_size_t) len;
15697 		fault_info.lo_offset    = offset;
15698 		fault_info.hi_offset    = offset + len;
15699 		fault_info.user_tag     = VME_ALIAS(entry);
15700 		fault_info.pmap_options = 0;
15701 		if (entry->iokit_acct ||
15702 		    (!entry->is_sub_map && !entry->use_pmap)) {
15703 			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15704 		}
15705 
15706 		/*
15707 		 * If the entry is a submap OR there's no read permission
15708 		 * to this mapping, then just skip it.
15709 		 */
15710 		if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15711 			entry = entry->vme_next;
15712 			start = entry->vme_start;
15713 			continue;
15714 		}
15715 
15716 		object = VME_OBJECT(entry);
15717 
15718 		if (object == NULL ||
15719 		    object->internal) {
15720 			/*
15721 			 * Memory range backed by anonymous memory.
15722 			 */
15723 			vm_size_t region_size = 0, effective_page_size = 0;
15724 			vm_map_offset_t addr = 0, effective_page_mask = 0;
15725 
15726 			region_size = len;
15727 			addr = start;
15728 
15729 			effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
15730 			effective_page_size = effective_page_mask + 1;
15731 
15732 			vm_map_unlock_read(map);
15733 
15734 			while (region_size) {
15735 				vm_pre_fault(
15736 					vm_map_trunc_page(addr, effective_page_mask),
15737 					VM_PROT_READ | VM_PROT_WRITE);
15738 
15739 				region_size -= effective_page_size;
15740 				addr += effective_page_size;
15741 			}
15742 		} else {
15743 			/*
15744 			 * Find the file object backing this map entry.  If there is
15745 			 * none, then we simply ignore the "will need" advice for this
15746 			 * entry and go on to the next one.
15747 			 */
15748 			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15749 				entry = entry->vme_next;
15750 				start = entry->vme_start;
15751 				continue;
15752 			}
15753 
15754 			vm_object_paging_begin(object);
15755 			pager = object->pager;
15756 			vm_object_unlock(object);
15757 
15758 			/*
15759 			 * The data_request() could take a long time, so let's
15760 			 * release the map lock to avoid blocking other threads.
15761 			 */
15762 			vm_map_unlock_read(map);
15763 
15764 			/*
15765 			 * Get the data from the object asynchronously.
15766 			 *
15767 			 * Note that memory_object_data_request() places limits on the
15768 			 * amount of I/O it will do.  Regardless of the len we
15769 			 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15770 			 * silently truncates the len to that size.  This isn't
15771 			 * necessarily bad since madvise shouldn't really be used to
15772 			 * page in unlimited amounts of data.  Other Unix variants
15773 			 * limit the willneed case as well.  If this turns out to be an
15774 			 * issue for developers, then we can always adjust the policy
15775 			 * here and still be backwards compatible since this is all
15776 			 * just "advice".
15777 			 */
15778 			kr = memory_object_data_request(
15779 				pager,
15780 				vm_object_trunc_page(offset) + object->paging_offset,
15781 				0,      /* ignored */
15782 				VM_PROT_READ,
15783 				(memory_object_fault_info_t)&fault_info);
15784 
15785 			vm_object_lock(object);
15786 			vm_object_paging_end(object);
15787 			vm_object_unlock(object);
15788 
15789 			/*
15790 			 * If we couldn't do the I/O for some reason, just give up on
15791 			 * the madvise.  We still return success to the user since
15792 			 * madvise isn't supposed to fail when the advice can't be
15793 			 * taken.
15794 			 */
15795 
15796 			if (kr != KERN_SUCCESS) {
15797 				return KERN_SUCCESS;
15798 			}
15799 		}
15800 
15801 		start += len;
15802 		if (start >= end) {
15803 			/* done */
15804 			return KERN_SUCCESS;
15805 		}
15806 
15807 		/* look up next entry */
15808 		vm_map_lock_read(map);
15809 		if (!vm_map_lookup_entry(map, start, &entry)) {
15810 			/*
15811 			 * There's a new hole in the address range.
15812 			 */
15813 			vm_map_unlock_read(map);
15814 			return KERN_INVALID_ADDRESS;
15815 		}
15816 	}
15817 
15818 	vm_map_unlock_read(map);
15819 	return KERN_SUCCESS;
15820 }
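
/*
 * Editor's note on the 32-bit clamp above, with a worked example: "len"
 * is a 64-bit vm_object_size_t while fault_info.cluster_size is a
 * vm_size_t, which is 32 bits on ILP32 kernels.  If len were
 * 0x100000000 (4 GiB), (vm_size_t)len would truncate to 0, so the code
 * substitutes the largest page-multiple that fits:
 *
 *	len = (vm_size_t)(0 - PAGE_SIZE);  // 0xFFFFF000 with 4K pages
 *
 * keeping the read-ahead cluster size meaningful instead of zero.
 */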
15821 
15822 static boolean_t
15823 vm_map_entry_is_reusable(
15824 	vm_map_entry_t entry)
15825 {
15826 	/* Only user map entries */
15827 
15828 	vm_object_t object;
15829 
15830 	if (entry->is_sub_map) {
15831 		return FALSE;
15832 	}
15833 
15834 	switch (VME_ALIAS(entry)) {
15835 	case VM_MEMORY_MALLOC:
15836 	case VM_MEMORY_MALLOC_SMALL:
15837 	case VM_MEMORY_MALLOC_LARGE:
15838 	case VM_MEMORY_REALLOC:
15839 	case VM_MEMORY_MALLOC_TINY:
15840 	case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15841 	case VM_MEMORY_MALLOC_LARGE_REUSED:
15842 		/*
15843 		 * This is a malloc() memory region: check if it's still
15844 		 * in its original state and can be re-used for more
15845 		 * malloc() allocations.
15846 		 */
15847 		break;
15848 	default:
15849 		/*
15850 		 * Not a malloc() memory region: let the caller decide if
15851 		 * it's re-usable.
15852 		 */
15853 		return TRUE;
15854 	}
15855 
15856 	if (/*entry->is_shared ||*/
15857 		entry->is_sub_map ||
15858 		entry->in_transition ||
15859 		entry->protection != VM_PROT_DEFAULT ||
15860 		entry->max_protection != VM_PROT_ALL ||
15861 		entry->inheritance != VM_INHERIT_DEFAULT ||
15862 		entry->no_cache ||
15863 		entry->permanent ||
15864 		entry->superpage_size != FALSE ||
15865 		entry->zero_wired_pages ||
15866 		entry->wired_count != 0 ||
15867 		entry->user_wired_count != 0) {
15868 		return FALSE;
15869 	}
15870 
15871 	object = VME_OBJECT(entry);
15872 	if (object == VM_OBJECT_NULL) {
15873 		return TRUE;
15874 	}
15875 	if (
15876 #if 0
15877 		/*
15878 		 * Let's proceed even if the VM object is potentially
15879 		 * shared.
15880 		 * We check for this later when processing the actual
15881 		 * VM pages, so the contents will be safe if shared.
15882 		 *
15883 		 * But we can still mark this memory region as "reusable" to
15884 		 * acknowledge that the caller did let us know that the memory
15885 		 * could be re-used and should not be penalized for holding
15886 		 * on to it.  This allows its "resident size" to not include
15887 		 * the reusable range.
15888 		 */
15889 		object->ref_count == 1 &&
15890 #endif
15891 		object->wired_page_count == 0 &&
15892 		object->copy == VM_OBJECT_NULL &&
15893 		object->shadow == VM_OBJECT_NULL &&
15894 		object->internal &&
15895 		object->purgable == VM_PURGABLE_DENY &&
15896 		object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15897 		!object->code_signed) {
15898 		return TRUE;
15899 	}
15900 	return FALSE;
15901 }
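
/*
 * Summary sketch (editor's addition): for the malloc aliases, an entry
 * only qualifies in its pristine state (default/ALL protections, default
 * inheritance, not wired, not permanent, no superpages), and its backing
 * object, if any, must be an ordinary internal object: unshadowed,
 * uncopied, non-purgeable, default WIMG, and not code-signed.  A caller
 * might probe it like this:
 *
 *	if (vm_map_entry_is_reusable(entry)) {
 *		// safe to apply vm_object_reuse_pages() or
 *		// vm_object_deactivate_pages() to this entry's range
 *	}
 */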
15902 
15903 static kern_return_t
15904 vm_map_reuse_pages(
15905 	vm_map_t        map,
15906 	vm_map_offset_t start,
15907 	vm_map_offset_t end)
15908 {
15909 	vm_map_entry_t                  entry;
15910 	vm_object_t                     object;
15911 	vm_object_offset_t              start_offset, end_offset;
15912 
15913 	/*
15914 	 * The MADV_REUSE operation doesn't require any changes to the
15915 	 * vm_map_entry_t's, so the read lock is sufficient.
15916 	 */
15917 
15918 	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15919 		/*
15920 		 * XXX TODO4K
15921 		 * need to figure out what reusable means for a
15922 		 * portion of a native page.
15923 		 */
15924 		return KERN_SUCCESS;
15925 	}
15926 
15927 	vm_map_lock_read(map);
15928 	assert(map->pmap != kernel_pmap);       /* protect alias access */
15929 
15930 	/*
15931 	 * The madvise semantics require that the address range be fully
15932 	 * allocated with no holes.  Otherwise, we're required to return
15933 	 * an error.
15934 	 */
15935 
15936 	if (!vm_map_range_check(map, start, end, &entry)) {
15937 		vm_map_unlock_read(map);
15938 		vm_page_stats_reusable.reuse_pages_failure++;
15939 		return KERN_INVALID_ADDRESS;
15940 	}
15941 
15942 	/*
15943 	 * Examine each vm_map_entry_t in the range.
15944 	 */
15945 	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15946 	    entry = entry->vme_next) {
15947 		/*
15948 		 * Sanity check on the VM map entry.
15949 		 */
15950 		if (!vm_map_entry_is_reusable(entry)) {
15951 			vm_map_unlock_read(map);
15952 			vm_page_stats_reusable.reuse_pages_failure++;
15953 			return KERN_INVALID_ADDRESS;
15954 		}
15955 
15956 		/*
15957 		 * The first time through, the start address could be anywhere
15958 		 * within the vm_map_entry we found.  So adjust the offset to
15959 		 * correspond.
15960 		 */
15961 		if (entry->vme_start < start) {
15962 			start_offset = start - entry->vme_start;
15963 		} else {
15964 			start_offset = 0;
15965 		}
15966 		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15967 		start_offset += VME_OFFSET(entry);
15968 		end_offset += VME_OFFSET(entry);
15969 
15970 		assert(!entry->is_sub_map);
15971 		object = VME_OBJECT(entry);
15972 		if (object != VM_OBJECT_NULL) {
15973 			vm_object_lock(object);
15974 			vm_object_reuse_pages(object, start_offset, end_offset,
15975 			    TRUE);
15976 			vm_object_unlock(object);
15977 		}
15978 
15979 		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15980 			/*
15981 			 * XXX
15982 			 * We do not hold the VM map exclusively here.
15983 			 * The "alias" field is not that critical, so it's
15984 			 * safe to update it here, as long as it is the only
15985 			 * one that can be modified while holding the VM map
15986 			 * "shared".
15987 			 */
15988 			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15989 		}
15990 	}
15991 
15992 	vm_map_unlock_read(map);
15993 	vm_page_stats_reusable.reuse_pages_success++;
15994 	return KERN_SUCCESS;
15995 }
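
/*
 * Editor's note: a successful pass also advances the Mach alias that
 * tracks a large malloc region's lifecycle (illustrative timeline):
 *
 *	VM_MEMORY_MALLOC_LARGE               initial allocation
 *	-> VM_MEMORY_MALLOC_LARGE_REUSABLE   after vm_map_reusable_pages()
 *	-> VM_MEMORY_MALLOC_LARGE_REUSED     after vm_map_reuse_pages()
 */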
15996 
15997 
15998 static kern_return_t
15999 vm_map_reusable_pages(
16000 	vm_map_t        map,
16001 	vm_map_offset_t start,
16002 	vm_map_offset_t end)
16003 {
16004 	vm_map_entry_t                  entry;
16005 	vm_object_t                     object;
16006 	vm_object_offset_t              start_offset, end_offset;
16007 	vm_map_offset_t                 pmap_offset;
16008 
16009 	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16010 		/*
16011 		 * XXX TODO4K
16012 		 * need to figure out what reusable means for a portion
16013 		 * of a native page.
16014 		 */
16015 		return KERN_SUCCESS;
16016 	}
16017 
16018 	/*
16019 	 * The MADV_REUSABLE operation doesn't require any changes to the
16020 	 * vm_map_entry_t's, so the read lock is sufficient.
16021 	 */
16022 
16023 	vm_map_lock_read(map);
16024 	assert(map->pmap != kernel_pmap);       /* protect alias access */
16025 
16026 	/*
16027 	 * The madvise semantics require that the address range be fully
16028 	 * allocated with no holes.  Otherwise, we're required to return
16029 	 * an error.
16030 	 */
16031 
16032 	if (!vm_map_range_check(map, start, end, &entry)) {
16033 		vm_map_unlock_read(map);
16034 		vm_page_stats_reusable.reusable_pages_failure++;
16035 		return KERN_INVALID_ADDRESS;
16036 	}
16037 
16038 	/*
16039 	 * Examine each vm_map_entry_t in the range.
16040 	 */
16041 	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16042 	    entry = entry->vme_next) {
16043 		int kill_pages = 0;
16044 
16045 		/*
16046 		 * Sanity check on the VM map entry.
16047 		 */
16048 		if (!vm_map_entry_is_reusable(entry)) {
16049 			vm_map_unlock_read(map);
16050 			vm_page_stats_reusable.reusable_pages_failure++;
16051 			return KERN_INVALID_ADDRESS;
16052 		}
16053 
16054 		if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
16055 			/* not writable: can't discard contents */
16056 			vm_map_unlock_read(map);
16057 			vm_page_stats_reusable.reusable_nonwritable++;
16058 			vm_page_stats_reusable.reusable_pages_failure++;
16059 			return KERN_PROTECTION_FAILURE;
16060 		}
16061 
16062 		/*
16063 		 * The first time through, the start address could be anywhere
16064 		 * within the vm_map_entry we found.  So adjust the offset to
16065 		 * correspond.
16066 		 */
16067 		if (entry->vme_start < start) {
16068 			start_offset = start - entry->vme_start;
16069 			pmap_offset = start;
16070 		} else {
16071 			start_offset = 0;
16072 			pmap_offset = entry->vme_start;
16073 		}
16074 		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
16075 		start_offset += VME_OFFSET(entry);
16076 		end_offset += VME_OFFSET(entry);
16077 
16078 		assert(!entry->is_sub_map);
16079 		object = VME_OBJECT(entry);
16080 		if (object == VM_OBJECT_NULL) {
16081 			continue;
16082 		}
16083 
16084 
16085 		vm_object_lock(object);
16086 		if (((object->ref_count == 1) ||
16087 		    (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
16088 		    object->copy == VM_OBJECT_NULL)) &&
16089 		    object->shadow == VM_OBJECT_NULL &&
16090 		    /*
16091 		     * "iokit_acct" entries are billed for their virtual size
16092 		     * (rather than for their resident pages only), so they
16093 		     * wouldn't benefit from making pages reusable, and it
16094 		     * would be hard to keep track of pages that are both
16095 		     * "iokit_acct" and "reusable" in the pmap stats and
16096 		     * ledgers.
16097 		     */
16098 		    !(entry->iokit_acct ||
16099 		    (!entry->is_sub_map && !entry->use_pmap))) {
16100 			if (object->ref_count != 1) {
16101 				vm_page_stats_reusable.reusable_shared++;
16102 			}
16103 			kill_pages = 1;
16104 		} else {
16105 			kill_pages = -1;
16106 		}
16107 		if (kill_pages != -1) {
16108 			vm_object_deactivate_pages(object,
16109 			    start_offset,
16110 			    end_offset - start_offset,
16111 			    kill_pages,
16112 			    TRUE /*reusable_pages*/,
16113 			    map->pmap,
16114 			    pmap_offset);
16115 		} else {
16116 			vm_page_stats_reusable.reusable_pages_shared++;
16117 			DTRACE_VM4(vm_map_reusable_pages_shared,
16118 			    unsigned int, VME_ALIAS(entry),
16119 			    vm_map_t, map,
16120 			    vm_map_entry_t, entry,
16121 			    vm_object_t, object);
16122 		}
16123 		vm_object_unlock(object);
16124 
16125 		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
16126 		    VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
16127 			/*
16128 			 * XXX
16129 			 * We do not hold the VM map exclusively here.
16130 			 * The "alias" field is not that critical, so it's
16131 			 * safe to update it here, as long as it is the only
16132 			 * one that can be modified while holding the VM map
16133 			 * "shared".
16134 			 */
16135 			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
16136 		}
16137 	}
16138 
16139 	vm_map_unlock_read(map);
16140 	vm_page_stats_reusable.reusable_pages_success++;
16141 	return KERN_SUCCESS;
16142 }
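
/*
 * User-space view (editor's addition; a hedged sketch of the intended
 * malloc-style protocol, not a prescribed sequence):
 *
 *	#include <sys/mman.h>
 *
 *	// Done with the contents, but keep the VA range:
 *	madvise(buf, len, MADV_FREE_REUSABLE); // -> vm_map_reusable_pages()
 *	...
 *	// About to reuse the range:
 *	madvise(buf, len, MADV_FREE_REUSE);    // -> vm_map_reuse_pages()
 *
 * Reusable pages drop out of the task's footprint and may be reclaimed
 * by the system; MADV_FREE_REUSE re-activates whatever is left.
 */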
16143 
16144 
16145 static kern_return_t
16146 vm_map_can_reuse(
16147 	vm_map_t        map,
16148 	vm_map_offset_t start,
16149 	vm_map_offset_t end)
16150 {
16151 	vm_map_entry_t                  entry;
16152 
16153 	/*
16154 	 * The MADV_CAN_REUSE operation doesn't require any changes to the
16155 	 * vm_map_entry_t's, so the read lock is sufficient.
16156 	 */
16157 
16158 	vm_map_lock_read(map);
16159 	assert(map->pmap != kernel_pmap);       /* protect alias access */
16160 
16161 	/*
16162 	 * The madvise semantics require that the address range be fully
16163 	 * allocated with no holes.  Otherwise, we're required to return
16164 	 * an error.
16165 	 */
16166 
16167 	if (!vm_map_range_check(map, start, end, &entry)) {
16168 		vm_map_unlock_read(map);
16169 		vm_page_stats_reusable.can_reuse_failure++;
16170 		return KERN_INVALID_ADDRESS;
16171 	}
16172 
16173 	/*
16174 	 * Examine each vm_map_entry_t in the range.
16175 	 */
16176 	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16177 	    entry = entry->vme_next) {
16178 		/*
16179 		 * Sanity check on the VM map entry.
16180 		 */
16181 		if (!vm_map_entry_is_reusable(entry)) {
16182 			vm_map_unlock_read(map);
16183 			vm_page_stats_reusable.can_reuse_failure++;
16184 			return KERN_INVALID_ADDRESS;
16185 		}
16186 	}
16187 
16188 	vm_map_unlock_read(map);
16189 	vm_page_stats_reusable.can_reuse_success++;
16190 	return KERN_SUCCESS;
16191 }
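
/*
 * Editor's note: unlike the REUSABLE/REUSE operations above, CAN_REUSE
 * changes nothing; it only reports whether every entry in [start, end)
 * passes vm_map_entry_is_reusable().  A hypothetical probe:
 *
 *	if (vm_map_can_reuse(map, start, end) == KERN_SUCCESS) {
 *		// the entire range is eligible for the reuse protocol
 *	}
 */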
16192 
16193 
16194 #if MACH_ASSERT
16195 static kern_return_t
16196 vm_map_pageout(
16197 	vm_map_t        map,
16198 	vm_map_offset_t start,
16199 	vm_map_offset_t end)
16200 {
16201 	vm_map_entry_t                  entry;
16202 
16203 	/*
16204 	 * The MADV_PAGEOUT operation doesn't require any changes to the
16205 	 * vm_map_entry_t's, so the read lock is sufficient.
16206 	 */
16207 
16208 	vm_map_lock_read(map);
16209 
16210 	/*
16211 	 * The madvise semantics require that the address range be fully
16212 	 * allocated with no holes.  Otherwise, we're required to return
16213 	 * an error.
16214 	 */
16215 
16216 	if (!vm_map_range_check(map, start, end, &entry)) {
16217 		vm_map_unlock_read(map);
16218 		return KERN_INVALID_ADDRESS;
16219 	}
16220 
16221 	/*
16222 	 * Examine each vm_map_entry_t in the range.
16223 	 */
16224 	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16225 	    entry = entry->vme_next) {
16226 		vm_object_t     object;
16227 
16228 		/*
16229 		 * Sanity check on the VM map entry.
16230 		 */
16231 		if (entry->is_sub_map) {
16232 			vm_map_t submap;
16233 			vm_map_offset_t submap_start;
16234 			vm_map_offset_t submap_end;
16235 			vm_map_entry_t submap_entry;
16236 
16237 			submap = VME_SUBMAP(entry);
16238 			submap_start = VME_OFFSET(entry);
16239 			submap_end = submap_start + (entry->vme_end -
16240 			    entry->vme_start);
16241 
16242 			vm_map_lock_read(submap);
16243 
16244 			if (!vm_map_range_check(submap,
16245 			    submap_start,
16246 			    submap_end,
16247 			    &submap_entry)) {
16248 				vm_map_unlock_read(submap);
16249 				vm_map_unlock_read(map);
16250 				return KERN_INVALID_ADDRESS;
16251 			}
16252 
16253 			object = VME_OBJECT(submap_entry);
16254 			if (submap_entry->is_sub_map ||
16255 			    object == VM_OBJECT_NULL ||
16256 			    !object->internal) {
16257 				vm_map_unlock_read(submap);
16258 				continue;
16259 			}
16260 
16261 			vm_object_pageout(object);
16262 
16263 			vm_map_unlock_read(submap);
16264 			submap = VM_MAP_NULL;
16265 			submap_entry = VM_MAP_ENTRY_NULL;
16266 			continue;
16267 		}
16268 
16269 		object = VME_OBJECT(entry);
16270 		if (entry->is_sub_map ||
16271 		    object == VM_OBJECT_NULL ||
16272 		    !object->internal) {
16273 			continue;
16274 		}
16275 
16276 		vm_object_pageout(object);
16277 	}
16278 
16279 	vm_map_unlock_read(map);
16280 	return KERN_SUCCESS;
16281 }
16282 #endif /* MACH_ASSERT */
16283 
16284 
16285 /*
16286  *	Routine:	vm_map_entry_insert
16287  *
16288  *	Description:	This routine inserts a new vm_entry in a locked map.
16289  */
16290 vm_map_entry_t
16291 vm_map_entry_insert(
16292 	vm_map_t                map,
16293 	vm_map_entry_t          insp_entry,
16294 	vm_map_offset_t         start,
16295 	vm_map_offset_t         end,
16296 	vm_object_t             object,
16297 	vm_object_offset_t      offset,
16298 	vm_map_kernel_flags_t   vmk_flags,
16299 	boolean_t               needs_copy,
16300 	boolean_t               is_shared,
16301 	boolean_t               in_transition,
16302 	vm_prot_t               cur_protection,
16303 	vm_prot_t               max_protection,
16304 	vm_behavior_t           behavior,
16305 	vm_inherit_t            inheritance,
16306 	unsigned short          wired_count,
16307 	boolean_t               no_cache,
16308 	boolean_t               permanent,
16309 	boolean_t               no_copy_on_read,
16310 	unsigned int            superpage_size,
16311 	boolean_t               clear_map_aligned,
16312 	boolean_t               is_submap,
16313 	boolean_t               used_for_jit,
16314 	int                     alias,
16315 	boolean_t               translated_allow_execute)
16316 {
16317 	vm_map_entry_t  new_entry;
16318 
16319 	assert(insp_entry != (vm_map_entry_t)0);
16320 	vm_map_lock_assert_exclusive(map);
16321 
16322 #if DEVELOPMENT || DEBUG
16323 	vm_object_offset_t      end_offset = 0;
16324 	assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16325 #endif /* DEVELOPMENT || DEBUG */
16326 
16327 	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
16328 
16329 	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16330 		new_entry->map_aligned = TRUE;
16331 	} else {
16332 		new_entry->map_aligned = FALSE;
16333 	}
16334 	if (clear_map_aligned &&
16335 	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16336 	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
16337 		new_entry->map_aligned = FALSE;
16338 	}
16339 
16340 	new_entry->vme_start = start;
16341 	new_entry->vme_end = end;
16342 	if (new_entry->map_aligned) {
16343 		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
16344 		    VM_MAP_PAGE_MASK(map)));
16345 		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
16346 		    VM_MAP_PAGE_MASK(map)));
16347 	} else {
16348 		assert(page_aligned(new_entry->vme_start));
16349 		assert(page_aligned(new_entry->vme_end));
16350 	}
16351 	assert(new_entry->vme_start < new_entry->vme_end);
16352 
16353 	VME_OBJECT_SET(new_entry, object);
16354 	VME_OFFSET_SET(new_entry, offset);
16355 	new_entry->is_shared = is_shared;
16356 	new_entry->is_sub_map = is_submap;
16357 	new_entry->needs_copy = needs_copy;
16358 	new_entry->in_transition = in_transition;
16359 	new_entry->needs_wakeup = FALSE;
16360 	new_entry->inheritance = inheritance;
16361 	new_entry->protection = cur_protection;
16362 	new_entry->max_protection = max_protection;
16363 	new_entry->behavior = behavior;
16364 	new_entry->wired_count = wired_count;
16365 	new_entry->user_wired_count = 0;
16366 	if (is_submap) {
16367 		/*
16368 		 * submap: "use_pmap" means "nested".
16369 		 * default: false.
16370 		 */
16371 		new_entry->use_pmap = FALSE;
16372 	} else {
16373 		/*
16374 		 * object: "use_pmap" means "use pmap accounting" for footprint.
16375 		 * default: true.
16376 		 */
16377 		new_entry->use_pmap = TRUE;
16378 	}
16379 	VME_ALIAS_SET(new_entry, alias);
16380 	new_entry->zero_wired_pages = FALSE;
16381 	new_entry->no_cache = no_cache;
16382 	new_entry->permanent = permanent;
16383 	if (superpage_size) {
16384 		new_entry->superpage_size = TRUE;
16385 	} else {
16386 		new_entry->superpage_size = FALSE;
16387 	}
16388 	if (used_for_jit) {
16389 		if (!(map->jit_entry_exists) ||
16390 		    VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
16391 			new_entry->used_for_jit = TRUE;
16392 			map->jit_entry_exists = TRUE;
16393 		}
16394 	} else {
16395 		new_entry->used_for_jit = FALSE;
16396 	}
16397 	if (translated_allow_execute) {
16398 		new_entry->translated_allow_execute = TRUE;
16399 	} else {
16400 		new_entry->translated_allow_execute = FALSE;
16401 	}
16402 	new_entry->pmap_cs_associated = FALSE;
16403 	new_entry->iokit_acct = FALSE;
16404 	new_entry->vme_resilient_codesign = FALSE;
16405 	new_entry->vme_resilient_media = FALSE;
16406 	new_entry->vme_atomic = FALSE;
16407 	new_entry->vme_no_copy_on_read = no_copy_on_read;
16408 
16409 	/*
16410 	 *	Insert the new entry into the list.
16411 	 */
16412 
16413 	vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
16414 	map->size += end - start;
16415 
16416 	/*
16417 	 *	Update the free space hint and the lookup hint.
16418 	 */
16419 
16420 	SAVE_HINT_MAP_WRITE(map, new_entry);
16421 	return new_entry;
16422 }
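
/*
 * Editor's note on the alignment logic above, with an example: entries in
 * a map whose page size differs from the kernel's (VM_MAP_PAGE_SHIFT(map)
 * != PAGE_SHIFT, e.g. a 16K-page map on a 4K-page kernel) start out
 * "map_aligned", and the assertions then require both ends to sit on map
 * page boundaries:
 *
 *	VM_MAP_PAGE_ALIGNED(0x8000, 0x3FFF)  // true:  16K-aligned
 *	VM_MAP_PAGE_ALIGNED(0x9000, 0x3FFF)  // false: only 4K-aligned
 *
 * Passing clear_map_aligned == TRUE lets the caller opt such an entry out
 * when "start" or "end" is known not to be map-page aligned.
 */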
16423 
16424 /*
16425  *	Routine:	vm_map_remap_extract
16426  *
16427  *	Description:	This routine returns a vm_entry list from a map.
16428  */
16429 static kern_return_t
16430 vm_map_remap_extract(
16431 	vm_map_t                map,
16432 	vm_map_offset_t         addr,
16433 	vm_map_size_t           size,
16434 	boolean_t               copy,
16435 	struct vm_map_header    *map_header,
16436 	vm_prot_t               *cur_protection,   /* IN/OUT */
16437 	vm_prot_t               *max_protection,   /* IN/OUT */
16438 	/* What, no behavior? */
16439 	vm_inherit_t            inheritance,
16440 	vm_map_kernel_flags_t   vmk_flags)
16441 {
16442 	kern_return_t           result;
16443 	vm_map_size_t           mapped_size;
16444 	vm_map_size_t           tmp_size;
16445 	vm_map_entry_t          src_entry;     /* result of last map lookup */
16446 	vm_map_entry_t          new_entry;
16447 	vm_object_offset_t      offset;
16448 	vm_map_offset_t         map_address;
16449 	vm_map_offset_t         src_start;     /* start of entry to map */
16450 	vm_map_offset_t         src_end;       /* end of region to be mapped */
16451 	vm_object_t             object;
16452 	vm_map_version_t        version;
16453 	boolean_t               src_needs_copy;
16454 	boolean_t               new_entry_needs_copy;
16455 	vm_map_entry_t          saved_src_entry;
16456 	boolean_t               src_entry_was_wired;
16457 	vm_prot_t               max_prot_for_prot_copy;
16458 	vm_map_offset_t         effective_page_mask;
16459 	boolean_t               pageable, same_map;
16460 	boolean_t               vm_remap_legacy;
16461 	vm_prot_t               required_cur_prot, required_max_prot;
16462 	vm_object_t             new_copy_object;     /* vm_object_copy_* result */
16463 	boolean_t               saved_used_for_jit;     /* Saved used_for_jit. */
16464 
16465 	pageable = vmk_flags.vmkf_copy_pageable;
16466 	same_map = vmk_flags.vmkf_copy_same_map;
16467 
16468 	effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16469 
16470 	assert(map != VM_MAP_NULL);
16471 	assert(size != 0);
16472 	assert(size == vm_map_round_page(size, effective_page_mask));
16473 	assert(inheritance == VM_INHERIT_NONE ||
16474 	    inheritance == VM_INHERIT_COPY ||
16475 	    inheritance == VM_INHERIT_SHARE);
16476 	assert(!(*cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16477 	assert(!(*max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16478 	assert((*cur_protection & *max_protection) == *cur_protection);
16479 
16480 	/*
16481 	 *	Compute start and end of region.
16482 	 */
16483 	src_start = vm_map_trunc_page(addr, effective_page_mask);
16484 	src_end = vm_map_round_page(src_start + size, effective_page_mask);
16485 
16486 	/*
16487 	 *	Initialize map_header.
16488 	 */
16489 	map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16490 	map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16491 	map_header->nentries = 0;
16492 	map_header->entries_pageable = pageable;
16493 //	map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16494 	map_header->page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(map);
16495 	map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16496 
16497 	vm_map_store_init( map_header );
16498 
16499 	if (copy && vmk_flags.vmkf_remap_prot_copy) {
16500 		/*
16501 		 * Special case for vm_map_protect(VM_PROT_COPY):
16502 		 * we want to set the new mappings' max protection to the
16503 		 * specified *max_protection...
16504 		 */
16505 		max_prot_for_prot_copy = *max_protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
16506 		/* ... but we want to use the vm_remap() legacy mode */
16507 		*max_protection = VM_PROT_NONE;
16508 		*cur_protection = VM_PROT_NONE;
16509 	} else {
16510 		max_prot_for_prot_copy = VM_PROT_NONE;
16511 	}
16512 
16513 	if (*cur_protection == VM_PROT_NONE &&
16514 	    *max_protection == VM_PROT_NONE) {
16515 		/*
16516 		 * vm_remap() legacy mode:
16517 		 * Extract all memory regions in the specified range and
16518 		 * collect the strictest set of protections allowed on the
16519 		 * entire range, so the caller knows what they can do with
16520 		 * the remapped range.
16521 		 * We start with VM_PROT_ALL and we'll remove the protections
16522 		 * missing from each memory region.
16523 		 */
16524 		vm_remap_legacy = TRUE;
16525 		*cur_protection = VM_PROT_ALL;
16526 		*max_protection = VM_PROT_ALL;
16527 		required_cur_prot = VM_PROT_NONE;
16528 		required_max_prot = VM_PROT_NONE;
16529 	} else {
16530 		/*
16531 		 * vm_remap_new() mode:
16532 		 * Extract all memory regions in the specified range and
16533 		 * ensure that they have at least the protections specified
16534 		 * by the caller via *cur_protection and *max_protection.
16535 		 * The resulting mapping should have these protections.
16536 		 */
16537 		vm_remap_legacy = FALSE;
16538 		if (copy) {
16539 			required_cur_prot = VM_PROT_NONE;
16540 			required_max_prot = VM_PROT_READ;
16541 		} else {
16542 			required_cur_prot = *cur_protection;
16543 			required_max_prot = *max_protection;
16544 		}
16545 	}
16546 
16547 	map_address = 0;
16548 	mapped_size = 0;
16549 	result = KERN_SUCCESS;
16550 
16551 	/*
16552 	 *	The specified source virtual space might correspond to
16553 	 *	multiple map entries, need to loop on them.
16554 	 */
16555 	vm_map_lock(map);
16556 	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16557 		/*
16558 		 * This address space uses sub-pages so the range might
16559 		 * not be re-mappable in an address space with larger
16560 		 * pages. Re-assemble any broken-up VM map entries to
16561 		 * improve our chances of making it work.
16562 		 */
16563 		vm_map_simplify_range(map, src_start, src_end);
16564 	}
16565 	while (mapped_size != size) {
16566 		vm_map_size_t   entry_size;
16567 
16568 		/*
16569 		 *	Find the beginning of the region.
16570 		 */
16571 		if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16572 			result = KERN_INVALID_ADDRESS;
16573 			break;
16574 		}
16575 
16576 		if (src_start < src_entry->vme_start ||
16577 		    (mapped_size && src_start != src_entry->vme_start)) {
16578 			result = KERN_INVALID_ADDRESS;
16579 			break;
16580 		}
16581 
16582 		tmp_size = size - mapped_size;
16583 		if (src_end > src_entry->vme_end) {
16584 			tmp_size -= (src_end - src_entry->vme_end);
16585 		}
16586 
16587 		entry_size = (vm_map_size_t)(src_entry->vme_end -
16588 		    src_entry->vme_start);
16589 
16590 		if (src_entry->is_sub_map &&
16591 		    vmk_flags.vmkf_copy_single_object) {
16592 			vm_map_t submap;
16593 			vm_map_offset_t submap_start;
16594 			vm_map_size_t submap_size;
16595 			boolean_t submap_needs_copy;
16596 
16597 			/*
16598 			 * No check for "required protection" on "src_entry"
16599 			 * because the protections that matter are the ones
16600 			 * on the submap's VM map entry, which will be checked
16601 			 * during the call to vm_map_remap_extract() below.
16602 			 */
16603 			submap_size = src_entry->vme_end - src_start;
16604 			if (submap_size > size) {
16605 				submap_size = size;
16606 			}
16607 			submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16608 			submap = VME_SUBMAP(src_entry);
16609 			if (copy) {
16610 				/*
16611 				 * The caller wants a copy-on-write re-mapping,
16612 				 * so let's extract from the submap accordingly.
16613 				 */
16614 				submap_needs_copy = TRUE;
16615 			} else if (src_entry->needs_copy) {
16616 				/*
16617 				 * The caller wants a shared re-mapping but the
16618 				 * submap is mapped with "needs_copy", so its
16619 				 * contents can't be shared as is. Extract the
16620 				 * contents of the submap as "copy-on-write".
16621 				 * The re-mapping won't be shared with the
16622 				 * original mapping but this is equivalent to
16623 				 * what happened with the original "remap from
16624 				 * submap" code.
16625 				 * The shared region is mapped "needs_copy", for
16626 				 * example.
16627 				 */
16628 				submap_needs_copy = TRUE;
16629 			} else {
16630 				/*
16631 				 * The caller wants a shared re-mapping and
16632 				 * this mapping can be shared (no "needs_copy"),
16633 				 * so let's extract from the submap accordingly.
16634 				 * Kernel submaps are mapped without
16635 				 * "needs_copy", for example.
16636 				 */
16637 				submap_needs_copy = FALSE;
16638 			}
16639 			vm_map_reference(submap);
16640 			vm_map_unlock(map);
16641 			src_entry = NULL;
16642 			if (vm_remap_legacy) {
16643 				*cur_protection = VM_PROT_NONE;
16644 				*max_protection = VM_PROT_NONE;
16645 			}
16646 
16647 			DTRACE_VM7(remap_submap_recurse,
16648 			    vm_map_t, map,
16649 			    vm_map_offset_t, addr,
16650 			    vm_map_size_t, size,
16651 			    boolean_t, copy,
16652 			    vm_map_offset_t, submap_start,
16653 			    vm_map_size_t, submap_size,
16654 			    boolean_t, submap_needs_copy);
16655 
16656 			result = vm_map_remap_extract(submap,
16657 			    submap_start,
16658 			    submap_size,
16659 			    submap_needs_copy,
16660 			    map_header,
16661 			    cur_protection,
16662 			    max_protection,
16663 			    inheritance,
16664 			    vmk_flags);
16665 			vm_map_deallocate(submap);
16666 			return result;
16667 		}
16668 
16669 		if (src_entry->is_sub_map) {
16670 			/* protections for submap mapping are irrelevant here */
16671 		} else if (((src_entry->protection & required_cur_prot) !=
16672 		    required_cur_prot) ||
16673 		    ((src_entry->max_protection & required_max_prot) !=
16674 		    required_max_prot)) {
16675 			if (vmk_flags.vmkf_copy_single_object &&
16676 			    mapped_size != 0) {
16677 				/*
16678 				 * Single object extraction.
16679 				 * We can't extract more with the required
16680 				 * protection but we've extracted some, so
16681 				 * stop there and declare success.
16682 				 * The caller should check the size of
16683 				 * the copy entry we've extracted.
16684 				 */
16685 				result = KERN_SUCCESS;
16686 			} else {
16687 				/*
16688 				 * VM range extraction.
16689 				 * Required protection is not available
16690 				 * for this part of the range: fail.
16691 				 */
16692 				result = KERN_PROTECTION_FAILURE;
16693 			}
16694 			break;
16695 		}
16696 
16697 		if (src_entry->is_sub_map) {
16698 			vm_map_t submap;
16699 			vm_map_offset_t submap_start;
16700 			vm_map_size_t submap_size;
16701 			vm_map_copy_t submap_copy;
16702 			vm_prot_t submap_curprot, submap_maxprot;
16703 			boolean_t submap_needs_copy;
16704 
16705 			/*
16706 			 * No check for "required protection" on "src_entry"
16707 			 * because the protections that matter are the ones
16708 			 * on the submap's VM map entry, which will be checked
16709 			 * during the call to vm_map_copy_extract() below.
16710 			 */
16711 			object = VM_OBJECT_NULL;
16712 			submap_copy = VM_MAP_COPY_NULL;
16713 
16714 			/* find equivalent range in the submap */
16715 			submap = VME_SUBMAP(src_entry);
16716 			submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16717 			submap_size = tmp_size;
16718 			if (copy) {
16719 				/*
16720 				 * The caller wants a copy-on-write re-mapping,
16721 				 * so let's extract from the submap accordingly.
16722 				 */
16723 				submap_needs_copy = TRUE;
16724 			} else if (src_entry->needs_copy) {
16725 				/*
16726 				 * The caller wants a shared re-mapping but the
16727 				 * submap is mapped with "needs_copy", so its
16728 				 * contents can't be shared as is. Extract the
16729 				 * contents of the submap as "copy-on-write".
16730 				 * The re-mapping won't be shared with the
16731 				 * original mapping but this is equivalent to
16732 				 * what happened with the original "remap from
16733 				 * submap" code.
16734 				 * The shared region is mapped "needs_copy", for
16735 				 * example.
16736 				 */
16737 				submap_needs_copy = TRUE;
16738 			} else {
16739 				/*
16740 				 * The caller wants a shared re-mapping and
16741 				 * this mapping can be shared (no "needs_copy"),
16742 				 * so let's extract from the submap accordingly.
16743 				 * Kernel submaps are mapped without
16744 				 * "needs_copy", for example.
16745 				 */
16746 				submap_needs_copy = FALSE;
16747 			}
16748 			/* extra ref to keep submap alive */
16749 			vm_map_reference(submap);
16750 
16751 			DTRACE_VM7(remap_submap_recurse,
16752 			    vm_map_t, map,
16753 			    vm_map_offset_t, addr,
16754 			    vm_map_size_t, size,
16755 			    boolean_t, copy,
16756 			    vm_map_offset_t, submap_start,
16757 			    vm_map_size_t, submap_size,
16758 			    boolean_t, submap_needs_copy);
16759 
16760 			/*
16761 			 * The map can be safely unlocked since we
16762 			 * already hold a reference on the submap.
16763 			 *
16764 			 * No timestamp since we don't care if the map
16765 			 * gets modified while we're down in the submap.
16766 			 * We'll resume the extraction at src_start + tmp_size
16767 			 * anyway.
16768 			 */
16769 			vm_map_unlock(map);
16770 			src_entry = NULL; /* not valid once map is unlocked */
16771 
16772 			if (vm_remap_legacy) {
16773 				submap_curprot = VM_PROT_NONE;
16774 				submap_maxprot = VM_PROT_NONE;
16775 				if (max_prot_for_prot_copy) {
16776 					submap_maxprot = max_prot_for_prot_copy;
16777 				}
16778 			} else {
16779 				assert(!max_prot_for_prot_copy);
16780 				submap_curprot = *cur_protection;
16781 				submap_maxprot = *max_protection;
16782 			}
16783 			result = vm_map_copy_extract(submap,
16784 			    submap_start,
16785 			    submap_size,
16786 			    submap_needs_copy,
16787 			    &submap_copy,
16788 			    &submap_curprot,
16789 			    &submap_maxprot,
16790 			    inheritance,
16791 			    vmk_flags);
16792 
16793 			/* release extra ref on submap */
16794 			vm_map_deallocate(submap);
16795 			submap = VM_MAP_NULL;
16796 
16797 			if (result != KERN_SUCCESS) {
16798 				vm_map_lock(map);
16799 				break;
16800 			}
16801 
16802 			/* transfer submap_copy entries to map_header */
16803 			while (vm_map_copy_first_entry(submap_copy) !=
16804 			    vm_map_copy_to_entry(submap_copy)) {
16805 				vm_map_entry_t copy_entry;
16806 				vm_map_size_t copy_entry_size;
16807 
16808 				copy_entry = vm_map_copy_first_entry(submap_copy);
16809 				assert(!copy_entry->is_sub_map);
16810 				object = VME_OBJECT(copy_entry);
16811 
16812 				/*
16813 				 * Prevent kernel_object from being exposed to
16814 				 * user space.
16815 				 */
16816 				if (__improbable(object == kernel_object)) {
16817 					printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16818 					    proc_selfpid(),
16819 					    (current_task()->bsd_info
16820 					    ? proc_name_address(current_task()->bsd_info)
16821 					    : "?"));
16822 					DTRACE_VM(extract_kernel_only);
16823 					result = KERN_INVALID_RIGHT;
16824 					vm_map_copy_discard(submap_copy);
16825 					submap_copy = VM_MAP_COPY_NULL;
16826 					vm_map_lock(map);
16827 					break;
16828 				}
16829 
16830 				vm_map_copy_entry_unlink(submap_copy, copy_entry);
16831 				copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16832 				copy_entry->vme_start = map_address;
16833 				copy_entry->vme_end = map_address + copy_entry_size;
16834 				map_address += copy_entry_size;
16835 				mapped_size += copy_entry_size;
16836 				src_start += copy_entry_size;
16837 				assert(src_start <= src_end);
16838 				_vm_map_store_entry_link(map_header,
16839 				    map_header->links.prev,
16840 				    copy_entry);
16841 			}
16842 			/* done with submap_copy */
16843 			vm_map_copy_discard(submap_copy);
16844 
16845 			if (vm_remap_legacy) {
16846 				*cur_protection &= submap_curprot;
16847 				*max_protection &= submap_maxprot;
16848 			}
16849 
16850 			/* re-acquire the map lock and continue to next entry */
16851 			vm_map_lock(map);
16852 			continue;
16853 		} else {
16854 			object = VME_OBJECT(src_entry);
16855 
16856 			/*
16857 			 * Prevent kernel_object from being exposed to
16858 			 * user space.
16859 			 */
16860 			if (__improbable(object == kernel_object)) {
16861 				printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16862 				    proc_selfpid(),
16863 				    (current_task()->bsd_info
16864 				    ? proc_name_address(current_task()->bsd_info)
16865 				    : "?"));
16866 				DTRACE_VM(extract_kernel_only);
16867 				result = KERN_INVALID_RIGHT;
16868 				break;
16869 			}
16870 
16871 			if (src_entry->iokit_acct) {
16872 				/*
16873 				 * This entry uses "IOKit accounting".
16874 				 */
16875 			} else if (object != VM_OBJECT_NULL &&
16876 			    (object->purgable != VM_PURGABLE_DENY ||
16877 			    object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
16878 				/*
16879 				 * Purgeable objects have their own accounting:
16880 				 * no pmap accounting for them.
16881 				 */
16882 				assertf(!src_entry->use_pmap,
16883 				    "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16884 				    map,
16885 				    src_entry,
16886 				    (uint64_t)src_entry->vme_start,
16887 				    (uint64_t)src_entry->vme_end,
16888 				    src_entry->protection,
16889 				    src_entry->max_protection,
16890 				    VME_ALIAS(src_entry));
16891 			} else {
16892 				/*
16893 				 * Not IOKit or purgeable:
16894 				 * must be accounted by pmap stats.
16895 				 */
16896 				assertf(src_entry->use_pmap,
16897 				    "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16898 				    map,
16899 				    src_entry,
16900 				    (uint64_t)src_entry->vme_start,
16901 				    (uint64_t)src_entry->vme_end,
16902 				    src_entry->protection,
16903 				    src_entry->max_protection,
16904 				    VME_ALIAS(src_entry));
16905 			}
16906 
16907 			if (object == VM_OBJECT_NULL) {
16908 				assert(!src_entry->needs_copy);
16909 				object = vm_object_allocate(entry_size);
16910 				VME_OFFSET_SET(src_entry, 0);
16911 				VME_OBJECT_SET(src_entry, object);
16912 				assert(src_entry->use_pmap);
16913 				assert(!map->mapped_in_other_pmaps);
16914 			} else if (src_entry->wired_count ||
16915 			    object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16916 				/*
16917 				 * A wired memory region should not have
16918 				 * any pending copy-on-write and needs to
16919 				 * keep pointing at the VM object that
16920 				 * contains the wired pages.
16921 				 * If we're sharing this memory (copy=false),
16922 				 * we'll share this VM object.
16923 				 * If we're copying this memory (copy=true),
16924 				 * we'll call vm_object_copy_slowly() below
16925 				 * and use the new VM object for the remapping.
16926 				 *
16927 				 * Or, we are already using an asymmetric
16928 				 * copy, and therefore we already have
16929 				 * the right object.
16930 				 */
16931 				assert(!src_entry->needs_copy);
16932 			} else if (src_entry->needs_copy || object->shadowed ||
16933 			    (object->internal && !object->true_share &&
16934 			    !src_entry->is_shared &&
16935 			    object->vo_size > entry_size)) {
16936 				VME_OBJECT_SHADOW(src_entry, entry_size);
16937 				assert(src_entry->use_pmap);
16938 
16939 				if (!src_entry->needs_copy &&
16940 				    (src_entry->protection & VM_PROT_WRITE)) {
16941 					vm_prot_t prot;
16942 
16943 					assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16944 
16945 					prot = src_entry->protection & ~VM_PROT_WRITE;
16946 
16947 					if (override_nx(map,
16948 					    VME_ALIAS(src_entry))
16949 					    && prot) {
16950 						prot |= VM_PROT_EXECUTE;
16951 					}
16952 
16953 					assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16954 
16955 					if (map->mapped_in_other_pmaps) {
16956 						vm_object_pmap_protect(
16957 							VME_OBJECT(src_entry),
16958 							VME_OFFSET(src_entry),
16959 							entry_size,
16960 							PMAP_NULL,
16961 							PAGE_SIZE,
16962 							src_entry->vme_start,
16963 							prot);
16964 #if MACH_ASSERT
16965 					} else if (__improbable(map->pmap == PMAP_NULL)) {
16966 						extern boolean_t vm_tests_in_progress;
16967 						assert(vm_tests_in_progress);
16968 						/*
16969 						 * Some VM tests (in vm_tests.c)
16970 						 * sometimes want to use a VM
16971 						 * map without a pmap.
16972 						 * Otherwise, this should never
16973 						 * happen.
16974 						 */
16975 #endif /* MACH_ASSERT */
16976 					} else {
16977 						pmap_protect(vm_map_pmap(map),
16978 						    src_entry->vme_start,
16979 						    src_entry->vme_end,
16980 						    prot);
16981 					}
16982 				}
16983 
16984 				object = VME_OBJECT(src_entry);
16985 				src_entry->needs_copy = FALSE;
16986 			}
16987 
16988 
16989 			vm_object_lock(object);
16990 			vm_object_reference_locked(object); /* object ref. for new entry */
16991 			assert(!src_entry->needs_copy);
16992 			if (object->copy_strategy ==
16993 			    MEMORY_OBJECT_COPY_SYMMETRIC) {
16994 				/*
16995 				 * If we want to share this object (copy==0),
16996 				 * it needs to be COPY_DELAY.
16997 				 * If we want to copy this object (copy==1),
16998 				 * we can't just set "needs_copy" on our side
16999 				 * and expect the other side to do the same
17000 				 * (symmetrically), so we can't let the object
17001 				 * stay COPY_SYMMETRIC.
17002 				 * So we always switch from COPY_SYMMETRIC to
17003 				 * COPY_DELAY.
17004 				 */
17005 				object->copy_strategy =
17006 				    MEMORY_OBJECT_COPY_DELAY;
17007 				object->true_share = TRUE;
17008 			}
17009 			vm_object_unlock(object);
17010 		}
17011 
17012 		offset = (VME_OFFSET(src_entry) +
17013 		    (src_start - src_entry->vme_start));
17014 
17015 		new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
17016 		vm_map_entry_copy(map, new_entry, src_entry);
17017 		if (new_entry->is_sub_map) {
17018 			/* clear address space specifics */
17019 			new_entry->use_pmap = FALSE;
17020 		} else if (copy) {
17021 			/*
17022 			 * We're dealing with a copy-on-write operation,
17023 			 * so the resulting mapping should not inherit the
17024 			 * original mapping's accounting settings.
17025 			 * "use_pmap" should be reset to its default (TRUE)
17026 			 * so that the new mapping gets accounted for in
17027 			 * the task's memory footprint.
17028 			 */
17029 			new_entry->use_pmap = TRUE;
17030 		}
17031 		/* "iokit_acct" was cleared in vm_map_entry_copy() */
17032 		assert(!new_entry->iokit_acct);
17033 
17034 		new_entry->map_aligned = FALSE;
17035 
17036 		new_entry->vme_start = map_address;
17037 		new_entry->vme_end = map_address + tmp_size;
17038 		assert(new_entry->vme_start < new_entry->vme_end);
17039 		if (copy && vmk_flags.vmkf_remap_prot_copy) {
17040 			/*
17041 			 * Remapping for vm_map_protect(VM_PROT_COPY)
17042 			 * to convert a read-only mapping into a
17043 			 * copy-on-write version of itself but
17044 			 * with write access:
17045 			 * keep the original inheritance and add
17046 			 * VM_PROT_WRITE to the max protection.
17047 			 */
17048 			new_entry->inheritance = src_entry->inheritance;
17049 			new_entry->protection &= max_prot_for_prot_copy;
17050 			new_entry->max_protection |= VM_PROT_WRITE;
17051 		} else {
17052 			new_entry->inheritance = inheritance;
17053 			if (!vm_remap_legacy) {
17054 				new_entry->protection = *cur_protection;
17055 				new_entry->max_protection = *max_protection;
17056 			}
17057 		}
17058 		VME_OFFSET_SET(new_entry, offset);
17059 
17060 		/*
17061 		 * The new region has to be copied now if required.
17062 		 */
17063 RestartCopy:
17064 		if (!copy) {
17065 			if (src_entry->used_for_jit == TRUE) {
17066 				if (same_map) {
17067 				} else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
17068 					/*
17069 					 * Cannot allow an entry describing a JIT
17070 					 * region to be shared across address spaces.
17071 					 */
17072 					result = KERN_INVALID_ARGUMENT;
17073 					vm_object_deallocate(object);
17074 					_vm_map_entry_dispose(map_header, new_entry);
17075 					new_entry = VM_MAP_ENTRY_NULL;
17076 					break;
17077 				}
17078 			}
17079 
17080 			src_entry->is_shared = TRUE;
17081 			new_entry->is_shared = TRUE;
17082 			if (!(new_entry->is_sub_map)) {
17083 				new_entry->needs_copy = FALSE;
17084 			}
17085 		} else if (src_entry->is_sub_map) {
17086 			/* make this a COW sub_map if not already */
17087 			assert(new_entry->wired_count == 0);
17088 			new_entry->needs_copy = TRUE;
17089 			object = VM_OBJECT_NULL;
17090 		} else if (src_entry->wired_count == 0 &&
17091 		    !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
17092 		    vm_object_copy_quickly(VME_OBJECT(new_entry),
17093 		    VME_OFFSET(new_entry),
17094 		    (new_entry->vme_end -
17095 		    new_entry->vme_start),
17096 		    &src_needs_copy,
17097 		    &new_entry_needs_copy)) {
17098 			new_entry->needs_copy = new_entry_needs_copy;
17099 			new_entry->is_shared = FALSE;
17100 			assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17101 
17102 			/*
17103 			 * Handle copy_on_write semantics.
17104 			 */
17105 			if (src_needs_copy && !src_entry->needs_copy) {
17106 				vm_prot_t prot;
17107 
17108 				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17109 
17110 				prot = src_entry->protection & ~VM_PROT_WRITE;
17111 
17112 				if (override_nx(map,
17113 				    VME_ALIAS(src_entry))
17114 				    && prot) {
17115 					prot |= VM_PROT_EXECUTE;
17116 				}
17117 
17118 				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17119 
17120 				vm_object_pmap_protect(object,
17121 				    offset,
17122 				    entry_size,
17123 				    ((src_entry->is_shared
17124 				    || map->mapped_in_other_pmaps) ?
17125 				    PMAP_NULL : map->pmap),
17126 				    VM_MAP_PAGE_SIZE(map),
17127 				    src_entry->vme_start,
17128 				    prot);
17129 
17130 				assert(src_entry->wired_count == 0);
17131 				src_entry->needs_copy = TRUE;
17132 			}
17133 			/*
17134 			 * Throw away the old object reference of the new entry.
17135 			 */
17136 			vm_object_deallocate(object);
17137 		} else {
17138 			new_entry->is_shared = FALSE;
17139 			assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17140 
17141 			src_entry_was_wired = (src_entry->wired_count > 0);
17142 			saved_src_entry = src_entry;
17143 			src_entry = VM_MAP_ENTRY_NULL;
17144 
17145 			/*
17146 			 * The map can be safely unlocked since we
17147 			 * already hold a reference on the object.
17148 			 *
17149 			 * Record the timestamp of the map for later
17150 			 * verification, and unlock the map.
17151 			 */
17152 			version.main_timestamp = map->timestamp;
17153 			vm_map_unlock(map);     /* Increments timestamp once! */
17154 
17155 			/*
17156 			 * Perform the copy.
17157 			 */
17158 			if (src_entry_was_wired > 0 ||
17159 			    (debug4k_no_cow_copyin &&
17160 			    VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
17161 				vm_object_lock(object);
17162 				result = vm_object_copy_slowly(
17163 					object,
17164 					offset,
17165 					(new_entry->vme_end -
17166 					new_entry->vme_start),
17167 					THREAD_UNINT,
17168 					&new_copy_object);
17169 				/* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17170 				saved_used_for_jit = new_entry->used_for_jit;
17171 				VME_OBJECT_SET(new_entry, new_copy_object);
17172 				new_entry->used_for_jit = saved_used_for_jit;
17173 				VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
17174 				new_entry->needs_copy = FALSE;
17175 			} else {
17176 				vm_object_offset_t new_offset;
17177 
17178 				new_offset = VME_OFFSET(new_entry);
17179 				result = vm_object_copy_strategically(
17180 					object,
17181 					offset,
17182 					(new_entry->vme_end -
17183 					new_entry->vme_start),
17184 					&new_copy_object,
17185 					&new_offset,
17186 					&new_entry_needs_copy);
17187 				/* VME_OBJECT_SET will reset used_for_jit, so preserve it. */
17188 				saved_used_for_jit = new_entry->used_for_jit;
17189 				VME_OBJECT_SET(new_entry, new_copy_object);
17190 				new_entry->used_for_jit = saved_used_for_jit;
17191 				if (new_offset != VME_OFFSET(new_entry)) {
17192 					VME_OFFSET_SET(new_entry, new_offset);
17193 				}
17194 
17195 				new_entry->needs_copy = new_entry_needs_copy;
17196 			}
17197 
17198 			/*
17199 			 * Throw away the old object reference of the new entry.
17200 			 */
17201 			vm_object_deallocate(object);
17202 
17203 			if (result != KERN_SUCCESS &&
17204 			    result != KERN_MEMORY_RESTART_COPY) {
17205 				_vm_map_entry_dispose(map_header, new_entry);
17206 				vm_map_lock(map);
17207 				break;
17208 			}
17209 
17210 			/*
17211 			 * Verify that the map has not substantially
17212 			 * changed while the copy was being made.
17213 			 */
17214 
17215 			vm_map_lock(map);
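			/*
			 * The vm_map_unlock() above bumped the map's
			 * timestamp exactly once, so "main_timestamp + 1"
			 * is the expected value if nobody else touched
			 * the map while it was unlocked.
			 */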
17216 			if (version.main_timestamp + 1 != map->timestamp) {
17217 				/*
17218 				 * Simple version comparison failed.
17219 				 *
17220 				 * Retry the lookup and verify that the
17221 				 * same object/offset are still present.
17222 				 */
17223 				saved_src_entry = VM_MAP_ENTRY_NULL;
17224 				vm_object_deallocate(VME_OBJECT(new_entry));
17225 				_vm_map_entry_dispose(map_header, new_entry);
17226 				if (result == KERN_MEMORY_RESTART_COPY) {
17227 					result = KERN_SUCCESS;
17228 				}
17229 				continue;
17230 			}
17231 			/* map hasn't changed: src_entry is still valid */
17232 			src_entry = saved_src_entry;
17233 			saved_src_entry = VM_MAP_ENTRY_NULL;
17234 
17235 			if (result == KERN_MEMORY_RESTART_COPY) {
17236 				vm_object_reference(object);
17237 				goto RestartCopy;
17238 			}
17239 		}
17240 
17241 		_vm_map_store_entry_link(map_header,
17242 		    map_header->links.prev, new_entry);
17243 
17244 		/* protections for submap mapping are irrelevant here */
17245 		if (vm_remap_legacy && !src_entry->is_sub_map) {
17246 			*cur_protection &= src_entry->protection;
17247 			*max_protection &= src_entry->max_protection;
17248 		}
17249 
17250 		map_address += tmp_size;
17251 		mapped_size += tmp_size;
17252 		src_start += tmp_size;
17253 
17254 		if (vmk_flags.vmkf_copy_single_object) {
17255 			if (mapped_size != size) {
17256 				DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17257 				if (src_entry->vme_next != vm_map_to_entry(map) &&
17258 				    VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
17259 					/* XXX TODO4K */
17260 					DEBUG4K_ERROR("could have extended copy to next entry...\n");
17261 				}
17262 			}
17263 			break;
17264 		}
17265 	} /* end while */
17266 
17267 	vm_map_unlock(map);
17268 	if (result != KERN_SUCCESS) {
17269 		/*
17270 		 * Free all allocated elements.
17271 		 */
17272 		for (src_entry = map_header->links.next;
17273 		    src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17274 		    src_entry = new_entry) {
17275 			new_entry = src_entry->vme_next;
17276 			_vm_map_store_entry_unlink(map_header, src_entry);
17277 			if (src_entry->is_sub_map) {
17278 				vm_map_deallocate(VME_SUBMAP(src_entry));
17279 			} else {
17280 				vm_object_deallocate(VME_OBJECT(src_entry));
17281 			}
17282 			_vm_map_entry_dispose(map_header, src_entry);
17283 		}
17284 	}
17285 	return result;
17286 }
17287 
17288 bool
17289 vm_map_is_exotic(
17290 	vm_map_t map)
17291 {
17292 	return VM_MAP_IS_EXOTIC(map);
17293 }
17294 
17295 bool
17296 vm_map_is_alien(
17297 	vm_map_t map)
17298 {
17299 	return VM_MAP_IS_ALIEN(map);
17300 }
17301 
17302 #if XNU_TARGET_OS_OSX
17303 void
17304 vm_map_mark_alien(
17305 	vm_map_t map)
17306 {
17307 	vm_map_lock(map);
17308 	map->is_alien = true;
17309 	vm_map_unlock(map);
17310 }
17311 
17312 void
17313 vm_map_single_jit(
17314 	vm_map_t map)
17315 {
17316 	vm_map_lock(map);
17317 	map->single_jit = true;
17318 	vm_map_unlock(map);
17319 }
17320 #endif /* XNU_TARGET_OS_OSX */
17321 
17322 static kern_return_t
17323 vm_map_copy_to_physcopy(
17324 	vm_map_copy_t   copy_map,
17325 	vm_map_t        target_map)
17326 {
17327 	vm_map_size_t           size;
17328 	vm_map_entry_t          entry;
17329 	vm_map_entry_t          new_entry;
17330 	vm_object_t             new_object;
17331 	unsigned int            pmap_flags;
17332 	pmap_t                  new_pmap;
17333 	vm_map_t                new_map;
17334 	vm_map_address_t        src_start, src_end, src_cur;
17335 	vm_map_address_t        dst_start, dst_end, dst_cur;
17336 	kern_return_t           kr;
17337 	void                    *kbuf;
17338 
17339 	/*
17340 	 * Perform the equivalent of vm_allocate() and memcpy().
17341 	 * Replace the mappings in "copy_map" with the newly allocated mapping.
17342 	 */
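	/*
	 * Rough plan: build a throwaway map with the copy's page size,
	 * map both the original entries and a fresh VM object into it,
	 * copy the bytes page by page through a kernel buffer, then
	 * replace the old entries in "copy_map" with the fresh object's
	 * single entry.
	 */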
17343 	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17344 
17345 	assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17346 
17347 	/* create a new pmap to map "copy_map" */
17348 	pmap_flags = 0;
17349 	assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17350 #if PMAP_CREATE_FORCE_4K_PAGES
17351 	pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17352 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17353 	pmap_flags |= PMAP_CREATE_64BIT;
17354 	new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17355 	if (new_pmap == NULL) {
17356 		return KERN_RESOURCE_SHORTAGE;
17357 	}
17358 
17359 	/* allocate new VM object */
17360 	size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17361 	new_object = vm_object_allocate(size);
17362 	assert(new_object);
17363 
17364 	/* allocate new VM map entry */
17365 	new_entry = vm_map_copy_entry_create(copy_map, FALSE);
17366 	assert(new_entry);
17367 
17368 	/* finish initializing new VM map entry */
17369 	new_entry->protection = VM_PROT_DEFAULT;
17370 	new_entry->max_protection = VM_PROT_DEFAULT;
17371 	new_entry->use_pmap = TRUE;
17372 
17373 	/* make new VM map entry point to new VM object */
17374 	new_entry->vme_start = 0;
17375 	new_entry->vme_end = size;
17376 	VME_OBJECT_SET(new_entry, new_object);
17377 	VME_OFFSET_SET(new_entry, 0);
17378 
17379 	/* create a new pageable VM map to map "copy_map" */
17380 	new_map = vm_map_create_options(new_pmap, 0, MACH_VM_MAX_ADDRESS,
17381 	    VM_MAP_CREATE_PAGEABLE);
17382 	assert(new_map);
17383 	vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17384 
17385 	/* map "copy_map" in the new VM map */
17386 	src_start = 0;
17387 	kr = vm_map_copyout_internal(
17388 		new_map,
17389 		&src_start,
17390 		copy_map,
17391 		copy_map->size,
17392 		FALSE, /* consume_on_success */
17393 		VM_PROT_DEFAULT,
17394 		VM_PROT_DEFAULT,
17395 		VM_INHERIT_DEFAULT);
17396 	assert(kr == KERN_SUCCESS);
17397 	src_end = src_start + copy_map->size;
17398 
17399 	/* map "new_object" in the new VM map */
17400 	vm_object_reference(new_object);
17401 	dst_start = 0;
17402 	kr = vm_map_enter(new_map,
17403 	    &dst_start,
17404 	    size,
17405 	    0,               /* mask */
17406 	    VM_FLAGS_ANYWHERE,
17407 	    VM_MAP_KERNEL_FLAGS_NONE,
17408 	    VM_KERN_MEMORY_OSFMK,
17409 	    new_object,
17410 	    0,               /* offset */
17411 	    FALSE,               /* needs copy */
17412 	    VM_PROT_DEFAULT,
17413 	    VM_PROT_DEFAULT,
17414 	    VM_INHERIT_DEFAULT);
17415 	assert(kr == KERN_SUCCESS);
17416 	dst_end = dst_start + size;
17417 
17418 	/* get a kernel buffer */
17419 	kbuf = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_NOFAIL);
17420 
17421 	/* physically copy "copy_map" mappings to new VM object */
17422 	for (src_cur = src_start, dst_cur = dst_start;
17423 	    src_cur < src_end;
17424 	    src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17425 		vm_size_t bytes;
17426 
17427 		bytes = PAGE_SIZE;
17428 		if (src_cur + PAGE_SIZE > src_end) {
17429 			/* partial copy for last page */
17430 			bytes = src_end - src_cur;
17431 			assert(bytes > 0 && bytes < PAGE_SIZE);
17432 			/* rest of dst page should be zero-filled */
17433 		}
17434 		/* get bytes from src mapping */
17435 		kr = copyinmap(new_map, src_cur, kbuf, bytes);
17436 		if (kr != KERN_SUCCESS) {
17437 			DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17438 		}
17439 		/* put bytes in dst mapping */
17440 		assert(dst_cur < dst_end);
17441 		assert(dst_cur + bytes <= dst_end);
17442 		kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17443 		if (kr != KERN_SUCCESS) {
17444 			DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17445 		}
17446 	}
17447 
17448 	/* free kernel buffer */
17449 	kfree_data(kbuf, PAGE_SIZE);
17450 
17451 	/* destroy new map */
17452 	vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
17453 	new_map = VM_MAP_NULL;
17454 
17455 	/* dispose of the old map entries in "copy_map" */
17456 	while (vm_map_copy_first_entry(copy_map) !=
17457 	    vm_map_copy_to_entry(copy_map)) {
17458 		entry = vm_map_copy_first_entry(copy_map);
17459 		vm_map_copy_entry_unlink(copy_map, entry);
17460 		if (entry->is_sub_map) {
17461 			vm_map_deallocate(VME_SUBMAP(entry));
17462 		} else {
17463 			vm_object_deallocate(VME_OBJECT(entry));
17464 		}
17465 		vm_map_copy_entry_dispose(copy_map, entry);
17466 	}
17467 
17468 	/* change "copy_map"'s page_size to match "target_map" */
17469 	copy_map->cpy_hdr.page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
17470 	copy_map->offset = 0;
17471 	copy_map->size = size;
17472 
17473 	/* insert new map entry in "copy_map" */
17474 	assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17475 	vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17476 
17477 	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17478 	return KERN_SUCCESS;
17479 }
17480 
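/*
 * Ensure the caller has a private "target_copy_map" it can modify:
 * if *target_copy_map_p is NULL, build a deep copy of "copy_map",
 * taking a new reference on each entry's VM object or submap.
 */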
17481 void
17482 vm_map_copy_adjust_get_target_copy_map(
17483 	vm_map_copy_t   copy_map,
17484 	vm_map_copy_t   *target_copy_map_p);
17485 void
17486 vm_map_copy_adjust_get_target_copy_map(
17487 	vm_map_copy_t   copy_map,
17488 	vm_map_copy_t   *target_copy_map_p)
17489 {
17490 	vm_map_copy_t   target_copy_map;
17491 	vm_map_entry_t  entry, target_entry;
17492 
17493 	if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17494 		/* the caller already has a "target_copy_map": use it */
17495 		return;
17496 	}
17497 
17498 	/* the caller wants us to create a new copy of "copy_map" */
17499 	target_copy_map = vm_map_copy_allocate();
17500 	target_copy_map->type = copy_map->type;
17501 	assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17502 	target_copy_map->offset = copy_map->offset;
17503 	target_copy_map->size = copy_map->size;
17504 	target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17505 	vm_map_store_init(&target_copy_map->cpy_hdr);
17506 	for (entry = vm_map_copy_first_entry(copy_map);
17507 	    entry != vm_map_copy_to_entry(copy_map);
17508 	    entry = entry->vme_next) {
17509 		target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
17510 		vm_map_entry_copy_full(target_entry, entry);
17511 		if (target_entry->is_sub_map) {
17512 			vm_map_reference(VME_SUBMAP(target_entry));
17513 		} else {
17514 			vm_object_reference(VME_OBJECT(target_entry));
17515 		}
17516 		vm_map_copy_entry_link(
17517 			target_copy_map,
17518 			vm_map_copy_last_entry(target_copy_map),
17519 			target_entry);
17520 	}
17521 	entry = VM_MAP_ENTRY_NULL;
17522 	*target_copy_map_p = target_copy_map;
17523 }
17524 
17525 static void
17526 vm_map_copy_trim(
17527 	vm_map_copy_t   copy_map,
17528 	uint16_t        new_page_shift,
17529 	vm_map_offset_t trim_start,
17530 	vm_map_offset_t trim_end)
17531 {
17532 	uint16_t        copy_page_shift;
17533 	vm_map_entry_t  entry, next_entry;
17534 
17535 	assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17536 	assert(copy_map->cpy_hdr.nentries > 0);
17537 
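	/*
	 * "trim_start" and "trim_end" come in as offsets relative to the
	 * first entry; convert them to absolute copy-map addresses.
	 */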
17538 	trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17539 	trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17540 
17541 	/* use the new page_shift to do the clipping */
17542 	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17543 	copy_map->cpy_hdr.page_shift = new_page_shift;
17544 
17545 	for (entry = vm_map_copy_first_entry(copy_map);
17546 	    entry != vm_map_copy_to_entry(copy_map);
17547 	    entry = next_entry) {
17548 		next_entry = entry->vme_next;
17549 		if (entry->vme_end <= trim_start) {
17550 			/* entry fully before trim range: skip */
17551 			continue;
17552 		}
17553 		if (entry->vme_start >= trim_end) {
17554 			/* entry fully after trim range: done */
17555 			break;
17556 		}
17557 		/* clip entry if needed */
17558 		vm_map_copy_clip_start(copy_map, entry, trim_start);
17559 		vm_map_copy_clip_end(copy_map, entry, trim_end);
17560 		/* dispose of entry */
17561 		copy_map->size -= entry->vme_end - entry->vme_start;
17562 		vm_map_copy_entry_unlink(copy_map, entry);
17563 		if (entry->is_sub_map) {
17564 			vm_map_deallocate(VME_SUBMAP(entry));
17565 		} else {
17566 			vm_object_deallocate(VME_OBJECT(entry));
17567 		}
17568 		vm_map_copy_entry_dispose(copy_map, entry);
17569 		entry = VM_MAP_ENTRY_NULL;
17570 	}
17571 
17572 	/* restore copy_map's original page_shift */
17573 	copy_map->cpy_hdr.page_shift = copy_page_shift;
17574 }
17575 
17576 /*
17577  * Make any necessary adjustments to "copy_map" to allow it to be
17578  * mapped into "target_map".
17579  * If no changes were necessary, "target_copy_map" points to the
17580  * untouched "copy_map".
17581  * If changes are necessary, they will be made to "target_copy_map".
17582  * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17583  * copy the original "copy_map" to it before applying the changes.
17584  * The caller should discard "target_copy_map" if it's not the same as
17585  * the original "copy_map".
17586  */
17587 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
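/*
 * Sketch of a hypothetical caller (illustrative only), remapping a
 * 4K-grained "copy_map" into a 16K-page "target_map":
 *
 *	vm_map_copy_t target_copy = VM_MAP_COPY_NULL;
 *	vm_map_offset_t overmap_start, overmap_end, trimmed_start;
 *	kr = vm_map_copy_adjust_to_target(copy_map, 0, copy_map->size,
 *	    target_map, FALSE, &target_copy,
 *	    &overmap_start, &overmap_end, &trimmed_start);
 *
 * On success, "target_copy" is either the original "copy_map" (no
 * adjustment was needed) or a new copy that the caller must discard
 * separately.
 */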
17588 kern_return_t
17589 vm_map_copy_adjust_to_target(
17590 	vm_map_copy_t           src_copy_map,
17591 	vm_map_offset_t         offset,
17592 	vm_map_size_t           size,
17593 	vm_map_t                target_map,
17594 	boolean_t               copy,
17595 	vm_map_copy_t           *target_copy_map_p,
17596 	vm_map_offset_t         *overmap_start_p,
17597 	vm_map_offset_t         *overmap_end_p,
17598 	vm_map_offset_t         *trimmed_start_p)
17599 {
17600 	vm_map_copy_t           copy_map, target_copy_map;
17601 	vm_map_size_t           target_size;
17602 	vm_map_size_t           src_copy_map_size;
17603 	vm_map_size_t           overmap_start, overmap_end;
17604 	int                     misalignments;
17605 	vm_map_entry_t          entry, target_entry;
17606 	vm_map_offset_t         addr_adjustment;
17607 	vm_map_offset_t         new_start, new_end;
17608 	int                     copy_page_mask, target_page_mask;
17609 	uint16_t                copy_page_shift, target_page_shift;
17610 	vm_map_offset_t         trimmed_end;
17611 
17612 	/*
17613 	 * Assert that the vm_map_copy is coming from the right
17614 	 * zone and hasn't been forged.
17615 	 */
17616 	vm_map_copy_require(src_copy_map);
17617 	assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17618 
17619 	/*
17620 	 * Start working with "src_copy_map" but we'll switch
17621 	 * to "target_copy_map" as soon as we start making adjustments.
17622 	 */
17623 	copy_map = src_copy_map;
17624 	src_copy_map_size = src_copy_map->size;
17625 
17626 	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17627 	copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
17628 	target_page_shift = (uint16_t)VM_MAP_PAGE_SHIFT(target_map);
17629 	target_page_mask = VM_MAP_PAGE_MASK(target_map);
17630 
17631 	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
17632 
17633 	target_copy_map = *target_copy_map_p;
17634 	if (target_copy_map != VM_MAP_COPY_NULL) {
17635 		vm_map_copy_require(target_copy_map);
17636 	}
17637 
17638 	if (offset + size > copy_map->size) {
17639 		DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
17640 		return KERN_INVALID_ARGUMENT;
17641 	}
17642 
17643 	/* trim the end */
17644 	trimmed_end = 0;
17645 	new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
17646 	if (new_end < copy_map->size) {
17647 		trimmed_end = src_copy_map_size - new_end;
17648 		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
17649 		/* get "target_copy_map" if needed and adjust it */
17650 		vm_map_copy_adjust_get_target_copy_map(copy_map,
17651 		    &target_copy_map);
17652 		copy_map = target_copy_map;
17653 		vm_map_copy_trim(target_copy_map, target_page_shift,
17654 		    new_end, copy_map->size);
17655 	}
17656 
17657 	/* trim the start */
17658 	new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
17659 	if (new_start != 0) {
17660 		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
17661 		/* get "target_copy_map" if needed and adjust it */
17662 		vm_map_copy_adjust_get_target_copy_map(copy_map,
17663 		    &target_copy_map);
17664 		copy_map = target_copy_map;
17665 		vm_map_copy_trim(target_copy_map, target_page_shift,
17666 		    0, new_start);
17667 	}
17668 	*trimmed_start_p = new_start;
17669 
17670 	/* target_size starts with what's left after trimming */
17671 	target_size = copy_map->size;
17672 	assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
17673 	    "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17674 	    (uint64_t)target_size, (uint64_t)src_copy_map_size,
17675 	    (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
17676 
17677 	/* check for misalignments but don't adjust yet */
17678 	misalignments = 0;
17679 	overmap_start = 0;
17680 	overmap_end = 0;
17681 	if (copy_page_shift < target_page_shift) {
17682 		/*
17683 		 * Remapping from 4K to 16K: check the VM object alignments
17684 		 * throughout the range.
17685 		 * If the start and end of the range are mis-aligned, we can
17686 		 * over-map to re-align, and adjust the "overmap" start/end
17687 		 * and "target_size" of the range accordingly.
17688 		 * If there is any mis-alignment within the range:
17689 		 *     if "copy":
17690 		 *         we can do immediate-copy instead of copy-on-write,
17691 		 *     else:
17692 		 *         no way to remap and share; fail.
17693 		 */
17694 		for (entry = vm_map_copy_first_entry(copy_map);
17695 		    entry != vm_map_copy_to_entry(copy_map);
17696 		    entry = entry->vme_next) {
17697 			vm_object_offset_t object_offset_start, object_offset_end;
17698 
17699 			object_offset_start = VME_OFFSET(entry);
17700 			object_offset_end = object_offset_start;
17701 			object_offset_end += entry->vme_end - entry->vme_start;
17702 			if (object_offset_start & target_page_mask) {
17703 				if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
17704 					overmap_start++;
17705 				} else {
17706 					misalignments++;
17707 				}
17708 			}
17709 			if (object_offset_end & target_page_mask) {
17710 				if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
17711 					overmap_end++;
17712 				} else {
17713 					misalignments++;
17714 				}
17715 			}
17716 		}
17717 	}
17718 	entry = VM_MAP_ENTRY_NULL;
17719 
17720 	/* decide how to deal with misalignments */
17721 	assert(overmap_start <= 1);
17722 	assert(overmap_end <= 1);
17723 	if (!overmap_start && !overmap_end && !misalignments) {
17724 		/* copy_map is properly aligned for target_map ... */
17725 		if (*trimmed_start_p) {
17726 			/* ... but we trimmed it, so still need to adjust */
17727 		} else {
17728 			/* ... and we didn't trim anything: we're done */
17729 			if (target_copy_map == VM_MAP_COPY_NULL) {
17730 				target_copy_map = copy_map;
17731 			}
17732 			*target_copy_map_p = target_copy_map;
17733 			*overmap_start_p = 0;
17734 			*overmap_end_p = 0;
17735 			DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17736 			return KERN_SUCCESS;
17737 		}
17738 	} else if (misalignments && !copy) {
17739 		/* can't "share" if misaligned */
17740 		DEBUG4K_ADJUST("unsupported sharing\n");
17741 #if MACH_ASSERT
17742 		if (debug4k_panic_on_misaligned_sharing) {
17743 			panic("DEBUG4k %s:%d unsupported sharing", __FUNCTION__, __LINE__);
17744 		}
17745 #endif /* MACH_ASSERT */
17746 		DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
17747 		return KERN_NOT_SUPPORTED;
17748 	} else {
17749 		/* can't virtual-copy if misaligned (but can physical-copy) */
17750 		DEBUG4K_ADJUST("mis-aligned copying\n");
17751 	}
17752 
17753 	/* get a "target_copy_map" if needed and switch to it */
17754 	vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
17755 	copy_map = target_copy_map;
17756 
17757 	if (misalignments && copy) {
17758 		vm_map_size_t target_copy_map_size;
17759 
17760 		/*
17761 		 * Can't do copy-on-write with misaligned mappings.
17762 		 * Replace the mappings with a physical copy of the original
17763 		 * mappings' contents.
17764 		 */
17765 		target_copy_map_size = target_copy_map->size;
17766 		kern_return_t kr = vm_map_copy_to_physcopy(target_copy_map, target_map);
17767 		if (kr != KERN_SUCCESS) {
17768 			return kr;
17769 		}
17770 		*target_copy_map_p = target_copy_map;
17771 		*overmap_start_p = 0;
17772 		*overmap_end_p = target_copy_map->size - target_copy_map_size;
17773 		DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17774 		return KERN_SUCCESS;
17775 	}
17776 
17777 	/* apply the adjustments */
17778 	misalignments = 0;
17779 	overmap_start = 0;
17780 	overmap_end = 0;
17781 	/* remove copy_map->offset, so that everything starts at offset 0 */
17782 	addr_adjustment = copy_map->offset;
17783 	/* also remove whatever we trimmed from the start */
17784 	addr_adjustment += *trimmed_start_p;
17785 	for (target_entry = vm_map_copy_first_entry(target_copy_map);
17786 	    target_entry != vm_map_copy_to_entry(target_copy_map);
17787 	    target_entry = target_entry->vme_next) {
17788 		vm_object_offset_t object_offset_start, object_offset_end;
17789 
17790 		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17791 		object_offset_start = VME_OFFSET(target_entry);
17792 		if (object_offset_start & target_page_mask) {
17793 			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17794 			if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17795 				/*
17796 				 * start of 1st entry is mis-aligned:
17797 				 * re-adjust by over-mapping.
17798 				 */
17799 				overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
17800 				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
17801 				VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
17802 			} else {
17803 				misalignments++;
17804 				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17805 				assert(copy);
17806 			}
17807 		}
17808 
17809 		if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17810 			target_size += overmap_start;
17811 		} else {
17812 			target_entry->vme_start += overmap_start;
17813 		}
17814 		target_entry->vme_end += overmap_start;
17815 
17816 		object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
17817 		if (object_offset_end & target_page_mask) {
17818 			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17819 			if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
17820 				/*
17821 				 * end of last entry is mis-aligned: re-adjust by over-mapping.
17822 				 */
17823 				overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
17824 				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
17825 				target_entry->vme_end += overmap_end;
17826 				target_size += overmap_end;
17827 			} else {
17828 				misalignments++;
17829 				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17830 				assert(copy);
17831 			}
17832 		}
17833 		target_entry->vme_start -= addr_adjustment;
17834 		target_entry->vme_end -= addr_adjustment;
17835 		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17836 	}
17837 
17838 	target_copy_map->size = target_size;
17839 	target_copy_map->offset += overmap_start;
17840 	target_copy_map->offset -= addr_adjustment;
17841 	target_copy_map->cpy_hdr.page_shift = target_page_shift;
17842 
17843 //	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17844 //	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17845 	assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
17846 	assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
17847 
17848 	*target_copy_map_p = target_copy_map;
17849 	*overmap_start_p = overmap_start;
17850 	*overmap_end_p = overmap_end;
17851 
17852 	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17853 	return KERN_SUCCESS;
17854 }
17855 
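/*
 * Compute how much virtual space the range [start, start+size)
 * actually occupies once expressed in the kernel's native page size.
 * For maps already using the native page size this is just the
 * page-rounded size; for smaller-page maps (e.g. 4K entries on a
 * 16K kernel), the range is extracted and adjusted to measure its
 * real footprint.
 */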
17856 kern_return_t
17857 vm_map_range_physical_size(
17858 	vm_map_t         map,
17859 	vm_map_address_t start,
17860 	mach_vm_size_t   size,
17861 	mach_vm_size_t * phys_size)
17862 {
17863 	kern_return_t   kr;
17864 	vm_map_copy_t   copy_map, target_copy_map;
17865 	vm_map_offset_t adjusted_start, adjusted_end;
17866 	vm_map_size_t   adjusted_size;
17867 	vm_prot_t       cur_prot, max_prot;
17868 	vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17869 	vm_map_kernel_flags_t vmk_flags;
17870 
17871 	adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
17872 	adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
17873 	adjusted_size = adjusted_end - adjusted_start;
17874 	*phys_size = adjusted_size;
17875 	if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
17876 		return KERN_SUCCESS;
17877 	}
17878 	if (start == 0) {
17879 		adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
17880 		adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
17881 		adjusted_size = adjusted_end - adjusted_start;
17882 		*phys_size = adjusted_size;
17883 		return KERN_SUCCESS;
17884 	}
17885 	if (adjusted_size == 0) {
17886 		DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
17887 		*phys_size = 0;
17888 		return KERN_SUCCESS;
17889 	}
17890 
17891 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
17892 	vmk_flags.vmkf_copy_pageable = TRUE;
17893 	vmk_flags.vmkf_copy_same_map = TRUE;
17894 	assert(adjusted_size != 0);
17895 	cur_prot = VM_PROT_NONE; /* legacy mode */
17896 	max_prot = VM_PROT_NONE; /* legacy mode */
17897 	kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
17898 	    FALSE /* copy */,
17899 	    &copy_map,
17900 	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
17901 	    vmk_flags);
17902 	if (kr != KERN_SUCCESS) {
17903 		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17904 		//assert(0);
17905 		*phys_size = 0;
17906 		return kr;
17907 	}
17908 	assert(copy_map != VM_MAP_COPY_NULL);
17909 	target_copy_map = copy_map;
17910 	DEBUG4K_ADJUST("adjusting...\n");
17911 	kr = vm_map_copy_adjust_to_target(
17912 		copy_map,
17913 		start - adjusted_start, /* offset */
17914 		size, /* size */
17915 		kernel_map,
17916 		FALSE,                          /* copy */
17917 		&target_copy_map,
17918 		&overmap_start,
17919 		&overmap_end,
17920 		&trimmed_start);
17921 	if (kr == KERN_SUCCESS) {
17922 		if (target_copy_map->size != *phys_size) {
17923 			DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
17924 		}
17925 		*phys_size = target_copy_map->size;
17926 	} else {
17927 		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17928 		//assert(0);
17929 		*phys_size = 0;
17930 	}
17931 	vm_map_copy_discard(copy_map);
17932 	copy_map = VM_MAP_COPY_NULL;
17933 
17934 	return kr;
17935 }
17936 
17937 
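/*
 * Check whether the named entry's backing copy map would need any
 * start/end over-mapping adjustments to be mapped into "src_map".
 * This only comes into play when "src_map" uses pages smaller than
 * the kernel's native page size.
 */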
17938 kern_return_t
17939 memory_entry_check_for_adjustment(
17940 	vm_map_t                        src_map,
17941 	ipc_port_t                      port,
17942 	vm_map_offset_t         *overmap_start,
17943 	vm_map_offset_t         *overmap_end)
17944 {
17945 	kern_return_t kr = KERN_SUCCESS;
17946 	vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
17947 
17948 	assert(port);
17949 	assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
17950 
17951 	vm_named_entry_t        named_entry;
17952 
17953 	named_entry = mach_memory_entry_from_port(port);
17954 	copy_map = named_entry->backing.copy;
17955 	target_copy_map = copy_map;
17956 
17957 	if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
17958 		vm_map_offset_t trimmed_start;
17959 
17960 		trimmed_start = 0;
17961 		DEBUG4K_ADJUST("adjusting...\n");
17962 		kr = vm_map_copy_adjust_to_target(
17963 			copy_map,
17964 			0, /* offset */
17965 			copy_map->size, /* size */
17966 			src_map,
17967 			FALSE, /* copy */
17968 			&target_copy_map,
17969 			overmap_start,
17970 			overmap_end,
17971 			&trimmed_start);
17972 		assert(trimmed_start == 0);
17973 	}
17974 
17975 	return kr;
17976 }
17977 
17978 
17979 /*
17980  *	Routine:	vm_remap
17981  *
17982  *			Maps a portion of a task's address space.
17983  *			The mapped region must not overlap more than
17984  *			one VM memory object. Protections and
17985  *			inheritance attributes remain the same as in
17986  *			the original task and are returned as out parameters.
17987  *			Source and target tasks can be identical.
17988  *			Other attributes are the same as for vm_map().
17989  */
17990 kern_return_t
17991 vm_map_remap(
17992 	vm_map_t                target_map,
17993 	vm_map_address_t        *address,
17994 	vm_map_size_t           size,
17995 	vm_map_offset_t         mask,
17996 	int                     flags,
17997 	vm_map_kernel_flags_t   vmk_flags,
17998 	vm_tag_t                tag,
17999 	vm_map_t                src_map,
18000 	vm_map_offset_t         memory_address,
18001 	boolean_t               copy,
18002 	vm_prot_t               *cur_protection, /* IN/OUT */
18003 	vm_prot_t               *max_protection, /* IN/OUT */
18004 	vm_inherit_t            inheritance)
18005 {
18006 	kern_return_t           result;
18007 	vm_map_entry_t          entry;
18008 	vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
18009 	vm_map_entry_t          new_entry;
18010 	vm_map_copy_t           copy_map;
18011 	vm_map_offset_t         offset_in_mapping;
18012 	vm_map_size_t           target_size = 0;
18013 	vm_map_size_t           src_page_mask, target_page_mask;
18014 	vm_map_offset_t         overmap_start, overmap_end, trimmed_start;
18015 	vm_map_offset_t         initial_memory_address;
18016 	vm_map_size_t           initial_size;
18017 
18018 	if (target_map == VM_MAP_NULL) {
18019 		return KERN_INVALID_ARGUMENT;
18020 	}
18021 
18022 	initial_memory_address = memory_address;
18023 	initial_size = size;
18024 	src_page_mask = VM_MAP_PAGE_MASK(src_map);
18025 	target_page_mask = VM_MAP_PAGE_MASK(target_map);
18026 
18027 	switch (inheritance) {
18028 	case VM_INHERIT_NONE:
18029 	case VM_INHERIT_COPY:
18030 	case VM_INHERIT_SHARE:
18031 		if (size != 0 && src_map != VM_MAP_NULL) {
18032 			break;
18033 		}
18034 		OS_FALLTHROUGH;
18035 	default:
18036 		return KERN_INVALID_ARGUMENT;
18037 	}
18038 
18039 	if (src_page_mask != target_page_mask) {
18040 		if (copy) {
18041 			DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18042 		} else {
18043 			DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18044 		}
18045 	}
18046 
18047 	/*
18048 	 * If the user is requesting that we return the address of the
18049 	 * first byte of the data (rather than the base of the page),
18050 	 * then we use different rounding semantics: specifically,
18051 	 * we assume that (memory_address, size) describes a region
18052 	 * all of whose pages we must cover, rather than a base to be truncated
18053 	 * down and a size to be added to that base.  So we figure out
18054 	 * the highest page that the requested region includes and make
18055 	 * sure that the size will cover it.
18056 	 *
18057 	 * The key example we're worried about is of the form:
18058 	 *
18059 	 *              memory_address = 0x1ff0, size = 0x20
18060 	 *
18061 	 * With the old semantics, we round down the memory_address to 0x1000
18062 	 * and round up the size to 0x1000, resulting in our covering *only*
18063 	 * page 0x1000.  With the new semantics, we'd realize that the region covers
18064 	 * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
18065 	 * 0x1000 and page 0x2000 in the region we remap.
18066 	 */
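	/*
	 * Worked example with 4K pages (illustrative values):
	 *	range_start = trunc_page(0x1ff0)        = 0x1000
	 *	range_end   = round_page(0x1ff0 + 0x20) = 0x3000
	 *	size        = range_end - range_start   = 0x2000
	 *	offset_in_mapping = 0x1ff0 - 0x1000     = 0xff0
	 */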
18067 	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18068 		vm_map_offset_t range_start, range_end;
18069 
18070 		range_start = vm_map_trunc_page(memory_address, src_page_mask);
18071 		range_end = vm_map_round_page(memory_address + size, src_page_mask);
18072 		memory_address = range_start;
18073 		size = range_end - range_start;
18074 		offset_in_mapping = initial_memory_address - memory_address;
18075 	} else {
18076 		/*
18077 		 * IMPORTANT:
18078 		 * This legacy code path is broken: for the range mentioned
18079 		 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
18080 		 * two 4k pages, it yields [ memory_address = 0x1000,
18081 		 * size = 0x1000 ], which covers only the first 4k page.
18082 		 * BUT some code unfortunately depends on this bug, so we
18083 		 * can't fix it without breaking something.
18084 	 * New code is automatically opted into the new behavior
18085 	 * by using the new VM_FLAGS_RETURN_DATA_ADDR flag.
18086 		 */
18087 		offset_in_mapping = 0;
18088 		memory_address = vm_map_trunc_page(memory_address, src_page_mask);
18089 		size = vm_map_round_page(size, src_page_mask);
18090 		initial_memory_address = memory_address;
18091 		initial_size = size;
18092 	}
18093 
18094 
18095 	if (size == 0) {
18096 		return KERN_INVALID_ARGUMENT;
18097 	}
18098 
18099 	if (flags & VM_FLAGS_RESILIENT_MEDIA) {
18100 		/* must be copy-on-write to be "media resilient" */
18101 		if (!copy) {
18102 			return KERN_INVALID_ARGUMENT;
18103 		}
18104 	}
18105 
18106 	vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
18107 	vmk_flags.vmkf_copy_same_map = (src_map == target_map);
18108 
18109 	assert(size != 0);
18110 	result = vm_map_copy_extract(src_map,
18111 	    memory_address,
18112 	    size,
18113 	    copy, &copy_map,
18114 	    cur_protection, /* IN/OUT */
18115 	    max_protection, /* IN/OUT */
18116 	    inheritance,
18117 	    vmk_flags);
18118 	if (result != KERN_SUCCESS) {
18119 		return result;
18120 	}
18121 	assert(copy_map != VM_MAP_COPY_NULL);
18122 
18123 	overmap_start = 0;
18124 	overmap_end = 0;
18125 	trimmed_start = 0;
18126 	target_size = size;
18127 	if (src_page_mask != target_page_mask) {
18128 		vm_map_copy_t target_copy_map;
18129 
18130 		target_copy_map = copy_map; /* can modify "copy_map" itself */
18131 		DEBUG4K_ADJUST("adjusting...\n");
18132 		result = vm_map_copy_adjust_to_target(
18133 			copy_map,
18134 			offset_in_mapping, /* offset */
18135 			initial_size,
18136 			target_map,
18137 			copy,
18138 			&target_copy_map,
18139 			&overmap_start,
18140 			&overmap_end,
18141 			&trimmed_start);
18142 		if (result != KERN_SUCCESS) {
18143 			DEBUG4K_COPY("failed to adjust 0x%x\n", result);
18144 			vm_map_copy_discard(copy_map);
18145 			return result;
18146 		}
18147 		if (trimmed_start == 0) {
18148 			/* nothing trimmed: no adjustment needed */
18149 		} else if (trimmed_start >= offset_in_mapping) {
18150 			/* trimmed more than offset_in_mapping: nothing left */
18151 			assert(overmap_start == 0);
18152 			assert(overmap_end == 0);
18153 			offset_in_mapping = 0;
18154 		} else {
18155 			/* trimmed some of offset_in_mapping: adjust */
18156 			assert(overmap_start == 0);
18157 			assert(overmap_end == 0);
18158 			offset_in_mapping -= trimmed_start;
18159 		}
18160 		offset_in_mapping += overmap_start;
18161 		target_size = target_copy_map->size;
18162 	}
18163 
18164 	/*
18165 	 * Allocate/check a range of free virtual address
18166 	 * space for the target
18167 	 */
18168 	*address = vm_map_trunc_page(*address, target_page_mask);
18169 	vm_map_lock(target_map);
18170 	target_size = vm_map_round_page(target_size, target_page_mask);
18171 	result = vm_map_remap_range_allocate(target_map, address,
18172 	    target_size,
18173 	    mask, flags, vmk_flags, tag,
18174 	    &insp_entry);
18175 
18176 	for (entry = vm_map_copy_first_entry(copy_map);
18177 	    entry != vm_map_copy_to_entry(copy_map);
18178 	    entry = new_entry) {
18179 		new_entry = entry->vme_next;
18180 		vm_map_copy_entry_unlink(copy_map, entry);
18181 		if (result == KERN_SUCCESS) {
18182 			if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18183 				/* no codesigning -> read-only access */
18184 				entry->max_protection = VM_PROT_READ;
18185 				entry->protection = VM_PROT_READ;
18186 				entry->vme_resilient_codesign = TRUE;
18187 			}
18188 			entry->vme_start += *address;
18189 			entry->vme_end += *address;
18190 			assert(!entry->map_aligned);
18191 			if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
18192 			    !entry->is_sub_map &&
18193 			    (VME_OBJECT(entry) == VM_OBJECT_NULL ||
18194 			    VME_OBJECT(entry)->internal)) {
18195 				entry->vme_resilient_media = TRUE;
18196 			}
18197 			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
18198 			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
18199 			assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
18200 			vm_map_store_entry_link(target_map, insp_entry, entry,
18201 			    vmk_flags);
18202 			insp_entry = entry;
18203 		} else {
18204 			if (!entry->is_sub_map) {
18205 				vm_object_deallocate(VME_OBJECT(entry));
18206 			} else {
18207 				vm_map_deallocate(VME_SUBMAP(entry));
18208 			}
18209 			vm_map_copy_entry_dispose(copy_map, entry);
18210 		}
18211 	}
18212 
18213 	if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18214 		*cur_protection = VM_PROT_READ;
18215 		*max_protection = VM_PROT_READ;
18216 	}
18217 
18218 	if (result == KERN_SUCCESS) {
18219 		target_map->size += target_size;
18220 		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18221 
18222 	}
18223 	vm_map_unlock(target_map);
18224 
18225 	if (result == KERN_SUCCESS && target_map->wiring_required) {
18226 		result = vm_map_wire_kernel(target_map, *address,
18227 		    *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18228 		    TRUE);
18229 	}
18230 
18231 	/*
18232 	 * If requested, return the address of the data pointed to by the
18233 	 * request, rather than the base of the resulting page.
18234 	 */
18235 	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18236 		*address += offset_in_mapping;
18237 	}
18238 
18239 	if (src_page_mask != target_page_mask) {
18240 		DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx  result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18241 	}
18242 	vm_map_copy_discard(copy_map);
18243 	copy_map = VM_MAP_COPY_NULL;
18244 
18245 	return result;
18246 }
18247 
18248 /*
18249  *	Routine:	vm_map_remap_range_allocate
18250  *
18251  *	Description:
18252  *		Allocates a range in the specified virtual address map and
18253  *		returns the address and the map entry just before the
18254  *		allocated range.
18255  *
18256  *	Map must be locked.
18257  */
18258 
18259 static kern_return_t
18260 vm_map_remap_range_allocate(
18261 	vm_map_t                map,
18262 	vm_map_address_t        *address,       /* IN/OUT */
18263 	vm_map_size_t           size,
18264 	vm_map_offset_t         mask,
18265 	int                     flags,
18266 	vm_map_kernel_flags_t   vmk_flags,
18267 	__unused vm_tag_t       tag,
18268 	vm_map_entry_t          *map_entry)     /* OUT */
18269 {
18270 	vm_map_entry_t  entry;
18271 	vm_map_offset_t start;
18272 	vm_map_offset_t end;
18273 	vm_map_offset_t desired_empty_end;
18274 	kern_return_t   kr;
18275 	vm_map_entry_t          hole_entry;
18276 
18277 StartAgain:;
18278 
18279 	start = *address;
18280 
18281 	if (flags & VM_FLAGS_ANYWHERE) {
18282 		if (flags & VM_FLAGS_RANDOM_ADDR) {
18283 			/*
18284 			 * Get a random start address.
18285 			 */
18286 			kr = vm_map_random_address_for_size(map, address, size);
18287 			if (kr != KERN_SUCCESS) {
18288 				return kr;
18289 			}
18290 			start = *address;
18291 		}
18292 
18293 		/*
18294 		 *	Calculate the first possible address.
18295 		 */
18296 
18297 		if (start < map->min_offset) {
18298 			start = map->min_offset;
18299 		}
18300 		if (start > map->max_offset) {
18301 			return KERN_NO_SPACE;
18302 		}
18303 
18304 		/*
18305 		 *	Look for the first possible address;
18306 		 *	if there's already something at this
18307 		 *	address, we have to start after it.
18308 		 */
18309 
18310 		if (map->disable_vmentry_reuse == TRUE) {
18311 			VM_MAP_HIGHEST_ENTRY(map, entry, start);
18312 		} else {
18313 			if (map->holelistenabled) {
18314 				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
18315 
18316 				if (hole_entry == NULL) {
18317 					/*
18318 					 * No more space in the map?
18319 					 */
18320 					return KERN_NO_SPACE;
18321 				} else {
18322 					boolean_t found_hole = FALSE;
18323 
18324 					do {
18325 						if (hole_entry->vme_start >= start) {
18326 							start = hole_entry->vme_start;
18327 							found_hole = TRUE;
18328 							break;
18329 						}
18330 
18331 						if (hole_entry->vme_end > start) {
18332 							found_hole = TRUE;
18333 							break;
18334 						}
18335 						hole_entry = hole_entry->vme_next;
18336 					} while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
18337 
18338 					if (found_hole == FALSE) {
18339 						return KERN_NO_SPACE;
18340 					}
18341 
18342 					entry = hole_entry;
18343 				}
18344 			} else {
18345 				assert(first_free_is_valid(map));
18346 				if (start == map->min_offset) {
18347 					if ((entry = map->first_free) != vm_map_to_entry(map)) {
18348 						start = entry->vme_end;
18349 					}
18350 				} else {
18351 					vm_map_entry_t  tmp_entry;
18352 					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
18353 						start = tmp_entry->vme_end;
18354 					}
18355 					entry = tmp_entry;
18356 				}
18357 			}
18358 			start = vm_map_round_page(start,
18359 			    VM_MAP_PAGE_MASK(map));
18360 		}
18361 
18362 		/*
18363 		 *	In any case, the "entry" always precedes
18364 		 *	the proposed new region throughout the
18365 		 *	loop:
18366 		 */
18367 
18368 		while (TRUE) {
18369 			vm_map_entry_t  next;
18370 
18371 			/*
18372 			 *	Find the end of the proposed new region.
18373 			 *	Be sure we didn't go beyond the end, or
18374 			 *	wrap around the address.
18375 			 */
18376 
18377 			end = ((start + mask) & ~mask);
18378 			end = vm_map_round_page(end,
18379 			    VM_MAP_PAGE_MASK(map));
18380 			if (end < start) {
18381 				return KERN_NO_SPACE;
18382 			}
18383 			start = end;
18384 			end += size;
18385 
18386 			/* We want an entire page of empty space, but don't increase the allocation size. */
18387 			desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
18388 
18389 			if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
18390 				if (map->wait_for_space) {
18391 					if (size <= (map->max_offset -
18392 					    map->min_offset)) {
18393 						assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
18394 						vm_map_unlock(map);
18395 						thread_block(THREAD_CONTINUE_NULL);
18396 						vm_map_lock(map);
18397 						goto StartAgain;
18398 					}
18399 				}
18400 
18401 				return KERN_NO_SPACE;
18402 			}
18403 
18404 			next = entry->vme_next;
18405 
18406 			if (map->holelistenabled) {
18407 				if (entry->vme_end >= desired_empty_end) {
18408 					break;
18409 				}
18410 			} else {
18411 				/*
18412 				 *	If there are no more entries, we must win.
18413 				 *
18414 				 *	OR
18415 				 *
18416 				 *	If there is another entry, it must be
18417 				 *	after the end of the potential new region.
18418 				 */
18419 
18420 				if (next == vm_map_to_entry(map)) {
18421 					break;
18422 				}
18423 
18424 				if (next->vme_start >= desired_empty_end) {
18425 					break;
18426 				}
18427 			}
18428 
18429 			/*
18430 			 *	Didn't fit -- move to the next entry.
18431 			 */
18432 
18433 			entry = next;
18434 
18435 			if (map->holelistenabled) {
18436 				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
18437 					/*
18438 					 * Wrapped around
18439 					 */
18440 					return KERN_NO_SPACE;
18441 				}
18442 				start = entry->vme_start;
18443 			} else {
18444 				start = entry->vme_end;
18445 			}
18446 		}
18447 
18448 		if (map->holelistenabled) {
18449 			if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
18450 				panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
18451 			}
18452 		}
18453 
18454 		*address = start;
18455 	} else {
18456 		vm_map_entry_t          temp_entry;
18457 
18458 		/*
18459 		 *	Verify that:
18460 		 *		the address doesn't itself violate
18461 		 *		the mask requirement.
18462 		 */
18463 
18464 		if ((start & mask) != 0) {
18465 			return KERN_NO_SPACE;
18466 		}
18467 
18468 
18469 		/*
18470 		 *	...	the address is within bounds
18471 		 */
18472 
18473 		end = start + size;
18474 
18475 		if ((start < map->min_offset) ||
18476 		    (end > map->max_offset) ||
18477 		    (start >= end)) {
18478 			return KERN_INVALID_ADDRESS;
18479 		}
18480 
18481 		/*
18482 		 * If we're asked to overwrite whatever was mapped in that
18483 		 * range, first deallocate that range.
18484 		 */
18485 		if (flags & VM_FLAGS_OVERWRITE) {
18486 			vm_map_t zap_map;
18487 			int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
18488 
18489 			/*
18490 			 * We use a "zap_map" to avoid having to unlock
18491 			 * the "map" in vm_map_delete(), which would compromise
18492 			 * the atomicity of the "deallocate" and then "remap"
18493 			 * combination.
18494 			 */
18495 			zap_map = vm_map_create_options(PMAP_NULL, start, end,
18496 			    VM_MAP_CREATE_ZAP_OPTIONS(map));
18497 			vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
18498 
18499 			if (vmk_flags.vmkf_overwrite_immutable) {
18500 				remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
18501 			}
18502 			kr = vm_map_delete(map, start, end,
18503 			    remove_flags,
18504 			    zap_map);
18505 			if (kr == KERN_SUCCESS) {
18506 				vm_map_destroy(zap_map,
18507 				    VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18508 				zap_map = VM_MAP_NULL;
18509 			}
18510 		}
18511 
18512 		/*
18513 		 *	...	the starting address isn't allocated
18514 		 */
18515 
18516 		if (vm_map_lookup_entry(map, start, &temp_entry)) {
18517 			return KERN_NO_SPACE;
18518 		}
18519 
18520 		entry = temp_entry;
18521 
18522 		/*
18523 		 *	...	the next region doesn't overlap the
18524 		 *		end point.
18525 		 */
18526 
18527 		if ((entry->vme_next != vm_map_to_entry(map)) &&
18528 		    (entry->vme_next->vme_start < end)) {
18529 			return KERN_NO_SPACE;
18530 		}
18531 	}
18532 	*map_entry = entry;
18533 	return KERN_SUCCESS;
18534 }
18535 
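/*
 * Example (simplified sketch): the VM_FLAGS_ANYWHERE path above is a
 * first-fit walk of the hole list.  The types below are stand-ins for
 * the real vm_map structures, just to show the shape of the search.
 */
#if 0	/* illustration only */
struct hole {
	uint64_t     start, end;    /* [start, end) is unmapped */
	struct hole *next;          /* circular list, sorted by address */
};

static bool
first_fit(struct hole *head, uint64_t size, uint64_t mask, uint64_t *out)
{
	struct hole *h = head;

	do {
		uint64_t s = (h->start + mask) & ~mask; /* honor alignment */

		if (s >= h->start && h->end > s && h->end - s >= size) {
			*out = s;               /* *address for the caller */
			return true;
		}
		h = h->next;
	} while (h != head);                    /* wrapped around */
	return false;                           /* KERN_NO_SPACE */
}
#endif
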
18536 /*
18537  *	vm_map_switch:
18538  *
18539  *	Set the address map for the current thread to the specified map
18540  */
18541 
18542 vm_map_t
18543 vm_map_switch(
18544 	vm_map_t        map)
18545 {
18546 	int             mycpu;
18547 	thread_t        thread = current_thread();
18548 	vm_map_t        oldmap = thread->map;
18549 
18550 	mp_disable_preemption();
18551 	mycpu = cpu_number();
18552 
18553 	/*
18554 	 *	Deactivate the current map and activate the requested map
18555 	 */
18556 	PMAP_SWITCH_USER(thread, map, mycpu);
18557 
18558 	mp_enable_preemption();
18559 	return oldmap;
18560 }
18561 
18562 
18563 /*
18564  *	Routine:	vm_map_write_user
18565  *
18566  *	Description:
18567  *		Copy out data from a kernel space into space in the
18568  *		Copy out data from kernel space into space in the
18569  *		destination map.
18570  *		NOTE:  This routine should only be called by threads
18571  *		which can block on a page fault. i.e. kernel mode user
18572  *		which can block on a page fault, i.e., kernel-mode user
18573  *
18574  */
18575 kern_return_t
18576 vm_map_write_user(
18577 	vm_map_t                map,
18578 	void                    *src_p,
18579 	vm_map_address_t        dst_addr,
18580 	vm_size_t               size)
18581 {
18582 	kern_return_t   kr = KERN_SUCCESS;
18583 
18584 	if (current_map() == map) {
18585 		if (copyout(src_p, dst_addr, size)) {
18586 			kr = KERN_INVALID_ADDRESS;
18587 		}
18588 	} else {
18589 		vm_map_t        oldmap;
18590 
18591 		/* take on the identity of the target map while doing */
18592 		/* the transfer */
18593 
18594 		vm_map_reference(map);
18595 		oldmap = vm_map_switch(map);
18596 		if (copyout(src_p, dst_addr, size)) {
18597 			kr = KERN_INVALID_ADDRESS;
18598 		}
18599 		vm_map_switch(oldmap);
18600 		vm_map_deallocate(map);
18601 	}
18602 	return kr;
18603 }
18604 
18605 /*
18606  *	Routine:	vm_map_read_user
18607  *
18608  *	Description:
18609  *		Copy in data from a user space source map into the
18610  *		kernel map. The space must already exist in the
18611  *		kernel map.
18612  *		NOTE:  This routine should only be called by threads
18613  *		which can block on a page fault, i.e., kernel-mode user
18614  *		threads.
18615  *
18616  */
18617 kern_return_t
18618 vm_map_read_user(
18619 	vm_map_t                map,
18620 	vm_map_address_t        src_addr,
18621 	void                    *dst_p,
18622 	vm_size_t               size)
18623 {
18624 	kern_return_t   kr = KERN_SUCCESS;
18625 
18626 	if (current_map() == map) {
18627 		if (copyin(src_addr, dst_p, size)) {
18628 			kr = KERN_INVALID_ADDRESS;
18629 		}
18630 	} else {
18631 		vm_map_t        oldmap;
18632 
18633 		/* take on the identity of the target map while doing */
18634 		/* the transfer */
18635 
18636 		vm_map_reference(map);
18637 		oldmap = vm_map_switch(map);
18638 		if (copyin(src_addr, dst_p, size)) {
18639 			kr = KERN_INVALID_ADDRESS;
18640 		}
18641 		vm_map_switch(oldmap);
18642 		vm_map_deallocate(map);
18643 	}
18644 	return kr;
18645 }
18646 
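/*
 * Example (sketch): typical use of the two helpers above from kernel
 * code that must touch another task's address space.  The struct and
 * variable names are illustrative.
 */
#if 0	/* illustration only */
	struct user_args uargs;
	kern_return_t kr;

	kr = vm_map_read_user(task_map, user_args_addr, &uargs, sizeof(uargs));
	if (kr != KERN_SUCCESS) {
		return kr;      /* copyin faulted: KERN_INVALID_ADDRESS */
	}
#endif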
18647 
18648 /*
18649  *	vm_map_check_protection:
18650  *
18651  *	Assert that the target map allows the specified
18652  *	privilege on the entire address region given.
18653  *	The entire region must be allocated.
18654  */
18655 boolean_t
18656 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18657     vm_map_offset_t end, vm_prot_t protection)
18658 {
18659 	vm_map_entry_t entry;
18660 	vm_map_entry_t tmp_entry;
18661 
18662 	vm_map_lock(map);
18663 
18664 	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18665 		vm_map_unlock(map);
18666 		return FALSE;
18667 	}
18668 
18669 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18670 		vm_map_unlock(map);
18671 		return FALSE;
18672 	}
18673 
18674 	entry = tmp_entry;
18675 
18676 	while (start < end) {
18677 		if (entry == vm_map_to_entry(map)) {
18678 			vm_map_unlock(map);
18679 			return FALSE;
18680 		}
18681 
18682 		/*
18683 		 *	No holes allowed!
18684 		 */
18685 
18686 		if (start < entry->vme_start) {
18687 			vm_map_unlock(map);
18688 			return FALSE;
18689 		}
18690 
18691 		/*
18692 		 * Check protection associated with entry.
18693 		 */
18694 
18695 		if ((entry->protection & protection) != protection) {
18696 			vm_map_unlock(map);
18697 			return FALSE;
18698 		}
18699 
18700 		/* go to next entry */
18701 
18702 		start = entry->vme_end;
18703 		entry = entry->vme_next;
18704 	}
18705 	vm_map_unlock(map);
18706 	return TRUE;
18707 }
18708 
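/*
 * Example (sketch): a caller using vm_map_check_protection() to assert
 * that a whole user range is readable and writable.  Note the answer is
 * only valid at the time of the check; the map can change as soon as
 * the routine drops the map lock.  Variable names are illustrative.
 */
#if 0	/* illustration only */
	if (!vm_map_check_protection(map,
	    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
	    vm_map_round_page(addr + len, VM_MAP_PAGE_MASK(map)),
	    VM_PROT_READ | VM_PROT_WRITE)) {
		return KERN_PROTECTION_FAILURE;
	}
#endif
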
18709 kern_return_t
18710 vm_map_purgable_control(
18711 	vm_map_t                map,
18712 	vm_map_offset_t         address,
18713 	vm_purgable_t           control,
18714 	int                     *state)
18715 {
18716 	vm_map_entry_t          entry;
18717 	vm_object_t             object;
18718 	kern_return_t           kr;
18719 	boolean_t               was_nonvolatile;
18720 
18721 	/*
18722 	 * Vet all the input parameters and current type and state of the
18723 	 * underlying object.  Return with an error if anything is amiss.
18724 	 */
18725 	if (map == VM_MAP_NULL) {
18726 		return KERN_INVALID_ARGUMENT;
18727 	}
18728 
18729 	if (control != VM_PURGABLE_SET_STATE &&
18730 	    control != VM_PURGABLE_GET_STATE &&
18731 	    control != VM_PURGABLE_PURGE_ALL &&
18732 	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
18733 		return KERN_INVALID_ARGUMENT;
18734 	}
18735 
18736 	if (control == VM_PURGABLE_PURGE_ALL) {
18737 		vm_purgeable_object_purge_all();
18738 		return KERN_SUCCESS;
18739 	}
18740 
18741 	if ((control == VM_PURGABLE_SET_STATE ||
18742 	    control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
18743 	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
18744 	    ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
18745 		return KERN_INVALID_ARGUMENT;
18746 	}
18747 
18748 	vm_map_lock_read(map);
18749 
18750 	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
18751 		/*
18752 		 * Must pass a valid non-submap address.
18753 		 */
18754 		vm_map_unlock_read(map);
18755 		return KERN_INVALID_ADDRESS;
18756 	}
18757 
18758 	if ((entry->protection & VM_PROT_WRITE) == 0 &&
18759 	    control != VM_PURGABLE_GET_STATE) {
18760 		/*
18761 		 * Can't apply purgable controls to something you can't write.
18762 		 */
18763 		vm_map_unlock_read(map);
18764 		return KERN_PROTECTION_FAILURE;
18765 	}
18766 
18767 	object = VME_OBJECT(entry);
18768 	if (object == VM_OBJECT_NULL ||
18769 	    object->purgable == VM_PURGABLE_DENY) {
18770 		/*
18771 		 * Object must already be present and be purgeable.
18772 		 */
18773 		vm_map_unlock_read(map);
18774 		return KERN_INVALID_ARGUMENT;
18775 	}
18776 
18777 	vm_object_lock(object);
18778 
18779 #if 00
18780 	if (VME_OFFSET(entry) != 0 ||
18781 	    entry->vme_end - entry->vme_start != object->vo_size) {
18782 		/*
18783 		 * Can only apply purgable controls to the whole (existing)
18784 		 * object at once.
18785 		 */
18786 		vm_map_unlock_read(map);
18787 		vm_object_unlock(object);
18788 		return KERN_INVALID_ARGUMENT;
18789 	}
18790 #endif
18791 
18792 	assert(!entry->is_sub_map);
18793 	assert(!entry->use_pmap); /* purgeable has its own accounting */
18794 
18795 	vm_map_unlock_read(map);
18796 
18797 	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
18798 
18799 	kr = vm_object_purgable_control(object, control, state);
18800 
18801 	if (was_nonvolatile &&
18802 	    object->purgable != VM_PURGABLE_NONVOLATILE &&
18803 	    map->pmap == kernel_pmap) {
18804 #if DEBUG
18805 		object->vo_purgeable_volatilizer = kernel_task;
18806 #endif /* DEBUG */
18807 	}
18808 
18809 	vm_object_unlock(object);
18810 
18811 	return kr;
18812 }
18813 
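/*
 * Example (sketch): the user-space view of the control implemented
 * above, toggling a purgeable region between volatile and non-volatile.
 * On SET_STATE the out-parameter returns the previous state; a previous
 * state of VM_PURGABLE_EMPTY means the contents were purged while
 * volatile and must be regenerated.  The helper name is illustrative.
 */
#if 0	/* user-space illustration */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
cache_set_volatile(mach_vm_address_t addr, boolean_t make_volatile,
    boolean_t *was_purged)
{
	int state = make_volatile ? VM_PURGABLE_VOLATILE
	    : VM_PURGABLE_NONVOLATILE;
	kern_return_t kr = mach_vm_purgable_control(mach_task_self(), addr,
	    VM_PURGABLE_SET_STATE, &state);

	*was_purged = (kr == KERN_SUCCESS && state == VM_PURGABLE_EMPTY);
	return kr;
}
#endif
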
18814 void
18815 vm_map_footprint_query_page_info(
18816 	vm_map_t        map,
18817 	vm_map_entry_t  map_entry,
18818 	vm_map_offset_t curr_s_offset,
18819 	int             *disposition_p)
18820 {
18821 	int             pmap_disp;
18822 	vm_object_t     object;
18823 	int             disposition;
18824 	int             effective_page_size;
18825 
18826 	vm_map_lock_assert_held(map);
18827 	assert(!map->has_corpse_footprint);
18828 	assert(curr_s_offset >= map_entry->vme_start);
18829 	assert(curr_s_offset < map_entry->vme_end);
18830 
18831 	object = VME_OBJECT(map_entry);
18832 	if (object == VM_OBJECT_NULL) {
18833 		*disposition_p = 0;
18834 		return;
18835 	}
18836 
18837 	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18838 
18839 	pmap_disp = 0;
18840 	if (object == VM_OBJECT_NULL) {
18841 		/* nothing mapped here: no need to ask */
18842 		*disposition_p = 0;
18843 		return;
18844 	} else if (map_entry->is_sub_map &&
18845 	    !map_entry->use_pmap) {
18846 		/* nested pmap: no footprint */
18847 		*disposition_p = 0;
18848 		return;
18849 	}
18850 
18851 	/*
18852 	 * Query the pmap.
18853 	 */
18854 	pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18855 
18856 	/*
18857 	 * Compute this page's disposition.
18858 	 */
18859 	disposition = 0;
18860 
18861 	/* deal with "alternate accounting" first */
18862 	if (!map_entry->is_sub_map &&
18863 	    object->vo_no_footprint) {
18864 		/* does not count in footprint */
18865 		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18866 	} else if (!map_entry->is_sub_map &&
18867 	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
18868 	    (object->purgable == VM_PURGABLE_DENY &&
18869 	    object->vo_ledger_tag)) &&
18870 	    VM_OBJECT_OWNER(object) != NULL &&
18871 	    VM_OBJECT_OWNER(object)->map == map) {
18872 		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18873 		if ((((curr_s_offset
18874 		    - map_entry->vme_start
18875 		    + VME_OFFSET(map_entry))
18876 		    / effective_page_size) <
18877 		    (object->resident_page_count +
18878 		    vm_compressor_pager_get_count(object->pager)))) {
18879 			/*
18880 			 * Non-volatile purgeable object owned
18881 			 * by this task: report the first
18882 			 * "#resident + #compressed" pages as
18883 			 * "resident" (to show that they
18884 			 * contribute to the footprint) but not
18885 			 * "dirty" (to avoid double-counting
18886 			 * with the fake "non-volatile" region
18887 			 * we'll report at the end of the
18888 			 * address space to account for all
18889 			 * (mapped or not) non-volatile memory
18890 			 * owned by this task.
18891 			 */
18892 			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18893 		}
18894 	} else if (!map_entry->is_sub_map &&
18895 	    (object->purgable == VM_PURGABLE_VOLATILE ||
18896 	    object->purgable == VM_PURGABLE_EMPTY) &&
18897 	    VM_OBJECT_OWNER(object) != NULL &&
18898 	    VM_OBJECT_OWNER(object)->map == map) {
18899 		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18900 		if ((((curr_s_offset
18901 		    - map_entry->vme_start
18902 		    + VME_OFFSET(map_entry))
18903 		    / effective_page_size) <
18904 		    object->wired_page_count)) {
18905 			/*
18906 			 * Volatile|empty purgeable object owned
18907 			 * by this task: report the first
18908 			 * "#wired" pages as "resident" (to
18909 			 * show that they contribute to the
18910 			 * footprint) but not "dirty" (to avoid
18911 			 * double-counting with the fake
18912 			 * "non-volatile" region we'll report
18913 			 * at the end of the address space to
18914 			 * account for all (mapped or not)
18915 			 * non-volatile memory owned by this
18916 			 * task.
18917 			 */
18918 			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18919 		}
18920 	} else if (!map_entry->is_sub_map &&
18921 	    map_entry->iokit_acct &&
18922 	    object->internal &&
18923 	    object->purgable == VM_PURGABLE_DENY) {
18924 		/*
18925 		 * Non-purgeable IOKit memory: phys_footprint
18926 		 * includes the entire virtual mapping.
18927 		 */
18928 		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18929 		disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18930 		disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18931 	} else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18932 	    PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18933 		/* alternate accounting */
18934 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18935 		if (map->pmap->footprint_was_suspended) {
18936 			/*
18937 			 * The assertion below can fail if dyld
18938 			 * suspended footprint accounting
18939 			 * while doing some adjustments to
18940 			 * this page;  the mapping would say
18941 			 * "use pmap accounting" but the page
18942 			 * would be marked "alternate
18943 			 * accounting".
18944 			 */
18945 		} else
18946 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18947 		{
18948 			assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18949 		}
18950 		disposition = 0;
18951 	} else {
18952 		if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18953 			assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18954 			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18955 			disposition |= VM_PAGE_QUERY_PAGE_REF;
18956 			if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18957 				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18958 			} else {
18959 				disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18960 			}
18961 			if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18962 				disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18963 			}
18964 		} else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18965 			assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18966 			disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18967 		}
18968 	}
18969 
18970 	*disposition_p = disposition;
18971 }
18972 
18973 kern_return_t
18974 vm_map_page_query_internal(
18975 	vm_map_t        target_map,
18976 	vm_map_offset_t offset,
18977 	int             *disposition,
18978 	int             *ref_count)
18979 {
18980 	kern_return_t                   kr;
18981 	vm_page_info_basic_data_t       info;
18982 	mach_msg_type_number_t          count;
18983 
18984 	count = VM_PAGE_INFO_BASIC_COUNT;
18985 	kr = vm_map_page_info(target_map,
18986 	    offset,
18987 	    VM_PAGE_INFO_BASIC,
18988 	    (vm_page_info_t) &info,
18989 	    &count);
18990 	if (kr == KERN_SUCCESS) {
18991 		*disposition = info.disposition;
18992 		*ref_count = info.ref_count;
18993 	} else {
18994 		*disposition = 0;
18995 		*ref_count = 0;
18996 	}
18997 
18998 	return kr;
18999 }
19000 
19001 kern_return_t
19002 vm_map_page_info(
19003 	vm_map_t                map,
19004 	vm_map_offset_t         offset,
19005 	vm_page_info_flavor_t   flavor,
19006 	vm_page_info_t          info,
19007 	mach_msg_type_number_t  *count)
19008 {
19009 	return vm_map_page_range_info_internal(map,
19010 	           offset, /* start of range */
19011 	           (offset + 1), /* this will get rounded in the call to the page boundary */
19012 	           (int)-1, /* effective_page_shift: unspecified */
19013 	           flavor,
19014 	           info,
19015 	           count);
19016 }
19017 
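/*
 * Example (sketch): a mincore(2)-style residency check built on the
 * single-page query above, via the user-space mach_vm_page_query()
 * call.  The helper name is illustrative.
 */
#if 0	/* user-space illustration */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdbool.h>

static bool
page_is_resident(mach_vm_address_t addr)
{
	integer_t disp = 0, refs = 0;

	if (mach_vm_page_query(mach_task_self(), addr, &disp, &refs)
	    != KERN_SUCCESS) {
		return false;
	}
	return (disp & VM_PAGE_QUERY_PAGE_PRESENT) != 0;
}
#endif
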
19018 kern_return_t
19019 vm_map_page_range_info_internal(
19020 	vm_map_t                map,
19021 	vm_map_offset_t         start_offset,
19022 	vm_map_offset_t         end_offset,
19023 	int                     effective_page_shift,
19024 	vm_page_info_flavor_t   flavor,
19025 	vm_page_info_t          info,
19026 	mach_msg_type_number_t  *count)
19027 {
19028 	vm_map_entry_t          map_entry = VM_MAP_ENTRY_NULL;
19029 	vm_object_t             object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
19030 	vm_page_t               m = VM_PAGE_NULL;
19031 	kern_return_t           retval = KERN_SUCCESS;
19032 	int                     disposition = 0;
19033 	int                     ref_count = 0;
19034 	int                     depth = 0, info_idx = 0;
19035 	vm_page_info_basic_t    basic_info = 0;
19036 	vm_map_offset_t         offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
19037 	vm_map_offset_t         start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
19038 	boolean_t               do_region_footprint;
19039 	ledger_amount_t         ledger_resident, ledger_compressed;
19040 	int                     effective_page_size;
19041 	vm_map_offset_t         effective_page_mask;
19042 
19043 	switch (flavor) {
19044 	case VM_PAGE_INFO_BASIC:
19045 		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
19046 			/*
19047 			 * The "vm_page_info_basic_data" structure was not
19048 			 * properly padded, so allow the size to be off by
19049 			 * one to maintain backwards binary compatibility...
19050 			 */
19051 			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
19052 				return KERN_INVALID_ARGUMENT;
19053 			}
19054 		}
19055 		break;
19056 	default:
19057 		return KERN_INVALID_ARGUMENT;
19058 	}
19059 
19060 	if (effective_page_shift == -1) {
19061 		effective_page_shift = vm_self_region_page_shift_safely(map);
19062 		if (effective_page_shift == -1) {
19063 			return KERN_INVALID_ARGUMENT;
19064 		}
19065 	}
19066 	effective_page_size = (1 << effective_page_shift);
19067 	effective_page_mask = effective_page_size - 1;
19068 
19069 	do_region_footprint = task_self_region_footprint();
19070 	disposition = 0;
19071 	ref_count = 0;
19072 	depth = 0;
19073 	info_idx = 0; /* Tracks the next index within the info structure to be filled. */
19074 	retval = KERN_SUCCESS;
19075 
19076 	offset_in_page = start_offset & effective_page_mask;
19077 	start = vm_map_trunc_page(start_offset, effective_page_mask);
19078 	end = vm_map_round_page(end_offset, effective_page_mask);
19079 
19080 	if (end < start) {
19081 		return KERN_INVALID_ARGUMENT;
19082 	}
19083 
19084 	assert((end - start) <= MAX_PAGE_RANGE_QUERY);
19085 
19086 	vm_map_lock_read(map);
19087 
19088 	task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
19089 
19090 	for (curr_s_offset = start; curr_s_offset < end;) {
19091 		/*
19092 		 * New lookup needs reset of these variables.
19093 		 */
19094 		curr_object = object = VM_OBJECT_NULL;
19095 		offset_in_object = 0;
19096 		ref_count = 0;
19097 		depth = 0;
19098 
19099 		if (do_region_footprint &&
19100 		    curr_s_offset >= vm_map_last_entry(map)->vme_end) {
19101 			/*
19102 			 * Request for "footprint" info about a page beyond
19103 			 * the end of address space: this must be for
19104 			 * the fake region vm_map_region_recurse_64()
19105 			 * reported to account for non-volatile purgeable
19106 			 * memory owned by this task.
19107 			 */
19108 			disposition = 0;
19109 
19110 			if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
19111 			    (unsigned) ledger_compressed) {
19112 				/*
19113 				 * We haven't reported all the "non-volatile
19114 				 * compressed" pages yet, so report this fake
19115 				 * page as "compressed".
19116 				 */
19117 				disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19118 			} else {
19119 				/*
19120 				 * We've reported all the non-volatile
19121 				 * compressed pages but not all the non-volatile
19122 				 * pages, so report this fake page as
19123 				 * "resident dirty".
19124 				 */
19125 				disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19126 				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19127 				disposition |= VM_PAGE_QUERY_PAGE_REF;
19128 			}
19129 			switch (flavor) {
19130 			case VM_PAGE_INFO_BASIC:
19131 				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19132 				basic_info->disposition = disposition;
19133 				basic_info->ref_count = 1;
19134 				basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19135 				basic_info->offset = 0;
19136 				basic_info->depth = 0;
19137 
19138 				info_idx++;
19139 				break;
19140 			}
19141 			curr_s_offset += effective_page_size;
19142 			continue;
19143 		}
19144 
19145 		/*
19146 		 * First, find the map entry covering "curr_s_offset", going down
19147 		 * submaps if necessary.
19148 		 */
19149 		if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
19150 			/* no entry -> no object -> no page */
19151 
19152 			if (curr_s_offset < vm_map_min(map)) {
19153 				/*
19154 				 * Illegal address that falls below map min.
19155 				 */
19156 				curr_e_offset = MIN(end, vm_map_min(map));
19157 			} else if (curr_s_offset >= vm_map_max(map)) {
19158 				/*
19159 				 * Illegal address that falls on/after map max.
19160 				 */
19161 				curr_e_offset = end;
19162 			} else if (map_entry == vm_map_to_entry(map)) {
19163 				/*
19164 				 * Hit a hole.
19165 				 */
19166 				if (map_entry->vme_next == vm_map_to_entry(map)) {
19167 					/*
19168 					 * Empty map.
19169 					 */
19170 					curr_e_offset = MIN(map->max_offset, end);
19171 				} else {
19172 					/*
19173 					 * Hole at start of the map.
19174 					 */
19175 					curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19176 				}
19177 			} else {
19178 				if (map_entry->vme_next == vm_map_to_entry(map)) {
19179 					/*
19180 					 * Hole at the end of the map.
19181 					 */
19182 					curr_e_offset = MIN(map->max_offset, end);
19183 				} else {
19184 					curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19185 				}
19186 			}
19187 
19188 			assert(curr_e_offset >= curr_s_offset);
19189 
19190 			uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19191 
19192 			void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19193 
19194 			bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19195 
19196 			curr_s_offset = curr_e_offset;
19197 
19198 			info_idx += num_pages;
19199 
19200 			continue;
19201 		}
19202 
19203 		/* compute offset from this map entry's start */
19204 		offset_in_object = curr_s_offset - map_entry->vme_start;
19205 
19206 		/* compute offset into this map entry's object (or submap) */
19207 		offset_in_object += VME_OFFSET(map_entry);
19208 
19209 		if (map_entry->is_sub_map) {
19210 			vm_map_t sub_map = VM_MAP_NULL;
19211 			vm_page_info_t submap_info = 0;
19212 			vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
19213 
19214 			range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
19215 
19216 			submap_s_offset = offset_in_object;
19217 			submap_e_offset = submap_s_offset + range_len;
19218 
19219 			sub_map = VME_SUBMAP(map_entry);
19220 
19221 			vm_map_reference(sub_map);
19222 			vm_map_unlock_read(map);
19223 
19224 			submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19225 
19226 			assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
19227 			    "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
19228 
19229 			retval = vm_map_page_range_info_internal(sub_map,
19230 			    submap_s_offset,
19231 			    submap_e_offset,
19232 			    effective_page_shift,
19233 			    VM_PAGE_INFO_BASIC,
19234 			    (vm_page_info_t) submap_info,
19235 			    count);
19236 
19237 			assert(retval == KERN_SUCCESS);
19238 
19239 			vm_map_lock_read(map);
19240 			vm_map_deallocate(sub_map);
19241 
19242 			/* Move the "info" index by the number of pages we inspected. */
19243 			info_idx += range_len >> effective_page_shift;
19244 
19245 			/* Move our current offset by the size of the range we inspected. */
19246 			curr_s_offset += range_len;
19247 
19248 			continue;
19249 		}
19250 
19251 		object = VME_OBJECT(map_entry);
19252 
19253 		if (object == VM_OBJECT_NULL) {
19254 			/*
19255 			 * We don't have an object here and, hence,
19256 			 * no pages to inspect. We'll fill up the
19257 			 * info structure appropriately.
19258 			 */
19259 
19260 			curr_e_offset = MIN(map_entry->vme_end, end);
19261 
19262 			uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19263 
19264 			void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19265 
19266 			bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19267 
19268 			curr_s_offset = curr_e_offset;
19269 
19270 			info_idx += num_pages;
19271 
19272 			continue;
19273 		}
19274 
19275 		if (do_region_footprint) {
19276 			disposition = 0;
19277 			if (map->has_corpse_footprint) {
19278 				/*
19279 				 * Query the page info data we saved
19280 				 * while forking the corpse.
19281 				 */
19282 				vm_map_corpse_footprint_query_page_info(
19283 					map,
19284 					curr_s_offset,
19285 					&disposition);
19286 			} else {
19287 				/*
19288 				 * Query the live pmap for footprint info
19289 				 * about this page.
19290 				 */
19291 				vm_map_footprint_query_page_info(
19292 					map,
19293 					map_entry,
19294 					curr_s_offset,
19295 					&disposition);
19296 			}
19297 			switch (flavor) {
19298 			case VM_PAGE_INFO_BASIC:
19299 				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19300 				basic_info->disposition = disposition;
19301 				basic_info->ref_count = 1;
19302 				basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19303 				basic_info->offset = 0;
19304 				basic_info->depth = 0;
19305 
19306 				info_idx++;
19307 				break;
19308 			}
19309 			curr_s_offset += effective_page_size;
19310 			continue;
19311 		}
19312 
19313 		vm_object_reference(object);
19314 		/*
19315 		 * Shared mode -- so we can allow other readers
19316 		 * to grab the lock too.
19317 		 */
19318 		vm_object_lock_shared(object);
19319 
19320 		curr_e_offset = MIN(map_entry->vme_end, end);
19321 
19322 		vm_map_unlock_read(map);
19323 
19324 		map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19325 
19326 		curr_object = object;
19327 
19328 		for (; curr_s_offset < curr_e_offset;) {
19329 			if (object == curr_object) {
19330 				ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19331 			} else {
19332 				ref_count = curr_object->ref_count;
19333 			}
19334 
19335 			curr_offset_in_object = offset_in_object;
19336 
19337 			for (;;) {
19338 				m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19339 
19340 				if (m != VM_PAGE_NULL) {
19341 					disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19342 					break;
19343 				} else {
19344 					if (curr_object->internal &&
19345 					    curr_object->alive &&
19346 					    !curr_object->terminating &&
19347 					    curr_object->pager_ready) {
19348 						if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19349 						    == VM_EXTERNAL_STATE_EXISTS) {
19350 							/* the pager has that page */
19351 							disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19352 							break;
19353 						}
19354 					}
19355 
19356 					/*
19357 					 * Go down the VM object shadow chain until we find the page
19358 					 * we're looking for.
19359 					 */
19360 
19361 					if (curr_object->shadow != VM_OBJECT_NULL) {
19362 						vm_object_t shadow = VM_OBJECT_NULL;
19363 
19364 						curr_offset_in_object += curr_object->vo_shadow_offset;
19365 						shadow = curr_object->shadow;
19366 
19367 						vm_object_lock_shared(shadow);
19368 						vm_object_unlock(curr_object);
19369 
19370 						curr_object = shadow;
19371 						depth++;
19372 						continue;
19373 					} else {
19374 						break;
19375 					}
19376 				}
19377 			}
19378 
19379 			/* The ref_count is not strictly accurate: it measures the number   */
19380 			/* of entities holding a ref on the object; they may not be mapping */
19381 			/* the object or may not be mapping the section holding the         */
19382 			/* target page.  But it's still a ballpark number and, though an    */
19383 			/* overcount, it picks up the copy-on-write cases.                  */
19384 
19385 			/* We could also get a picture of page sharing from pmap_attributes */
19386 			/* but this would undercount, as only faulted-in mappings would     */
19387 			/* show up.							    */
19388 
19389 			if ((curr_object == object) && curr_object->shadow) {
19390 				disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19391 			}
19392 
19393 			if (!curr_object->internal) {
19394 				disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19395 			}
19396 
19397 			if (m != VM_PAGE_NULL) {
19398 				if (m->vmp_fictitious) {
19399 					disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19400 				} else {
19401 					if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19402 						disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19403 					}
19404 
19405 					if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19406 						disposition |= VM_PAGE_QUERY_PAGE_REF;
19407 					}
19408 
19409 					if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19410 						disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19411 					}
19412 
19413 					/*
19414 					 * XXX TODO4K:
19415 					 * when this routine deals with 4k
19416 					 * pages, check the appropriate CS bit
19417 					 * here.
19418 					 */
19419 					if (m->vmp_cs_validated) {
19420 						disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19421 					}
19422 					if (m->vmp_cs_tainted) {
19423 						disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19424 					}
19425 					if (m->vmp_cs_nx) {
19426 						disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19427 					}
19428 					if (m->vmp_reusable || curr_object->all_reusable) {
19429 						disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19430 					}
19431 				}
19432 			}
19433 
19434 			switch (flavor) {
19435 			case VM_PAGE_INFO_BASIC:
19436 				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19437 				basic_info->disposition = disposition;
19438 				basic_info->ref_count = ref_count;
19439 				basic_info->object_id = (vm_object_id_t) (uintptr_t)
19440 				    VM_KERNEL_ADDRPERM(curr_object);
19441 				basic_info->offset =
19442 				    (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19443 				basic_info->depth = depth;
19444 
19445 				info_idx++;
19446 				break;
19447 			}
19448 
19449 			disposition = 0;
19450 			offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19451 
19452 			/*
19453 			 * Move to next offset in the range and in our object.
19454 			 */
19455 			curr_s_offset += effective_page_size;
19456 			offset_in_object += effective_page_size;
19457 			curr_offset_in_object = offset_in_object;
19458 
19459 			if (curr_object != object) {
19460 				vm_object_unlock(curr_object);
19461 
19462 				curr_object = object;
19463 
19464 				vm_object_lock_shared(curr_object);
19465 			} else {
19466 				vm_object_lock_yield_shared(curr_object);
19467 			}
19468 		}
19469 
19470 		vm_object_unlock(curr_object);
19471 		vm_object_deallocate(curr_object);
19472 
19473 		vm_map_lock_read(map);
19474 	}
19475 
19476 	vm_map_unlock_read(map);
19477 	return retval;
19478 }
19479 
19480 /*
19481  *	vm_map_msync
19482  *
19483  *	Synchronises the memory range specified with its backing store
19484  *	image by either flushing or cleaning the contents to the appropriate
19485  *	memory manager engaging in a memory object synchronize dialog with
19486  *	the manager.  The client doesn't return until the manager issues
19487  *	m_o_s_completed message.  MIG Magically converts user task parameter
19488  *	the m_o_s_completed message.  MIG magically converts the user task
19489  *	parameter to the task's address map.
19490  *	interpretation of sync_flags
19491  *	VM_SYNC_INVALIDATE	- discard pages, only return precious
19492  *				  pages to manager.
19493  *
19494  *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19495  *				- discard pages, write dirty or precious
19496  *				  pages back to memory manager.
19497  *
19498  *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19499  *				- write dirty or precious pages back to
19500  *				  the memory manager.
19501  *
19502  *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
19503  *				  is a hole in the region, and we would
19504  *				  have returned KERN_SUCCESS, return
19505  *				  KERN_INVALID_ADDRESS instead.
19506  *
19507  *	NOTE
19508  *	The memory object attributes have not yet been implemented, this
19509  *	The memory object attributes have not yet been implemented; this
19510  *	function will have to deal with the invalidate attribute.
19511  *	RETURNS
19512  *	KERN_INVALID_TASK		Bad task parameter
19513  *	KERN_INVALID_ARGUMENT		both sync and async were specified.
19514  *	KERN_SUCCESS			The usual.
19515  *	KERN_INVALID_ADDRESS		There was a hole in the region.
19516  */
19517 
19518 kern_return_t
19519 vm_map_msync(
19520 	vm_map_t                map,
19521 	vm_map_address_t        address,
19522 	vm_map_size_t           size,
19523 	vm_sync_t               sync_flags)
19524 {
19525 	vm_map_entry_t          entry;
19526 	vm_map_size_t           amount_left;
19527 	vm_object_offset_t      offset;
19528 	vm_object_offset_t      start_offset, end_offset;
19529 	boolean_t               do_sync_req;
19530 	boolean_t               had_hole = FALSE;
19531 	vm_map_offset_t         pmap_offset;
19532 
19533 	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19534 	    (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19535 		return KERN_INVALID_ARGUMENT;
19536 	}
19537 
19538 	if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19539 		DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19540 	}
19541 
19542 	/*
19543 	 * align address and size on page boundaries
19544 	 */
19545 	size = (vm_map_round_page(address + size,
19546 	    VM_MAP_PAGE_MASK(map)) -
19547 	    vm_map_trunc_page(address,
19548 	    VM_MAP_PAGE_MASK(map)));
19549 	address = vm_map_trunc_page(address,
19550 	    VM_MAP_PAGE_MASK(map));
19551 
19552 	if (map == VM_MAP_NULL) {
19553 		return KERN_INVALID_TASK;
19554 	}
19555 
19556 	if (size == 0) {
19557 		return KERN_SUCCESS;
19558 	}
19559 
19560 	amount_left = size;
19561 
19562 	while (amount_left > 0) {
19563 		vm_object_size_t        flush_size;
19564 		vm_object_t             object;
19565 
19566 		vm_map_lock(map);
19567 		if (!vm_map_lookup_entry(map,
19568 		    address,
19569 		    &entry)) {
19570 			vm_map_size_t   skip;
19571 
19572 			/*
19573 			 * hole in the address map.
19574 			 */
19575 			had_hole = TRUE;
19576 
19577 			if (sync_flags & VM_SYNC_KILLPAGES) {
19578 				/*
19579 				 * For VM_SYNC_KILLPAGES, there should be
19580 				 * no holes in the range, since we couldn't
19581 				 * prevent someone else from allocating in
19582 				 * that hole and we wouldn't want to "kill"
19583 				 * their pages.
19584 				 */
19585 				vm_map_unlock(map);
19586 				break;
19587 			}
19588 
19589 			/*
19590 			 * Check for empty map.
19591 			 */
19592 			if (entry == vm_map_to_entry(map) &&
19593 			    entry->vme_next == entry) {
19594 				vm_map_unlock(map);
19595 				break;
19596 			}
19597 			/*
19598 			 * Check that we don't wrap and that
19599 			 * we have at least one real map entry.
19600 			 */
19601 			if ((map->hdr.nentries == 0) ||
19602 			    (entry->vme_next->vme_start < address)) {
19603 				vm_map_unlock(map);
19604 				break;
19605 			}
19606 			/*
19607 			 * Move up to the next entry if needed
19608 			 */
19609 			skip = (entry->vme_next->vme_start - address);
19610 			if (skip >= amount_left) {
19611 				amount_left = 0;
19612 			} else {
19613 				amount_left -= skip;
19614 			}
19615 			address = entry->vme_next->vme_start;
19616 			vm_map_unlock(map);
19617 			continue;
19618 		}
19619 
19620 		offset = address - entry->vme_start;
19621 		pmap_offset = address;
19622 
19623 		/*
19624 		 * do we have more to flush than is contained in this
19625 		 * entry ?
19626 		 */
19627 		if (amount_left + entry->vme_start + offset > entry->vme_end) {
19628 			flush_size = entry->vme_end -
19629 			    (entry->vme_start + offset);
19630 		} else {
19631 			flush_size = amount_left;
19632 		}
19633 		amount_left -= flush_size;
19634 		address += flush_size;
19635 
19636 		if (entry->is_sub_map == TRUE) {
19637 			vm_map_t        local_map;
19638 			vm_map_offset_t local_offset;
19639 
19640 			local_map = VME_SUBMAP(entry);
19641 			local_offset = VME_OFFSET(entry);
19642 			vm_map_reference(local_map);
19643 			vm_map_unlock(map);
19644 			if (vm_map_msync(
19645 				    local_map,
19646 				    local_offset,
19647 				    flush_size,
19648 				    sync_flags) == KERN_INVALID_ADDRESS) {
19649 				had_hole = TRUE;
19650 			}
19651 			vm_map_deallocate(local_map);
19652 			continue;
19653 		}
19654 		object = VME_OBJECT(entry);
19655 
19656 		/*
19657 		 * We can't sync this object if the object has not been
19658 		 * created yet
19659 		 */
19660 		if (object == VM_OBJECT_NULL) {
19661 			vm_map_unlock(map);
19662 			continue;
19663 		}
19664 		offset += VME_OFFSET(entry);
19665 
19666 		vm_object_lock(object);
19667 
19668 		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19669 			int kill_pages = 0;
19670 			boolean_t reusable_pages = FALSE;
19671 
19672 			if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19673 				/*
19674 				 * This is a destructive operation and so we
19675 				 * err on the side of limiting the range of
19676 				 * the operation.
19677 				 */
19678 				start_offset = vm_object_round_page(offset);
19679 				end_offset = vm_object_trunc_page(offset + flush_size);
19680 
19681 				if (end_offset <= start_offset) {
19682 					vm_object_unlock(object);
19683 					vm_map_unlock(map);
19684 					continue;
19685 				}
19686 
19687 				pmap_offset += start_offset - offset;
19688 			} else {
19689 				start_offset = offset;
19690 				end_offset = offset + flush_size;
19691 			}
19692 
19693 			if (sync_flags & VM_SYNC_KILLPAGES) {
19694 				if (((object->ref_count == 1) ||
19695 				    ((object->copy_strategy !=
19696 				    MEMORY_OBJECT_COPY_SYMMETRIC) &&
19697 				    (object->copy == VM_OBJECT_NULL))) &&
19698 				    (object->shadow == VM_OBJECT_NULL)) {
19699 					if (object->ref_count != 1) {
19700 						vm_page_stats_reusable.free_shared++;
19701 					}
19702 					kill_pages = 1;
19703 				} else {
19704 					kill_pages = -1;
19705 				}
19706 			}
19707 			if (kill_pages != -1) {
19708 				vm_object_deactivate_pages(
19709 					object,
19710 					start_offset,
19711 					(vm_object_size_t) (end_offset - start_offset),
19712 					kill_pages,
19713 					reusable_pages,
19714 					map->pmap,
19715 					pmap_offset);
19716 			}
19717 			vm_object_unlock(object);
19718 			vm_map_unlock(map);
19719 			continue;
19720 		}
19721 		/*
19722 		 * We can't sync this object if there isn't a pager.
19723 		 * Don't bother to sync internal objects, since there can't
19724 		 * be any "permanent" storage for these objects anyway.
19725 		 */
19726 		if ((object->pager == MEMORY_OBJECT_NULL) ||
19727 		    (object->internal) || (object->private)) {
19728 			vm_object_unlock(object);
19729 			vm_map_unlock(map);
19730 			continue;
19731 		}
19732 		/*
19733 		 * keep reference on the object until syncing is done
19734 		 */
19735 		vm_object_reference_locked(object);
19736 		vm_object_unlock(object);
19737 
19738 		vm_map_unlock(map);
19739 
19740 		if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19741 			start_offset = vm_object_trunc_page(offset);
19742 			end_offset = vm_object_round_page(offset + flush_size);
19743 		} else {
19744 			start_offset = offset;
19745 			end_offset = offset + flush_size;
19746 		}
19747 
19748 		do_sync_req = vm_object_sync(object,
19749 		    start_offset,
19750 		    (end_offset - start_offset),
19751 		    sync_flags & VM_SYNC_INVALIDATE,
19752 		    ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19753 		    (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19754 		    sync_flags & VM_SYNC_SYNCHRONOUS);
19755 
19756 		if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19757 			/*
19758 			 * clear out the clustering and read-ahead hints
19759 			 */
19760 			vm_object_lock(object);
19761 
19762 			object->pages_created = 0;
19763 			object->pages_used = 0;
19764 			object->sequential = 0;
19765 			object->last_alloc = 0;
19766 
19767 			vm_object_unlock(object);
19768 		}
19769 		vm_object_deallocate(object);
19770 	} /* while */
19771 
19772 	/* for proper msync() behaviour */
19773 	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19774 		return KERN_INVALID_ADDRESS;
19775 	}
19776 
19777 	return KERN_SUCCESS;
19778 } /* vm_map_msync */
19779 
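/*
 * Example (sketch): in user space this path is typically reached through
 * msync(2).  MS_SYNC/MS_ASYNC correspond to VM_SYNC_SYNCHRONOUS and
 * VM_SYNC_ASYNCHRONOUS, MS_INVALIDATE to VM_SYNC_INVALIDATE, and a hole
 * in the range shows up as ENOMEM (KERN_INVALID_ADDRESS above).  The
 * exact flag plumbing lives in the BSD layer, not in this file.
 */
#if 0	/* user-space illustration */
#include <sys/mman.h>

	if (msync(addr, len, MS_SYNC | MS_INVALIDATE) != 0) {
		/* ENOMEM: part of [addr, addr + len) was unmapped */
	}
#endif
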
19780 void
19781 vm_named_entry_associate_vm_object(
19782 	vm_named_entry_t        named_entry,
19783 	vm_object_t             object,
19784 	vm_object_offset_t      offset,
19785 	vm_object_size_t        size,
19786 	vm_prot_t               prot)
19787 {
19788 	vm_map_copy_t copy;
19789 	vm_map_entry_t copy_entry;
19790 
19791 	assert(!named_entry->is_sub_map);
19792 	assert(!named_entry->is_copy);
19793 	assert(!named_entry->is_object);
19794 	assert(!named_entry->internal);
19795 	assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
19796 
19797 	copy = vm_map_copy_allocate();
19798 	copy->type = VM_MAP_COPY_ENTRY_LIST;
19799 	copy->offset = offset;
19800 	copy->size = size;
19801 	copy->cpy_hdr.page_shift = (uint16_t)PAGE_SHIFT;
19802 	vm_map_store_init(&copy->cpy_hdr);
19803 
19804 	copy_entry = vm_map_copy_entry_create(copy, FALSE);
19805 	copy_entry->protection = prot;
19806 	copy_entry->max_protection = prot;
19807 	copy_entry->use_pmap = TRUE;
19808 	copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
19809 	copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
19810 	VME_OBJECT_SET(copy_entry, object);
19811 	VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
19812 	vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);
19813 
19814 	named_entry->backing.copy = copy;
19815 	named_entry->is_object = TRUE;
19816 	if (object->internal) {
19817 		named_entry->internal = TRUE;
19818 	}
19819 
19820 	DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n",
19821 	    named_entry, copy, object, offset, size, prot);
19822 }
19823 
19824 vm_object_t
19825 vm_named_entry_to_vm_object(
19826 	vm_named_entry_t named_entry)
19827 {
19828 	vm_map_copy_t   copy;
19829 	vm_map_entry_t  copy_entry;
19830 	vm_object_t     object;
19831 
19832 	assert(!named_entry->is_sub_map);
19833 	assert(!named_entry->is_copy);
19834 	assert(named_entry->is_object);
19835 	copy = named_entry->backing.copy;
19836 	assert(copy != VM_MAP_COPY_NULL);
19837 	assert(copy->cpy_hdr.nentries == 1);
19838 	copy_entry = vm_map_copy_first_entry(copy);
19839 	assert(!copy_entry->is_sub_map);
19840 	object = VME_OBJECT(copy_entry);
19841 
19842 	DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);
19843 
19844 	return object;
19845 }
19846 
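/*
 * Example (sketch): the usual way a named entry like the ones handled
 * above comes to exist, from user space: create a memory entry covering
 * an existing range, then map it elsewhere.  The helper name is
 * illustrative; the Mach calls themselves are real.
 */
#if 0	/* user-space illustration */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
share_range(mach_vm_address_t src, mach_vm_size_t size,
    mach_vm_address_t *dst)
{
	mach_port_t entry = MACH_PORT_NULL;
	memory_object_size_t entry_size = size;
	kern_return_t kr;

	kr = mach_make_memory_entry_64(mach_task_self(), &entry_size, src,
	    VM_PROT_READ | VM_PROT_WRITE, &entry, MACH_PORT_NULL);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	*dst = 0;
	kr = mach_vm_map(mach_task_self(), dst, entry_size, 0 /* mask */,
	    VM_FLAGS_ANYWHERE, entry, 0 /* offset */, FALSE /* copy */,
	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	mach_port_deallocate(mach_task_self(), entry);
	return kr;
}
#endif
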
19847 /*
19848  *	Routine:	convert_port_entry_to_map
19849  *	Purpose:
19850  *		Convert from a port specifying an entry or a task
19851  *		to a map. Doesn't consume the port ref; produces a map ref,
19852  *		which may be null.  Unlike convert_port_to_map, the
19853  *		port may be task or a named entry backed.
19854  *	Conditions:
19855  *		Nothing locked.
19856  */
19857 
19858 vm_map_t
19859 convert_port_entry_to_map(
19860 	ipc_port_t      port)
19861 {
19862 	vm_map_t map = VM_MAP_NULL;
19863 	vm_named_entry_t named_entry;
19864 
19865 	if (!IP_VALID(port)) {
19866 		return VM_MAP_NULL;
19867 	}
19868 
19869 	if (ip_kotype(port) != IKOT_NAMED_ENTRY) {
19870 		return convert_port_to_map(port);
19871 	}
19872 
19873 	named_entry = mach_memory_entry_from_port(port);
19874 
19875 	if ((named_entry->is_sub_map) &&
19876 	    (named_entry->protection & VM_PROT_WRITE)) {
19877 		map = named_entry->backing.map;
19878 		if (map->pmap != PMAP_NULL) {
19879 			if (map->pmap == kernel_pmap) {
19880 				panic("userspace has access "
19881 				    "to a kernel map %p", map);
19882 			}
19883 			pmap_require(map->pmap);
19884 		}
19885 		vm_map_reference(map);
19886 	}
19887 
19888 	return map;
19889 }
19890 
19891 /*
19892  * Export routines to other components for the things we access locally through
19893  * macros.
19894  */
19895 #undef current_map
19896 vm_map_t
19897 current_map(void)
19898 {
19899 	return current_map_fast();
19900 }
19901 
19902 /*
19903  *	vm_map_reference:
19904  *
19905  *	Takes a reference on the specified map.
19906  */
19907 void
19908 vm_map_reference(
19909 	vm_map_t        map)
19910 {
19911 	if (__probable(map != VM_MAP_NULL)) {
19912 		vm_map_require(map);
19913 		os_ref_retain_raw(&map->map_refcnt, &map_refgrp);
19914 	}
19915 }
19916 
19917 /*
19918  *	vm_map_deallocate:
19919  *
19920  *	Removes a reference from the specified map,
19921  *	destroying it if no references remain.
19922  *	The map should not be locked.
19923  */
19924 void
19925 vm_map_deallocate(
19926 	vm_map_t        map)
19927 {
19928 	if (__probable(map != VM_MAP_NULL)) {
19929 		vm_map_require(map);
19930 		if (os_ref_release_raw(&map->map_refcnt, &map_refgrp) == 0) {
19931 			vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19932 		}
19933 	}
19934 }
19935 
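/*
 * Example (sketch): the canonical pairing of the two routines above, as
 * used elsewhere in this file (e.g. the submap recursion in
 * vm_map_msync()): take a reference before dropping the lock that keeps
 * the map alive, and drop the reference when done.
 */
#if 0	/* illustration only */
	vm_map_reference(sub_map);
	vm_map_unlock(map);
	/* ... operate on sub_map with "map" unlocked ... */
	vm_map_deallocate(sub_map);
#endif
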
19936 void
19937 vm_map_inspect_deallocate(
19938 	vm_map_inspect_t      map)
19939 {
19940 	vm_map_deallocate((vm_map_t)map);
19941 }
19942 
19943 void
19944 vm_map_read_deallocate(
19945 	vm_map_read_t      map)
19946 {
19947 	vm_map_deallocate((vm_map_t)map);
19948 }
19949 
19950 
19951 void
19952 vm_map_disable_NX(vm_map_t map)
19953 {
19954 	if (map == NULL) {
19955 		return;
19956 	}
19957 	if (map->pmap == NULL) {
19958 		return;
19959 	}
19960 
19961 	pmap_disable_NX(map->pmap);
19962 }
19963 
19964 void
19965 vm_map_disallow_data_exec(vm_map_t map)
19966 {
19967 	if (map == NULL) {
19968 		return;
19969 	}
19970 
19971 	map->map_disallow_data_exec = TRUE;
19972 }
19973 
19974 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19975  * more descriptive.
19976  */
19977 void
19978 vm_map_set_32bit(vm_map_t map)
19979 {
19980 #if defined(__arm__) || defined(__arm64__)
19981 	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
19982 #else
19983 	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
19984 #endif
19985 }
19986 
19987 
19988 void
19989 vm_map_set_64bit(vm_map_t map)
19990 {
19991 #if defined(__arm__) || defined(__arm64__)
19992 	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
19993 #else
19994 	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
19995 #endif
19996 }
19997 
19998 /*
19999  * Expand the maximum size of an existing map to the maximum supported.
20000  */
20001 void
20002 vm_map_set_jumbo(vm_map_t map)
20003 {
20004 #if defined (__arm64__) && !defined(CONFIG_ARROW)
20005 	vm_map_set_max_addr(map, ~0);
20006 #else /* arm64 */
20007 	(void) map;
20008 #endif
20009 }
20010 
20011 /*
20012  * This map has a JIT entitlement
20013  */
20014 void
20015 vm_map_set_jit_entitled(vm_map_t map)
20016 {
20017 #if defined (__arm64__)
20018 	pmap_set_jit_entitled(map->pmap);
20019 #else /* arm64 */
20020 	(void) map;
20021 #endif
20022 }
20023 
20024 /*
20025  * Expand the maximum size of an existing map.
20026  */
20027 void
20028 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
20029 {
20030 #if defined(__arm64__)
20031 	vm_map_offset_t max_supported_offset = 0;
20032 	vm_map_offset_t old_max_offset = map->max_offset;
20033 	max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
20034 
20035 	new_max_offset = trunc_page(new_max_offset);
20036 
20037 	/* The address space cannot be shrunk using this routine. */
20038 	if (old_max_offset >= new_max_offset) {
20039 		return;
20040 	}
20041 
20042 	if (max_supported_offset < new_max_offset) {
20043 		new_max_offset = max_supported_offset;
20044 	}
20045 
20046 	map->max_offset = new_max_offset;
20047 
20048 	if (map->holes_list->prev->vme_end == old_max_offset) {
20049 		/*
20050 		 * There is already a hole at the end of the map; simply make it bigger.
20051 		 */
20052 		map->holes_list->prev->vme_end = map->max_offset;
20053 	} else {
20054 		/*
20055 		 * There is no hole at the end, so we need to create a new hole
20056 		 * for the new empty space we're creating.
20057 		 */
20058 		struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
20059 		new_hole->start = old_max_offset;
20060 		new_hole->end = map->max_offset;
20061 		new_hole->prev = map->holes_list->prev;
20062 		new_hole->next = (struct vm_map_entry *)map->holes_list;
20063 		map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
20064 		map->holes_list->prev = (struct vm_map_entry *)new_hole;
20065 	}
20066 #else
20067 	(void)map;
20068 	(void)new_max_offset;
20069 #endif
20070 }
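
/*
 * Usage sketch (illustrative): vm_map_set_jumbo() above simply requests the
 * largest possible address (~0); the request is page-truncated and clamped
 * to pmap_max_offset(..., ARM_PMAP_MAX_OFFSET_JUMBO) here, so a caller never
 * needs to know the exact architectural ceiling:
 *
 *	vm_map_set_max_addr(map, ~0);	// grows to the jumbo maximum
 *	vm_map_set_max_addr(map, 0);	// no-op: this routine cannot shrink
 */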
20071 
20072 vm_map_offset_t
20073 vm_compute_max_offset(boolean_t is64)
20074 {
20075 #if defined(__arm__) || defined(__arm64__)
20076 	return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
20077 #else
20078 	return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
20079 #endif
20080 }
20081 
20082 void
20083 vm_map_get_max_aslr_slide_section(
20084 	vm_map_t                map __unused,
20085 	int64_t                 *max_sections,
20086 	int64_t                 *section_size)
20087 {
20088 #if defined(__arm64__)
20089 	*max_sections = 3;
20090 	*section_size = ARM_TT_TWIG_SIZE;
20091 #else
20092 	*max_sections = 1;
20093 	*section_size = 0;
20094 #endif
20095 }
20096 
20097 uint64_t
20098 vm_map_get_max_aslr_slide_pages(vm_map_t map)
20099 {
20100 #if defined(__arm64__)
20101 	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
20102 	 * limited embedded address space; this is also meant to minimize pmap
20103 	 * memory usage on 16KB page systems.
20104 	 */
20105 	return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
20106 #else
20107 	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20108 #endif
20109 }
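
/*
 * Worked example (illustrative): on a 16KB-page map, VM_MAP_PAGE_SHIFT()
 * is 14, so the bound above is 1 << (24 - 14) = 1024 pages, and
 * 1024 * 16KB = 16MB of maximum slide, matching the comment in the code.
 */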
20110 
20111 uint64_t
20112 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
20113 {
20114 #if defined(__arm64__)
20115 	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
20116 	 * of independent entropy on 16KB page systems.
20117 	 */
20118 	return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
20119 #else
20120 	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20121 #endif
20122 }
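
/*
 * Worked example (illustrative): with 16KB pages the bound above is
 * 1 << (22 - 14) = 256 pages, i.e. exactly 8 bits of slide entropy and
 * at most 256 * 16KB = 4MB of loader slide, as the comment promises.
 */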
20123 
20124 #ifndef __arm__
20125 boolean_t
20126 vm_map_is_64bit(
20127 	vm_map_t map)
20128 {
20129 	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
20130 }
20131 #endif
20132 
20133 boolean_t
20134 vm_map_has_hard_pagezero(
20135 	vm_map_t        map,
20136 	vm_map_offset_t pagezero_size)
20137 {
20138 	/*
20139 	 * XXX FBDP
20140 	 * We should lock the VM map (for read) here but we can get away
20141 	 * with it for now because there can't really be any race condition:
20142 	 * the VM map's min_offset is changed only when the VM map is created
20143 	 * and when the zero page is established (when the binary gets loaded),
20144 	 * and this routine gets called only when the task terminates and the
20145 	 * VM map is being torn down, and when a new map is created via
20146 	 * load_machfile()/execve().
20147 	 */
20148 	return map->min_offset >= pagezero_size;
20149 }
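
/*
 * Usage sketch (illustrative): checking for the customary 4GB hard page
 * zero of a 64-bit process. The 4GB constant is an assumption made for
 * this example, not something the routine above mandates.
 */
static inline boolean_t
example_has_4gb_pagezero(vm_map_t map)
{
	return vm_map_has_hard_pagezero(map, (vm_map_offset_t)0x100000000ULL);
}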
20150 
20151 /*
20152  * Raise a VM map's maximum offset.
20153  */
20154 kern_return_t
20155 vm_map_raise_max_offset(
20156 	vm_map_t        map,
20157 	vm_map_offset_t new_max_offset)
20158 {
20159 	kern_return_t   ret;
20160 
20161 	vm_map_lock(map);
20162 	ret = KERN_INVALID_ADDRESS;
20163 
20164 	if (new_max_offset >= map->max_offset) {
20165 		if (!vm_map_is_64bit(map)) {
20166 			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20167 				map->max_offset = new_max_offset;
20168 				ret = KERN_SUCCESS;
20169 			}
20170 		} else {
20171 			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20172 				map->max_offset = new_max_offset;
20173 				ret = KERN_SUCCESS;
20174 			}
20175 		}
20176 	}
20177 
20178 	vm_map_unlock(map);
20179 	return ret;
20180 }
20181 
20182 
20183 /*
20184  * Raise a VM map's minimum offset.
20185  * To strictly enforce "page zero" reservation.
20186  */
20187 kern_return_t
20188 vm_map_raise_min_offset(
20189 	vm_map_t        map,
20190 	vm_map_offset_t new_min_offset)
20191 {
20192 	vm_map_entry_t  first_entry;
20193 
20194 	new_min_offset = vm_map_round_page(new_min_offset,
20195 	    VM_MAP_PAGE_MASK(map));
20196 
20197 	vm_map_lock(map);
20198 
20199 	if (new_min_offset < map->min_offset) {
20200 		/*
20201 		 * Can't move min_offset backwards, as that would expose
20202 		 * a part of the address space that was previously, and for
20203 		 * possibly good reasons, inaccessible.
20204 		 */
20205 		vm_map_unlock(map);
20206 		return KERN_INVALID_ADDRESS;
20207 	}
20208 	if (new_min_offset >= map->max_offset) {
20209 		/* can't go beyond the end of the address space */
20210 		vm_map_unlock(map);
20211 		return KERN_INVALID_ADDRESS;
20212 	}
20213 
20214 	first_entry = vm_map_first_entry(map);
20215 	if (first_entry != vm_map_to_entry(map) &&
20216 	    first_entry->vme_start < new_min_offset) {
20217 		/*
20218 		 * Some memory was already allocated below the new
20219 		 * minimum offset.  It's too late to change it now...
20220 		 */
20221 		vm_map_unlock(map);
20222 		return KERN_NO_SPACE;
20223 	}
20224 
20225 	map->min_offset = new_min_offset;
20226 
20227 	assert(map->holes_list);
20228 	map->holes_list->start = new_min_offset;
20229 	assert(new_min_offset < map->holes_list->end);
20230 
20231 	vm_map_unlock(map);
20232 
20233 	return KERN_SUCCESS;
20234 }
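
/*
 * Usage sketch (illustrative): reserving a hard page zero on a freshly
 * created map, before anything is mapped below it. KERN_NO_SPACE means
 * some entry already lives below the requested minimum.
 * "example_reserve_pagezero" is a hypothetical helper.
 */
static inline kern_return_t
example_reserve_pagezero(vm_map_t map, vm_map_offset_t pagezero_size)
{
	kern_return_t kr;

	kr = vm_map_raise_min_offset(map, pagezero_size);
	/* on success, the new min_offset covers the whole reservation */
	assert(kr != KERN_SUCCESS ||
	    vm_map_has_hard_pagezero(map, pagezero_size));
	return kr;
}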
20235 
20236 /*
20237  * Set the limit on the maximum amount of address space and user wired memory allowed for this map.
20238  * This is basically a copy of the RLIMIT_AS and RLIMIT_MEMLOCK rlimit value maintained by the BSD
20239  * side of the kernel. The limits are checked in the mach VM side, so we keep a copy so we don't
20240  * have to reach over to the BSD data structures.
20241  */
20242 
20243 uint64_t vm_map_set_size_limit_count = 0;
20244 kern_return_t
20245 vm_map_set_size_limit(vm_map_t map, uint64_t new_size_limit)
20246 {
20247 	kern_return_t kr;
20248 
20249 	vm_map_lock(map);
20250 	if (new_size_limit < map->size) {
20251 		/* new limit should not be lower than its current size */
20252 		DTRACE_VM2(vm_map_set_size_limit_fail,
20253 		    vm_map_size_t, map->size,
20254 		    uint64_t, new_size_limit);
20255 		kr = KERN_FAILURE;
20256 	} else if (new_size_limit == map->size_limit) {
20257 		/* no change */
20258 		kr = KERN_SUCCESS;
20259 	} else {
20260 		/* set new limit */
20261 		DTRACE_VM2(vm_map_set_size_limit,
20262 		    vm_map_size_t, map->size,
20263 		    uint64_t, new_size_limit);
20264 		if (new_size_limit != RLIM_INFINITY) {
20265 			vm_map_set_size_limit_count++;
20266 		}
20267 		map->size_limit = new_size_limit;
20268 		kr = KERN_SUCCESS;
20269 	}
20270 	vm_map_unlock(map);
20271 	return kr;
20272 }
20273 
20274 uint64_t vm_map_set_data_limit_count = 0;
20275 kern_return_t
20276 vm_map_set_data_limit(vm_map_t map, uint64_t new_data_limit)
20277 {
20278 	kern_return_t kr;
20279 
20280 	vm_map_lock(map);
20281 	if (new_data_limit < map->size) {
20282 		/* new limit should not be lower than its current size */
20283 		DTRACE_VM2(vm_map_set_data_limit_fail,
20284 		    vm_map_size_t, map->size,
20285 		    uint64_t, new_data_limit);
20286 		kr = KERN_FAILURE;
20287 	} else if (new_data_limit == map->data_limit) {
20288 		/* no change */
20289 		kr = KERN_SUCCESS;
20290 	} else {
20291 		/* set new limit */
20292 		DTRACE_VM2(vm_map_set_data_limit,
20293 		    vm_map_size_t, map->size,
20294 		    uint64_t, new_data_limit);
20295 		if (new_data_limit != RLIM_INFINITY) {
20296 			vm_map_set_data_limit_count++;
20297 		}
20298 		map->data_limit = new_data_limit;
20299 		kr = KERN_SUCCESS;
20300 	}
20301 	vm_map_unlock(map);
20302 	return kr;
20303 }
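
/*
 * Usage sketch (illustrative): how a setrlimit()-style path might mirror
 * its new RLIMIT_AS / RLIMIT_DATA values into the Mach VM map, per the
 * comment above. "example_mirror_rlimits" is hypothetical.
 */
static inline void
example_mirror_rlimits(vm_map_t map, uint64_t as_limit, uint64_t data_limit)
{
	/* both calls fail with KERN_FAILURE if the limit is below map->size */
	(void)vm_map_set_size_limit(map, as_limit);     /* RLIMIT_AS analogue */
	(void)vm_map_set_data_limit(map, data_limit);   /* RLIMIT_DATA analogue */
}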
20304 
20305 void
20306 vm_map_set_user_wire_limit(vm_map_t     map,
20307     vm_size_t    limit)
20308 {
20309 	vm_map_lock(map);
20310 	map->user_wire_limit = limit;
20311 	vm_map_unlock(map);
20312 }
20313 
20314 
20315 void
20316 vm_map_switch_protect(vm_map_t     map,
20317     boolean_t    val)
20318 {
20319 	vm_map_lock(map);
20320 	map->switch_protect = val;
20321 	vm_map_unlock(map);
20322 }
20323 
20324 extern int cs_process_enforcement_enable;
20325 boolean_t
20326 vm_map_cs_enforcement(
20327 	vm_map_t map)
20328 {
20329 	if (cs_process_enforcement_enable) {
20330 		return TRUE;
20331 	}
20332 	return map->cs_enforcement;
20333 }
20334 
20335 kern_return_t
20336 vm_map_cs_wx_enable(
20337 	vm_map_t map)
20338 {
20339 	return pmap_cs_allow_invalid(vm_map_pmap(map));
20340 }
20341 
20342 void
20343 vm_map_cs_debugged_set(
20344 	vm_map_t map,
20345 	boolean_t val)
20346 {
20347 	vm_map_lock(map);
20348 	map->cs_debugged = val;
20349 	vm_map_unlock(map);
20350 }
20351 
20352 void
20353 vm_map_cs_enforcement_set(
20354 	vm_map_t map,
20355 	boolean_t val)
20356 {
20357 	vm_map_lock(map);
20358 	map->cs_enforcement = val;
20359 	pmap_set_vm_map_cs_enforced(map->pmap, val);
20360 	vm_map_unlock(map);
20361 }
20362 
20363 /*
20364  * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20365  * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20366  * bump both counters.
20367  */
20368 void
20369 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20370 {
20371 	pmap_t pmap = vm_map_pmap(map);
20372 
20373 	ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20374 	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20375 }
20376 
20377 void
20378 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20379 {
20380 	pmap_t pmap = vm_map_pmap(map);
20381 
20382 	ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20383 	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20384 }
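
/*
 * Balance sketch (illustrative): every ledger credit taken by
 * vm_map_iokit_mapped_region() must be matched by an equal-sized debit,
 * or the iokit_mapped and phys_footprint ledgers drift upward permanently.
 */
static inline void
example_iokit_mapping_lifecycle(vm_map_t map, vm_size_t bytes)
{
	vm_map_iokit_mapped_region(map, bytes);
	/* ... the IOKit mapping is live here ... */
	vm_map_iokit_unmapped_region(map, bytes);       /* same "bytes" */
}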
20385 
20386 /* Add (generate) code signature for memory range */
20387 #if CONFIG_DYNAMIC_CODE_SIGNING
20388 kern_return_t
20389 vm_map_sign(vm_map_t map,
20390     vm_map_offset_t start,
20391     vm_map_offset_t end)
20392 {
20393 	vm_map_entry_t entry;
20394 	vm_page_t m;
20395 	vm_object_t object;
20396 
20397 	/*
20398 	 * Vet all the input parameters and current type and state of the
20399 	 * underlying object.  Return with an error if anything is amiss.
20400 	 */
20401 	if (map == VM_MAP_NULL) {
20402 		return KERN_INVALID_ARGUMENT;
20403 	}
20404 
20405 	vm_map_lock_read(map);
20406 
20407 	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
20408 		/*
20409 		 * Must pass a valid non-submap address.
20410 		 */
20411 		vm_map_unlock_read(map);
20412 		return KERN_INVALID_ADDRESS;
20413 	}
20414 
20415 	if ((entry->vme_start > start) || (entry->vme_end < end)) {
20416 		/*
20417 		 * Map entry doesn't cover the requested range. Not handling
20418 		 * this situation currently.
20419 		 */
20420 		vm_map_unlock_read(map);
20421 		return KERN_INVALID_ARGUMENT;
20422 	}
20423 
20424 	object = VME_OBJECT(entry);
20425 	if (object == VM_OBJECT_NULL) {
20426 		/*
20427 		 * Object must already be present or we can't sign.
20428 		 */
20429 		vm_map_unlock_read(map);
20430 		return KERN_INVALID_ARGUMENT;
20431 	}
20432 
20433 	vm_object_lock(object);
20434 	vm_map_unlock_read(map);
20435 
20436 	while (start < end) {
20437 		uint32_t refmod;
20438 
20439 		m = vm_page_lookup(object,
20440 		    start - entry->vme_start + VME_OFFSET(entry));
20441 		if (m == VM_PAGE_NULL) {
20442 		/* should we try to fault a page here? we can probably
20443 			 * demand it exists and is locked for this request */
20444 			vm_object_unlock(object);
20445 			return KERN_FAILURE;
20446 		}
20447 		/* deal with special page status */
20448 		if (m->vmp_busy ||
20449 		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
20450 			vm_object_unlock(object);
20451 			return KERN_FAILURE;
20452 		}
20453 
20454 		/* Page is OK... now "validate" it */
20455 		/* This is the place where we'll call out to create a code
20456 		 * directory, later */
20457 		/* XXX TODO4K: deal with 4k subpages individually? */
20458 		m->vmp_cs_validated = VMP_CS_ALL_TRUE;
20459 
20460 		/* The page is now "clean" for codesigning purposes. That means
20461 		 * we don't consider it as modified (wpmapped) anymore. But
20462 		 * we'll disconnect the page so we note any future modification
20463 		 * attempts. */
20464 		m->vmp_wpmapped = FALSE;
20465 		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
20466 
20467 		/* Pull the dirty status from the pmap, since we cleared the
20468 		 * wpmapped bit */
20469 		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
20470 			SET_PAGE_DIRTY(m, FALSE);
20471 		}
20472 
20473 		/* On to the next page */
20474 		start += PAGE_SIZE;
20475 	}
20476 	vm_object_unlock(object);
20477 
20478 	return KERN_SUCCESS;
20479 }
20480 #endif
20481 
20482 kern_return_t
20483 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20484 {
20485 	vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
20486 	vm_map_entry_t next_entry;
20487 	kern_return_t   kr = KERN_SUCCESS;
20488 	vm_map_t        zap_map;
20489 
20490 	vm_map_lock(map);
20491 
20492 	/*
20493 	 * We use a "zap_map" to avoid having to unlock
20494 	 * the "map" in vm_map_delete().
20495 	 */
20496 	zap_map = vm_map_create_options(PMAP_NULL,
20497 	    map->min_offset,
20498 	    map->max_offset,
20499 	    VM_MAP_CREATE_ZAP_OPTIONS(map));
20500 	vm_map_set_page_shift(zap_map,
20501 	    VM_MAP_PAGE_SHIFT(map));
20502 
20503 	for (entry = vm_map_first_entry(map);
20504 	    entry != vm_map_to_entry(map);
20505 	    entry = next_entry) {
20506 		next_entry = entry->vme_next;
20507 
20508 		if (VME_OBJECT(entry) &&
20509 		    !entry->is_sub_map &&
20510 		    (VME_OBJECT(entry)->internal == TRUE) &&
20511 		    (VME_OBJECT(entry)->ref_count == 1)) {
20512 			*reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20513 			*reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20514 
20515 			(void)vm_map_delete(map,
20516 			    entry->vme_start,
20517 			    entry->vme_end,
20518 			    VM_MAP_REMOVE_SAVE_ENTRIES,
20519 			    zap_map);
20520 		}
20521 	}
20522 
20523 	vm_map_unlock(map);
20524 
20525 	/*
20526 	 * Get rid of the "zap_maps" and all the map entries that
20527 	 * they may still contain.
20528 	 */
20529 	if (zap_map != VM_MAP_NULL) {
20530 		vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20531 		zap_map = VM_MAP_NULL;
20532 	}
20533 
20534 	return kr;
20535 }
20536 
20537 
20538 #if DEVELOPMENT || DEBUG
20539 
20540 int
20541 vm_map_disconnect_page_mappings(
20542 	vm_map_t map,
20543 	boolean_t do_unnest)
20544 {
20545 	vm_map_entry_t entry;
20546 	ledger_amount_t byte_count = 0;
20547 
20548 	if (do_unnest == TRUE) {
20549 #ifndef NO_NESTED_PMAP
20550 		vm_map_lock(map);
20551 
20552 		for (entry = vm_map_first_entry(map);
20553 		    entry != vm_map_to_entry(map);
20554 		    entry = entry->vme_next) {
20555 			if (entry->is_sub_map && entry->use_pmap) {
20556 				/*
20557 				 * Make sure the range between the start of this entry and
20558 				 * the end of this entry is no longer nested, so that
20559 				 * we will only remove mappings from the pmap in use by
20560 				 * this task.
20561 				 */
20562 				vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
20563 			}
20564 		}
20565 		vm_map_unlock(map);
20566 #endif
20567 	}
20568 	vm_map_lock_read(map);
20569 
20570 	ledger_get_balance(map->pmap->ledger, task_ledgers.phys_mem, &byte_count);
20571 
20572 	for (entry = vm_map_first_entry(map);
20573 	    entry != vm_map_to_entry(map);
20574 	    entry = entry->vme_next) {
20575 		if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
20576 		    (VME_OBJECT(entry)->phys_contiguous))) {
20577 			continue;
20578 		}
20579 		if (entry->is_sub_map) {
20580 			assert(!entry->use_pmap);
20581 		}
20582 
20583 		pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
20584 	}
20585 	vm_map_unlock_read(map);
20586 
20587 	return (int) (byte_count / VM_MAP_PAGE_SIZE(map));
20588 }
20589 
20590 kern_return_t
20591 vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
20592 {
20593 	vm_object_t object = NULL;
20594 	vm_object_offset_t offset;
20595 	vm_prot_t prot;
20596 	boolean_t wired;
20597 	vm_map_version_t version;
20598 	vm_map_t real_map;
20599 	int result = KERN_FAILURE;
20600 
20601 	vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
20602 	vm_map_lock(map);
20603 
20604 	result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
20605 	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
20606 	    NULL, &real_map, NULL);
20607 	if (object == NULL) {
20608 		result = KERN_MEMORY_ERROR;
20609 	} else if (object->pager) {
20610 		result = vm_compressor_pager_inject_error(object->pager,
20611 		    offset);
20612 	} else {
20613 		result = KERN_MEMORY_PRESENT;
20614 	}
20615 
20616 	if (object != NULL) {
20617 		vm_object_unlock(object);
20618 	}
20619 
20620 	if (real_map != map) {
20621 		vm_map_unlock(real_map);
20622 	}
20623 	vm_map_unlock(map);
20624 
20625 	return result;
20626 }
20627 
20628 #endif
20629 
20630 
20631 #if CONFIG_FREEZE
20632 
20633 
20634 extern struct freezer_context freezer_context_global;
20635 AbsoluteTime c_freezer_last_yield_ts = 0;
20636 
20637 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20638 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20639 
20640 kern_return_t
20641 vm_map_freeze(
20642 	task_t       task,
20643 	unsigned int *purgeable_count,
20644 	unsigned int *wired_count,
20645 	unsigned int *clean_count,
20646 	unsigned int *dirty_count,
20647 	unsigned int dirty_budget,
20648 	unsigned int *shared_count,
20649 	int          *freezer_error_code,
20650 	boolean_t    eval_only)
20651 {
20652 	vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
20653 	kern_return_t   kr = KERN_SUCCESS;
20654 	boolean_t       evaluation_phase = TRUE;
20655 	vm_object_t     cur_shared_object = NULL;
20656 	int             cur_shared_obj_ref_cnt = 0;
20657 	unsigned int    dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
20658 
20659 	*purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
20660 
20661 	/*
20662 	 * We need the exclusive lock here so that we can
20663 	 * block any page faults or lookups while we are
20664 	 * in the middle of freezing this vm map.
20665 	 */
20666 	vm_map_t map = task->map;
20667 
20668 	vm_map_lock(map);
20669 
20670 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
20671 
20672 	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20673 		if (vm_compressor_low_on_space()) {
20674 			*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20675 		}
20676 
20677 		if (vm_swap_low_on_space()) {
20678 			*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20679 		}
20680 
20681 		kr = KERN_NO_SPACE;
20682 		goto done;
20683 	}
20684 
20685 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
20686 		/*
20687 		 * In-memory compressor backing the freezer. No disk.
20688 		 * So no need to do the evaluation phase.
20689 		 */
20690 		evaluation_phase = FALSE;
20691 
20692 		if (eval_only == TRUE) {
20693 			/*
20694 			 * We don't support 'eval_only' mode
20695 			 * in this non-swap config.
20696 			 */
20697 			*freezer_error_code = FREEZER_ERROR_GENERIC;
20698 			kr = KERN_INVALID_ARGUMENT;
20699 			goto done;
20700 		}
20701 
20702 		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20703 		clock_get_uptime(&c_freezer_last_yield_ts);
20704 	}
20705 again:
20706 
20707 	for (entry2 = vm_map_first_entry(map);
20708 	    entry2 != vm_map_to_entry(map);
20709 	    entry2 = entry2->vme_next) {
20710 		vm_object_t     src_object = VME_OBJECT(entry2);
20711 
20712 		if (src_object &&
20713 		    !entry2->is_sub_map &&
20714 		    !src_object->phys_contiguous) {
20715 			/* If eligible, scan the entry, moving eligible pages over to our parent object */
20716 
20717 			if (src_object->internal == TRUE) {
20718 				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
20719 					/*
20720 					 * We skip purgeable objects during evaluation phase only.
20721 					 * If we decide to freeze this process, we'll explicitly
20722 					 * purge these objects before we go around again with
20723 					 * 'evaluation_phase' set to FALSE.
20724 					 */
20725 
20726 					if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
20727 						/*
20728 						 * We want to purge objects that may not belong to this task but are mapped
20729 						 * in this task alone. Since we already purged this task's purgeable memory
20730 						 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
20731 						 * on this task's purgeable objects. Hence the check for only volatile objects.
20732 						 */
20733 						if (evaluation_phase == FALSE &&
20734 						    (src_object->purgable == VM_PURGABLE_VOLATILE) &&
20735 						    (src_object->ref_count == 1)) {
20736 							vm_object_lock(src_object);
20737 							vm_object_purge(src_object, 0);
20738 							vm_object_unlock(src_object);
20739 						}
20740 						continue;
20741 					}
20742 
20743 					/*
20744 					 * Pages belonging to this object could be swapped to disk.
20745 					 * Make sure it's not a shared object because we could end
20746 					 * up just bringing it back in again.
20747 					 *
20748 					 * We try to optimize somewhat by checking for objects that are mapped
20749 					 * more than once within our own map. But we don't do full searches,
20750 					 * we just look at the entries following our current entry.
20751 					 */
20752 
20753 					if (src_object->ref_count > 1) {
20754 						if (src_object != cur_shared_object) {
20755 							obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20756 							dirty_shared_count += obj_pages_snapshot;
20757 
20758 							cur_shared_object = src_object;
20759 							cur_shared_obj_ref_cnt = 1;
20760 							continue;
20761 						} else {
20762 							cur_shared_obj_ref_cnt++;
20763 							if (src_object->ref_count == cur_shared_obj_ref_cnt) {
20764 								/*
20765 								 * Fall through to below and treat this object as private.
20766 								 * So deduct its pages from our shared total and add it to the
20767 								 * private total.
20768 								 */
20769 
20770 								dirty_shared_count -= obj_pages_snapshot;
20771 								dirty_private_count += obj_pages_snapshot;
20772 							} else {
20773 								continue;
20774 							}
20775 						}
20776 					}
20777 
20778 
20779 					if (src_object->ref_count == 1) {
20780 						dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20781 					}
20782 
20783 					if (evaluation_phase == TRUE) {
20784 						continue;
20785 					}
20786 				}
20787 
20788 				uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
20789 				*wired_count += src_object->wired_page_count;
20790 
20791 				if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20792 					if (vm_compressor_low_on_space()) {
20793 						*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20794 					}
20795 
20796 					if (vm_swap_low_on_space()) {
20797 						*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20798 					}
20799 
20800 					kr = KERN_NO_SPACE;
20801 					break;
20802 				}
20803 				if (paged_out_count >= dirty_budget) {
20804 					break;
20805 				}
20806 				dirty_budget -= paged_out_count;
20807 			}
20808 		}
20809 	}
20810 
20811 	*shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
20812 	if (evaluation_phase) {
20813 		unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
20814 
20815 		if (dirty_shared_count > shared_pages_threshold) {
20816 			*freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
20817 			kr = KERN_FAILURE;
20818 			goto done;
20819 		}
20820 
20821 		if (dirty_shared_count &&
20822 		    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
20823 			*freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
20824 			kr = KERN_FAILURE;
20825 			goto done;
20826 		}
20827 
20828 		evaluation_phase = FALSE;
20829 		dirty_shared_count = dirty_private_count = 0;
20830 
20831 		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20832 		clock_get_uptime(&c_freezer_last_yield_ts);
20833 
20834 		if (eval_only) {
20835 			kr = KERN_SUCCESS;
20836 			goto done;
20837 		}
20838 
20839 		vm_purgeable_purge_task_owned(task);
20840 
20841 		goto again;
20842 	} else {
20843 		kr = KERN_SUCCESS;
20844 	}
20845 
20846 done:
20847 	vm_map_unlock(map);
20848 
20849 	if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
20850 		vm_object_compressed_freezer_done();
20851 	}
20852 	return kr;
20853 }
20854 
20855 #endif
20856 
20857 /*
20858  * vm_map_entry_should_cow_for_true_share:
20859  *
20860  * Determines if the map entry should be clipped and setup for copy-on-write
20861  * to avoid applying "true_share" to a large VM object when only a subset is
20862  * targeted.
20863  *
20864  * For now, we target only the map entries created for the Objective C
20865  * Garbage Collector, which initially have the following properties:
20866  *	- alias == VM_MEMORY_MALLOC
20867  *      - wired_count == 0
20868  *      - !needs_copy
20869  * and a VM object with:
20870  *      - internal
20871  *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20872  *      - !true_share
20873  *      - vo_size == ANON_CHUNK_SIZE
20874  *
20875  * Only non-kernel map entries.
20876  */
20877 boolean_t
20878 vm_map_entry_should_cow_for_true_share(
20879 	vm_map_entry_t  entry)
20880 {
20881 	vm_object_t     object;
20882 
20883 	if (entry->is_sub_map) {
20884 		/* entry does not point at a VM object */
20885 		return FALSE;
20886 	}
20887 
20888 	if (entry->needs_copy) {
20889 		/* already set for copy_on_write: done! */
20890 		return FALSE;
20891 	}
20892 
20893 	if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20894 	    VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
20895 		/* not a malloc heap or Obj-C Garbage Collector heap */
20896 		return FALSE;
20897 	}
20898 
20899 	if (entry->wired_count) {
20900 		/* wired: can't change the map entry... */
20901 		vm_counters.should_cow_but_wired++;
20902 		return FALSE;
20903 	}
20904 
20905 	object = VME_OBJECT(entry);
20906 
20907 	if (object == VM_OBJECT_NULL) {
20908 		/* no object yet... */
20909 		return FALSE;
20910 	}
20911 
20912 	if (!object->internal) {
20913 		/* not an internal object */
20914 		return FALSE;
20915 	}
20916 
20917 	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20918 		/* not the default copy strategy */
20919 		return FALSE;
20920 	}
20921 
20922 	if (object->true_share) {
20923 		/* already true_share: too late to avoid it */
20924 		return FALSE;
20925 	}
20926 
20927 	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
20928 	    object->vo_size != ANON_CHUNK_SIZE) {
20929 		/* ... not an object created for the ObjC Garbage Collector */
20930 		return FALSE;
20931 	}
20932 
20933 	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
20934 	    object->vo_size != 2048 * 4096) {
20935 		/* ... not a "MALLOC_SMALL" heap */
20936 		return FALSE;
20937 	}
20938 
20939 	/*
20940 	 * All the criteria match: we have a large object being targeted for "true_share".
20941 	 * To limit the adverse side-effects linked with "true_share", tell the caller to
20942 	 * try and avoid setting up the entire object for "true_share" by clipping the
20943 	 * targeted range and setting it up for copy-on-write.
20944 	 */
20945 	return TRUE;
20946 }
20947 
20948 vm_map_offset_t
20949 vm_map_round_page_mask(
20950 	vm_map_offset_t offset,
20951 	vm_map_offset_t mask)
20952 {
20953 	return VM_MAP_ROUND_PAGE(offset, mask);
20954 }
20955 
20956 vm_map_offset_t
20957 vm_map_trunc_page_mask(
20958 	vm_map_offset_t offset,
20959 	vm_map_offset_t mask)
20960 {
20961 	return VM_MAP_TRUNC_PAGE(offset, mask);
20962 }
20963 
20964 boolean_t
20965 vm_map_page_aligned(
20966 	vm_map_offset_t offset,
20967 	vm_map_offset_t mask)
20968 {
20969 	return ((offset) & mask) == 0;
20970 }
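
/*
 * Worked example (illustrative): for a 16KB-page map the mask is 0x3fff:
 *
 *	vm_map_round_page_mask(0x4001, 0x3fff) == 0x8000
 *	vm_map_trunc_page_mask(0x4001, 0x3fff) == 0x4000
 *	vm_map_page_aligned(0x8000, 0x3fff)    == TRUE
 */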
20971 
20972 int
20973 vm_map_page_shift(
20974 	vm_map_t map)
20975 {
20976 	return VM_MAP_PAGE_SHIFT(map);
20977 }
20978 
20979 int
20980 vm_map_page_size(
20981 	vm_map_t map)
20982 {
20983 	return VM_MAP_PAGE_SIZE(map);
20984 }
20985 
20986 vm_map_offset_t
20987 vm_map_page_mask(
20988 	vm_map_t map)
20989 {
20990 	return VM_MAP_PAGE_MASK(map);
20991 }
20992 
20993 kern_return_t
20994 vm_map_set_page_shift(
20995 	vm_map_t        map,
20996 	int             pageshift)
20997 {
20998 	if (map->hdr.nentries != 0) {
20999 		/* too late to change page size */
21000 		return KERN_FAILURE;
21001 	}
21002 
21003 	map->hdr.page_shift = (uint16_t)pageshift;
21004 
21005 	return KERN_SUCCESS;
21006 }
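
/*
 * Usage sketch (illustrative): the page shift can only be changed while the
 * map is still empty (hdr.nentries == 0), so a hypothetical caller would do
 * this right after creating the map and before inserting any entry.
 */
static inline kern_return_t
example_make_16k_map(vm_map_t map)
{
	return vm_map_set_page_shift(map, 14);  /* 1 << 14 == 16KB pages */
}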
21007 
21008 kern_return_t
21009 vm_map_query_volatile(
21010 	vm_map_t        map,
21011 	mach_vm_size_t  *volatile_virtual_size_p,
21012 	mach_vm_size_t  *volatile_resident_size_p,
21013 	mach_vm_size_t  *volatile_compressed_size_p,
21014 	mach_vm_size_t  *volatile_pmap_size_p,
21015 	mach_vm_size_t  *volatile_compressed_pmap_size_p)
21016 {
21017 	mach_vm_size_t  volatile_virtual_size;
21018 	mach_vm_size_t  volatile_resident_count;
21019 	mach_vm_size_t  volatile_compressed_count;
21020 	mach_vm_size_t  volatile_pmap_count;
21021 	mach_vm_size_t  volatile_compressed_pmap_count;
21022 	mach_vm_size_t  resident_count;
21023 	vm_map_entry_t  entry;
21024 	vm_object_t     object;
21025 
21026 	/* map should be locked by caller */
21027 
21028 	volatile_virtual_size = 0;
21029 	volatile_resident_count = 0;
21030 	volatile_compressed_count = 0;
21031 	volatile_pmap_count = 0;
21032 	volatile_compressed_pmap_count = 0;
21033 
21034 	for (entry = vm_map_first_entry(map);
21035 	    entry != vm_map_to_entry(map);
21036 	    entry = entry->vme_next) {
21037 		mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;
21038 
21039 		if (entry->is_sub_map) {
21040 			continue;
21041 		}
21042 		if (!(entry->protection & VM_PROT_WRITE)) {
21043 			continue;
21044 		}
21045 		object = VME_OBJECT(entry);
21046 		if (object == VM_OBJECT_NULL) {
21047 			continue;
21048 		}
21049 		if (object->purgable != VM_PURGABLE_VOLATILE &&
21050 		    object->purgable != VM_PURGABLE_EMPTY) {
21051 			continue;
21052 		}
21053 		if (VME_OFFSET(entry)) {
21054 			/*
21055 			 * If the map entry has been split and the object now
21056 			 * appears several times in the VM map, we don't want
21057 			 * to count the object's resident_page_count more than
21058 			 * once.  We count it only for the first one, starting
21059 			 * at offset 0 and ignore the other VM map entries.
21060 			 */
21061 			continue;
21062 		}
21063 		resident_count = object->resident_page_count;
21064 		if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
21065 			resident_count = 0;
21066 		} else {
21067 			resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
21068 		}
21069 
21070 		volatile_virtual_size += entry->vme_end - entry->vme_start;
21071 		volatile_resident_count += resident_count;
21072 		if (object->pager) {
21073 			volatile_compressed_count +=
21074 			    vm_compressor_pager_get_count(object->pager);
21075 		}
21076 		pmap_compressed_bytes = 0;
21077 		pmap_resident_bytes =
21078 		    pmap_query_resident(map->pmap,
21079 		    entry->vme_start,
21080 		    entry->vme_end,
21081 		    &pmap_compressed_bytes);
21082 		volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
21083 		volatile_compressed_pmap_count += (pmap_compressed_bytes
21084 		    / PAGE_SIZE);
21085 	}
21086 
21087 	/* map is still locked on return */
21088 
21089 	*volatile_virtual_size_p = volatile_virtual_size;
21090 	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
21091 	*volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
21092 	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
21093 	*volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
21094 
21095 	return KERN_SUCCESS;
21096 }
21097 
21098 void
21099 vm_map_sizes(vm_map_t map,
21100     vm_map_size_t * psize,
21101     vm_map_size_t * pfree,
21102     vm_map_size_t * plargest_free)
21103 {
21104 	vm_map_entry_t  entry;
21105 	vm_map_offset_t prev;
21106 	vm_map_size_t   free, total_free, largest_free;
21107 	boolean_t       end;
21108 
21109 	if (!map) {
21110 		*psize = *pfree = *plargest_free = 0;
21111 		return;
21112 	}
21113 	total_free = largest_free = 0;
21114 
21115 	vm_map_lock_read(map);
21116 	if (psize) {
21117 		*psize = map->max_offset - map->min_offset;
21118 	}
21119 
21120 	prev = map->min_offset;
21121 	for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
21122 		end = (entry == vm_map_to_entry(map));
21123 
21124 		if (end) {
21125 			free = entry->vme_end   - prev;
21126 		} else {
21127 			free = entry->vme_start - prev;
21128 		}
21129 
21130 		total_free += free;
21131 		if (free > largest_free) {
21132 			largest_free = free;
21133 		}
21134 
21135 		if (end) {
21136 			break;
21137 		}
21138 		prev = entry->vme_end;
21139 	}
21140 	vm_map_unlock_read(map);
21141 	if (pfree) {
21142 		*pfree = total_free;
21143 	}
21144 	if (plargest_free) {
21145 		*plargest_free = largest_free;
21146 	}
21147 }
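
/*
 * Usage sketch (illustrative): deciding whether a single allocation of
 * "length" bytes could ever fit, using the largest free hole reported by
 * vm_map_sizes(). "example_allocation_could_fit" is hypothetical.
 */
static inline boolean_t
example_allocation_could_fit(vm_map_t map, vm_map_size_t length)
{
	vm_map_size_t size, free_total, largest;

	vm_map_sizes(map, &size, &free_total, &largest);
	return largest >= length;
}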
21148 
21149 #if VM_SCAN_FOR_SHADOW_CHAIN
21150 int vm_map_shadow_max(vm_map_t map);
21151 int
21152 vm_map_shadow_max(
21153 	vm_map_t map)
21154 {
21155 	int             shadows, shadows_max;
21156 	vm_map_entry_t  entry;
21157 	vm_object_t     object, next_object;
21158 
21159 	if (map == NULL) {
21160 		return 0;
21161 	}
21162 
21163 	shadows_max = 0;
21164 
21165 	vm_map_lock_read(map);
21166 
21167 	for (entry = vm_map_first_entry(map);
21168 	    entry != vm_map_to_entry(map);
21169 	    entry = entry->vme_next) {
21170 		if (entry->is_sub_map) {
21171 			continue;
21172 		}
21173 		object = VME_OBJECT(entry);
21174 		if (object == NULL) {
21175 			continue;
21176 		}
21177 		vm_object_lock_shared(object);
21178 		for (shadows = 0;
21179 		    object->shadow != NULL;
21180 		    shadows++, object = next_object) {
21181 			next_object = object->shadow;
21182 			vm_object_lock_shared(next_object);
21183 			vm_object_unlock(object);
21184 		}
21185 		vm_object_unlock(object);
21186 		if (shadows > shadows_max) {
21187 			shadows_max = shadows;
21188 		}
21189 	}
21190 
21191 	vm_map_unlock_read(map);
21192 
21193 	return shadows_max;
21194 }
21195 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21196 
21197 void
21198 vm_commit_pagezero_status(vm_map_t lmap)
21199 {
21200 	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
21201 }
21202 
21203 #if XNU_TARGET_OS_OSX
21204 void
21205 vm_map_set_high_start(
21206 	vm_map_t        map,
21207 	vm_map_offset_t high_start)
21208 {
21209 	map->vmmap_high_start = high_start;
21210 }
21211 #endif /* XNU_TARGET_OS_OSX */
21212 
21213 
21214 /*
21215  * FORKED CORPSE FOOTPRINT
21216  *
21217  * A forked corpse gets a copy of the original VM map but its pmap is mostly
21218  * empty since it never ran and never got to fault in any pages.
21219  * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21220  * a forked corpse would therefore return very little information.
21221  *
21222  * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21223  * to vm_map_fork() to collect footprint information from the original VM map
21224  * and its pmap, and store it in the forked corpse's VM map.  That information
21225  * is stored in place of the VM map's "hole list" since we'll never need to
21226  * lookup for holes in the corpse's map.
21227  *
21228  * The corpse's footprint info looks like this:
21229  *
21230  * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21231  * as follows:
21232  *                     +---------------------------------------+
21233  *            header-> | cf_size                               |
21234  *                     +-------------------+-------------------+
21235  *                     | cf_last_region    | cf_last_zeroes    |
21236  *                     +-------------------+-------------------+
21237  *           region1-> | cfr_vaddr                             |
21238  *                     +-------------------+-------------------+
21239  *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
21240  *                     +---------------------------------------+
21241  *                     | d4 | d5 | ...                         |
21242  *                     +---------------------------------------+
21243  *                     | ...                                   |
21244  *                     +-------------------+-------------------+
21245  *                     | dy | dz | na | na | cfr_vaddr...      | <-region2
21246  *                     +-------------------+-------------------+
21247  *                     | cfr_vaddr (ctd)   | cfr_num_pages     |
21248  *                     +---------------------------------------+
21249  *                     | d0 | d1 ...                           |
21250  *                     +---------------------------------------+
21251  *                       ...
21252  *                     +---------------------------------------+
21253  *       last region-> | cfr_vaddr                             |
21254  *                     +---------------------------------------+
21255  *                     + cfr_num_pages     | d0 | d1 | d2 | d3 |
21256  *                     +---------------------------------------+
21257  *                       ...
21258  *                     +---------------------------------------+
21259  *                     | dx | dy | dz | na | na | na | na | na |
21260  *                     +---------------------------------------+
21261  *
21262  * where:
21263  *      cf_size:	total size of the buffer (rounded to page size)
21264  *      cf_last_region:	offset in the buffer of the last "region" sub-header
21265  *	cf_last_zeroes: number of trailing "zero" dispositions at the end
21266  *			of last region
21267  *	cfr_vaddr:	virtual address of the start of the covered "region"
21268  *	cfr_num_pages:	number of pages in the covered "region"
21269  *	d*:		disposition of the page at that virtual address
21270  * Regions in the buffer are word-aligned.
21271  *
21272  * We estimate the size of the buffer based on the number of memory regions
21273  * and the virtual size of the address space.  While copying each memory region
21274  * during vm_map_fork(), we also collect the footprint info for that region
21275  * and store it in the buffer, packing it as much as possible (coalescing
21276  * contiguous memory regions to avoid having too many region headers and
21277  * avoiding long streaks of "zero" page dispositions by splitting footprint
21278  * "regions", so the number of regions in the footprint buffer might not match
21279  * the number of memory regions in the address space.
21280  *
21281  * We also have to copy the original task's "nonvolatile" ledgers since that's
21282  * part of the footprint and will need to be reported to any tool asking for
21283  * the footprint information of the forked corpse.
21284  */
21285 
21286 uint64_t vm_map_corpse_footprint_count = 0;
21287 uint64_t vm_map_corpse_footprint_size_avg = 0;
21288 uint64_t vm_map_corpse_footprint_size_max = 0;
21289 uint64_t vm_map_corpse_footprint_full = 0;
21290 uint64_t vm_map_corpse_footprint_no_buf = 0;
21291 
21292 struct vm_map_corpse_footprint_header {
21293 	vm_size_t       cf_size;        /* allocated buffer size */
21294 	uint32_t        cf_last_region; /* offset of last region in buffer */
21295 	union {
21296 		uint32_t cfu_last_zeroes; /* during creation:
21297 		                           * number of "zero" dispositions at
21298 		                           * end of last region */
21299 		uint32_t cfu_hint_region; /* during lookup:
21300 		                           * offset of last looked up region */
21301 #define cf_last_zeroes cfu.cfu_last_zeroes
21302 #define cf_hint_region cfu.cfu_hint_region
21303 	} cfu;
21304 };
21305 typedef uint8_t cf_disp_t;
21306 struct vm_map_corpse_footprint_region {
21307 	vm_map_offset_t cfr_vaddr;      /* region start virtual address */
21308 	uint32_t        cfr_num_pages;  /* number of pages in this "region" */
21309 	cf_disp_t   cfr_disposition[0]; /* disposition of each page */
21310 } __attribute__((packed));
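
/*
 * Layout sketch (illustrative): the per-page dispositions are stored inline
 * right after each region header (cfr_disposition is a flexible array
 * member), so looking one up is plain array arithmetic.
 * "example_region_page_disp" is a hypothetical accessor.
 */
static inline cf_disp_t
example_region_page_disp(
	const struct vm_map_corpse_footprint_region *cfr,
	uint32_t page_index)
{
	assert(page_index < cfr->cfr_num_pages);
	return cfr->cfr_disposition[page_index];
}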
21311 
21312 static cf_disp_t
21313 vm_page_disposition_to_cf_disp(
21314 	int disposition)
21315 {
21316 	assert(sizeof(cf_disp_t) == 1);
21317 	/* relocate bits that don't fit in a "uint8_t" */
21318 	if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21319 		disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21320 	}
21321 	/* cast gets rid of extra bits */
21322 	return (cf_disp_t) disposition;
21323 }
21324 
21325 static int
21326 vm_page_cf_disp_to_disposition(
21327 	cf_disp_t cf_disp)
21328 {
21329 	int disposition;
21330 
21331 	assert(sizeof(cf_disp_t) == 1);
21332 	disposition = (int) cf_disp;
21333 	/* move relocated bits back in place */
21334 	if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21335 		disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21336 		disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21337 	}
21338 	return disposition;
21339 }
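
/*
 * Round-trip sketch (illustrative): VM_PAGE_QUERY_PAGE_REUSABLE does not
 * fit in 8 bits, so it is parked in the VM_PAGE_QUERY_PAGE_FICTITIOUS bit.
 * Assuming footprint queries never report the fictitious bit themselves
 * (an assumption for this example), the conversion is lossless:
 *
 *	vm_page_cf_disp_to_disposition(vm_page_disposition_to_cf_disp(d)) == d
 */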
21340 
21341 /*
21342  * vm_map_corpse_footprint_new_region:
21343  *      closes the current footprint "region" and creates a new one
21344  *
21345  * Returns NULL if there's not enough space in the buffer for a new region.
21346  */
21347 static struct vm_map_corpse_footprint_region *
21348 vm_map_corpse_footprint_new_region(
21349 	struct vm_map_corpse_footprint_header *footprint_header)
21350 {
21351 	uintptr_t       footprint_edge;
21352 	uint32_t        new_region_offset;
21353 	struct vm_map_corpse_footprint_region *footprint_region;
21354 	struct vm_map_corpse_footprint_region *new_footprint_region;
21355 
21356 	footprint_edge = ((uintptr_t)footprint_header +
21357 	    footprint_header->cf_size);
21358 	footprint_region = ((struct vm_map_corpse_footprint_region *)
21359 	    ((char *)footprint_header +
21360 	    footprint_header->cf_last_region));
21361 	assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
21362 	    footprint_edge);
21363 
21364 	/* get rid of trailing zeroes in the last region */
21365 	assert(footprint_region->cfr_num_pages >=
21366 	    footprint_header->cf_last_zeroes);
21367 	footprint_region->cfr_num_pages -=
21368 	    footprint_header->cf_last_zeroes;
21369 	footprint_header->cf_last_zeroes = 0;
21370 
21371 	/* reuse this region if it's now empty */
21372 	if (footprint_region->cfr_num_pages == 0) {
21373 		return footprint_region;
21374 	}
21375 
21376 	/* compute offset of new region */
21377 	new_region_offset = footprint_header->cf_last_region;
21378 	new_region_offset += sizeof(*footprint_region);
21379 	new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21380 	new_region_offset = roundup(new_region_offset, sizeof(int));
21381 
21382 	/* check if we're going over the edge */
21383 	if (((uintptr_t)footprint_header +
21384 	    new_region_offset +
21385 	    sizeof(*footprint_region)) >=
21386 	    footprint_edge) {
21387 		/* over the edge: no new region */
21388 		return NULL;
21389 	}
21390 
21391 	/* adjust offset of last region in header */
21392 	footprint_header->cf_last_region = new_region_offset;
21393 
21394 	new_footprint_region = (struct vm_map_corpse_footprint_region *)
21395 	    ((char *)footprint_header +
21396 	    footprint_header->cf_last_region);
21397 	new_footprint_region->cfr_vaddr = 0;
21398 	new_footprint_region->cfr_num_pages = 0;
21399 	/* caller needs to initialize new region */
21400 
21401 	return new_footprint_region;
21402 }
21403 
21404 /*
21405  * vm_map_corpse_footprint_collect:
21406  *	collect footprint information for "old_entry" in "old_map" and
21407  *	stores it in "new_map"'s vmmap_footprint_info.
21408  */
21409 kern_return_t
21410 vm_map_corpse_footprint_collect(
21411 	vm_map_t        old_map,
21412 	vm_map_entry_t  old_entry,
21413 	vm_map_t        new_map)
21414 {
21415 	vm_map_offset_t va;
21416 	kern_return_t   kr;
21417 	struct vm_map_corpse_footprint_header *footprint_header;
21418 	struct vm_map_corpse_footprint_region *footprint_region;
21419 	struct vm_map_corpse_footprint_region *new_footprint_region;
21420 	cf_disp_t       *next_disp_p;
21421 	uintptr_t       footprint_edge;
21422 	uint32_t        num_pages_tmp;
21423 	int             effective_page_size;
21424 
21425 	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
21426 
21427 	va = old_entry->vme_start;
21428 
21429 	vm_map_lock_assert_exclusive(old_map);
21430 	vm_map_lock_assert_exclusive(new_map);
21431 
21432 	assert(new_map->has_corpse_footprint);
21433 	assert(!old_map->has_corpse_footprint);
21434 	if (!new_map->has_corpse_footprint ||
21435 	    old_map->has_corpse_footprint) {
21436 		/*
21437 		 * This can only transfer footprint info from a
21438 		 * map with a live pmap to a map with a corpse footprint.
21439 		 */
21440 		return KERN_NOT_SUPPORTED;
21441 	}
21442 
21443 	if (new_map->vmmap_corpse_footprint == NULL) {
21444 		vm_offset_t     buf;
21445 		vm_size_t       buf_size;
21446 
21447 		buf = 0;
21448 		buf_size = (sizeof(*footprint_header) +
21449 		    (old_map->hdr.nentries
21450 		    *
21451 		    (sizeof(*footprint_region) +
21452 		    +3))            /* potential alignment for each region */
21453 		    +
21454 		    ((old_map->size / effective_page_size)
21455 		    *
21456 		    sizeof(cf_disp_t)));      /* disposition for each page */
21457 //		printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21458 		buf_size = round_page(buf_size);
21459 
21460 		/* limit buffer to 1 page to validate overflow detection */
21461 //		buf_size = PAGE_SIZE;
21462 
21463 		/* limit size to a somewhat sane amount */
21464 #if XNU_TARGET_OS_OSX
21465 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
21466 #else /* XNU_TARGET_OS_OSX */
21467 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
21468 #endif /* XNU_TARGET_OS_OSX */
21469 		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
21470 			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
21471 		}
21472 
21473 		/*
21474 		 * Allocate the pageable buffer (with a trailing guard page).
21475 		 * It will be zero-filled on demand.
21476 		 */
21477 		kr = kernel_memory_allocate(kernel_map,
21478 		    &buf,
21479 		    (buf_size
21480 		    + PAGE_SIZE),                          /* trailing guard page */
21481 		    0,                         /* mask */
21482 		    KMA_PAGEABLE | KMA_GUARD_LAST,
21483 		    VM_KERN_MEMORY_DIAG);
21484 		if (kr != KERN_SUCCESS) {
21485 			vm_map_corpse_footprint_no_buf++;
21486 			return kr;
21487 		}
21488 
21489 		/* initialize header and 1st region */
21490 		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
21491 		new_map->vmmap_corpse_footprint = footprint_header;
21492 
21493 		footprint_header->cf_size = buf_size;
21494 		footprint_header->cf_last_region =
21495 		    sizeof(*footprint_header);
21496 		footprint_header->cf_last_zeroes = 0;
21497 
21498 		footprint_region = (struct vm_map_corpse_footprint_region *)
21499 		    ((char *)footprint_header +
21500 		    footprint_header->cf_last_region);
21501 		footprint_region->cfr_vaddr = 0;
21502 		footprint_region->cfr_num_pages = 0;
21503 	} else {
21504 		/* retrieve header and last region */
21505 		footprint_header = (struct vm_map_corpse_footprint_header *)
21506 		    new_map->vmmap_corpse_footprint;
21507 		footprint_region = (struct vm_map_corpse_footprint_region *)
21508 		    ((char *)footprint_header +
21509 		    footprint_header->cf_last_region);
21510 	}
21511 	footprint_edge = ((uintptr_t)footprint_header +
21512 	    footprint_header->cf_size);
21513 
21514 	if ((footprint_region->cfr_vaddr +
21515 	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
21516 	    effective_page_size))
21517 	    != old_entry->vme_start) {
21518 		uint64_t num_pages_delta, num_pages_delta_size;
21519 		uint32_t region_offset_delta_size;
21520 
21521 		/*
21522 		 * Not the next contiguous virtual address:
21523 		 * start a new region or store "zero" dispositions for
21524 		 * the missing pages?
21525 		 */
21526 		/* size of gap in actual page dispositions */
21527 		num_pages_delta = ((old_entry->vme_start -
21528 		    footprint_region->cfr_vaddr) / effective_page_size)
21529 		    - footprint_region->cfr_num_pages;
21530 		num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
21531 		/* size of gap as a new footprint region header */
21532 		region_offset_delta_size =
21533 		    (sizeof(*footprint_region) +
21534 		    roundup(((footprint_region->cfr_num_pages -
21535 		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
21536 		    sizeof(int)) -
21537 		    ((footprint_region->cfr_num_pages -
21538 		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
21539 //		printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
21540 		if (region_offset_delta_size < num_pages_delta_size ||
21541 		    os_add3_overflow(footprint_region->cfr_num_pages,
21542 		    (uint32_t) num_pages_delta,
21543 		    1,
21544 		    &num_pages_tmp)) {
21545 			/*
21546 			 * Storing data for this gap would take more space
21547 			 * than inserting a new footprint region header:
21548 			 * let's start a new region and save space. If it's a
21549 			 * tie, let's avoid using a new region, since that
21550 			 * would require more region hops to find the right
21551 			 * range during lookups.
21552 			 *
21553 			 * If the current region's cfr_num_pages would overflow
21554 			 * if we added "zero" page dispositions for the gap,
21555 			 * no choice but to start a new region.
21556 			 */
21557 //			printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21558 			new_footprint_region =
21559 			    vm_map_corpse_footprint_new_region(footprint_header);
21560 			/* check that we're not going over the edge */
21561 			if (new_footprint_region == NULL) {
21562 				goto over_the_edge;
21563 			}
21564 			footprint_region = new_footprint_region;
21565 			/* initialize new region as empty */
21566 			footprint_region->cfr_vaddr = old_entry->vme_start;
21567 			footprint_region->cfr_num_pages = 0;
21568 		} else {
21569 			/*
21570 			 * Store "zero" page dispositions for the missing
21571 			 * pages.
21572 			 */
21573 //			printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21574 			for (; num_pages_delta > 0; num_pages_delta--) {
21575 				next_disp_p = (cf_disp_t *)
21576 				    ((uintptr_t) footprint_region +
21577 				    sizeof(*footprint_region));
21578 				next_disp_p += footprint_region->cfr_num_pages;
21579 				/* check that we're not going over the edge */
21580 				if ((uintptr_t)next_disp_p >= footprint_edge) {
21581 					goto over_the_edge;
21582 				}
21583 				/* store "zero" disposition for this gap page */
21584 				footprint_region->cfr_num_pages++;
21585 				*next_disp_p = (cf_disp_t) 0;
21586 				footprint_header->cf_last_zeroes++;
21587 			}
21588 		}
21589 	}
21590 
21591 	for (va = old_entry->vme_start;
21592 	    va < old_entry->vme_end;
21593 	    va += effective_page_size) {
21594 		int             disposition;
21595 		cf_disp_t       cf_disp;
21596 
21597 		vm_map_footprint_query_page_info(old_map,
21598 		    old_entry,
21599 		    va,
21600 		    &disposition);
21601 		cf_disp = vm_page_disposition_to_cf_disp(disposition);
21602 
21603 //		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21604 
21605 		if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
21606 			/*
21607 			 * Ignore "zero" dispositions at start of
21608 			 * region: just move start of region.
21609 			 */
21610 			footprint_region->cfr_vaddr += effective_page_size;
21611 			continue;
21612 		}
21613 
21614 		/* would region's cfr_num_pages overflow? */
21615 		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
21616 		    &num_pages_tmp)) {
21617 			/* overflow: create a new region */
21618 			new_footprint_region =
21619 			    vm_map_corpse_footprint_new_region(
21620 				footprint_header);
21621 			if (new_footprint_region == NULL) {
21622 				goto over_the_edge;
21623 			}
21624 			footprint_region = new_footprint_region;
21625 			footprint_region->cfr_vaddr = va;
21626 			footprint_region->cfr_num_pages = 0;
21627 		}
21628 
21629 		next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
21630 		    sizeof(*footprint_region));
21631 		next_disp_p += footprint_region->cfr_num_pages;
21632 		/* check that we're not going over the edge */
21633 		if ((uintptr_t)next_disp_p >= footprint_edge) {
21634 			goto over_the_edge;
21635 		}
21636 		/* store this disposition */
21637 		*next_disp_p = cf_disp;
21638 		footprint_region->cfr_num_pages++;
21639 
21640 		if (cf_disp != 0) {
21641 			/* non-zero disp: break the current zero streak */
21642 			footprint_header->cf_last_zeroes = 0;
21643 			/* done */
21644 			continue;
21645 		}
21646 
21647 		/* zero disp: add to the current streak of zeroes */
21648 		footprint_header->cf_last_zeroes++;
21649 		if ((footprint_header->cf_last_zeroes +
21650 		    roundup(((footprint_region->cfr_num_pages -
21651 		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
21652 		    (sizeof(int) - 1),
21653 		    sizeof(int))) <
21654 		    (sizeof(*footprint_header))) {
21655 			/*
21656 			 * There are not enough trailing "zero" dispositions
21657 			 * (+ the extra padding we would need for the previous
21658 			 * region); creating a new region would not save space
21659 			 * at this point, so let's keep this "zero" disposition
21660 			 * in this region and reconsider later.
21661 			 */
21662 			continue;
21663 		}
21664 		/*
21665 		 * Create a new region to avoid having too many consecutive
21666 		 * "zero" dispositions.
21667 		 */
21668 		new_footprint_region =
21669 		    vm_map_corpse_footprint_new_region(footprint_header);
21670 		if (new_footprint_region == NULL) {
21671 			goto over_the_edge;
21672 		}
21673 		footprint_region = new_footprint_region;
21674 		/* initialize the new region as empty ... */
21675 		footprint_region->cfr_num_pages = 0;
21676 		/* ... and skip this "zero" disp */
21677 		footprint_region->cfr_vaddr = va + effective_page_size;
21678 	}
21679 
21680 	return KERN_SUCCESS;
21681 
21682 over_the_edge:
21683 //	printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21684 	vm_map_corpse_footprint_full++;
21685 	return KERN_RESOURCE_SHORTAGE;
21686 }
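
/*
 * Illustrative sketch (not part of the kernel build): the space
 * trade-off behind the gap handling and "zero streak" logic in
 * vm_map_corpse_footprint_collect() above.  The header and
 * per-disposition sizes below are assumptions local to this sketch,
 * not taken from the real vm_map_corpse_footprint_region layout.
 */
#if 0   /* standalone userspace illustration only */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HDR_SIZE   12u                  /* assumed packed region header size */
#define DISP_SIZE  sizeof(uint16_t)     /* assumed sizeof(cf_disp_t) */

/*
 * Returns true when starting a new region is strictly cheaper than
 * zero-filling a gap of "gap_pages" pages; on a tie the collector
 * keeps the current region to avoid an extra region hop on lookup.
 */
static bool
new_region_saves_space(uint32_t gap_pages)
{
	return (uint64_t)gap_pages * DISP_SIZE > HDR_SIZE;
}

int
main(void)
{
	for (uint32_t gap = 1; gap <= 10; gap++) {
		printf("gap %2u pages -> %s\n", gap,
		    new_region_saves_space(gap) ? "new region" : "zero-fill");
	}
	return 0;
}
#endif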

/*
 * vm_map_corpse_footprint_collect_done:
 *	completes the footprint collection by getting rid of any remaining
 *	trailing "zero" dispositions and trimming the unused part of the
 *	kernel buffer
 */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t        new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t       buf_size, actual_size;
	kern_return_t   kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));

//	printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE),                 /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE,             /* set_maximum */
		    VM_PROT_NONE);
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	footprint_header->cf_size = actual_size;
}
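
/*
 * Illustrative sketch (not part of the kernel build): the trimming
 * arithmetic used by vm_map_corpse_footprint_collect_done() above.
 * The used part of the buffer ends after the last region's header and
 * its disposition array; once rounded up to a page boundary, the page
 * right after it becomes the new guard page and everything beyond it
 * can be deallocated.  Sizes below are assumptions local to this sketch.
 */
#if 0   /* standalone userspace illustration only */
#include <stdint.h>
#include <stdio.h>

#define PAGE 16384ull   /* assumed page size */

static uint64_t
round_page_up(uint64_t x)
{
	return (x + PAGE - 1) & ~(PAGE - 1);
}

int
main(void)
{
	uint64_t last_region = 40000;   /* stands in for cf_last_region */
	uint64_t hdr = 12, disp = 2;    /* assumed header/cf_disp_t sizes */
	uint64_t num_pages = 1000;      /* cfr_num_pages after zero trim */
	uint64_t buf_size = 20 * PAGE;  /* stands in for cf_size */

	uint64_t actual = round_page_up(last_region + hdr + num_pages * disp);
	if (buf_size > actual) {
		/* [actual, actual + PAGE) is kept as the new guard page */
		printf("trim 0x%llx bytes past the guard page\n",
		    (unsigned long long)(buf_size - actual));
	}
	return 0;
}
#endif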

/*
 * vm_map_corpse_footprint_query_page_info:
 *	retrieves the disposition of the page at virtual address "va"
 *	in the forked corpse's VM map
 *
 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
 */
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t        map,
	vm_map_offset_t va,
	int             *disposition_p)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t        footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int             disp_idx;
	kern_return_t   kr;
	int             effective_page_size;
	cf_disp_t       cf_disp;

	if (!map->has_corpse_footprint) {
		*disposition_p = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		*disposition_p = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

lookup_again:
	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    effective_page_size));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		goto lookup_again;
	}

	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    effective_page_size));
	}
	if (va < region_start || va >= region_end) {
		/* page not found */
		*disposition_p = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
	cf_disp = footprint_region->cfr_disposition[disp_idx];
	*disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
	kr = KERN_SUCCESS;
done:
//	if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disposition_p,
	    kern_return_t, kr);

	return kr;
}
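
/*
 * Illustrative sketch (not part of the kernel build): the sequential
 * region walk performed by vm_map_corpse_footprint_query_page_info()
 * above, over a toy packed encoding.  The struct below only mimics
 * the real region layout; names and sizes are assumptions local to
 * this sketch, and the header/hint handling is omitted.
 */
#if 0   /* standalone userspace illustration only */
#include <stdint.h>
#include <stdio.h>

#define PAGE 16384ull

typedef uint16_t disp_t;

struct region {
	uint64_t vaddr;         /* mimics cfr_vaddr */
	uint32_t num_pages;     /* mimics cfr_num_pages */
} __attribute__((packed));

/* walk regions: header, disposition array, then word-aligned next region */
static int
lookup(const uint8_t *buf, uint32_t last_region, uint64_t va, disp_t *out)
{
	uint32_t off = 0;

	while (off <= last_region) {
		const struct region *r = (const struct region *)(buf + off);
		uint64_t start = r->vaddr;
		uint64_t end = start + (uint64_t)r->num_pages * PAGE;

		if (va >= start && va < end) {
			const disp_t *d = (const disp_t *)(buf + off + sizeof(*r));
			*out = d[(va - start) / PAGE];
			return 0;
		}
		off += sizeof(*r) + r->num_pages * sizeof(disp_t);
		off = (off + sizeof(int) - 1) & ~(uint32_t)(sizeof(int) - 1);
	}
	return -1;      /* not found: the kernel reports disposition 0 */
}

int
main(void)
{
	uint8_t buf[64] __attribute__((aligned(8))) = { 0 };
	struct region *r = (struct region *)buf;
	disp_t *d = (disp_t *)(buf + sizeof(*r));
	disp_t out = 0;

	r->vaddr = 0x100000000ull;
	r->num_pages = 3;
	d[0] = 1; d[1] = 0; d[2] = 2;

	if (lookup(buf, 0, r->vaddr + 2 * PAGE, &out) == 0) {
		printf("disposition %u\n", out);        /* prints 2 */
	}
	return 0;
}
#endif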

void
vm_map_corpse_footprint_destroy(
	vm_map_t        map)
{
	if (map->has_corpse_footprint &&
	    map->vmmap_corpse_footprint != 0) {
		struct vm_map_corpse_footprint_header *footprint_header;
		vm_size_t buf_size;
		kern_return_t kr;

		footprint_header = map->vmmap_corpse_footprint;
		buf_size = footprint_header->cf_size;
		kr = vm_deallocate(kernel_map,
		    (vm_offset_t) map->vmmap_corpse_footprint,
		    ((vm_size_t) buf_size
		    + PAGE_SIZE));                 /* trailing guard page */
		assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
		map->vmmap_corpse_footprint = 0;
		map->has_corpse_footprint = FALSE;
	}
}
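
/*
 * Buffer lifetime note (illustrative): the footprint buffer is
 * followed by one extra page kept as a VM_PROT_NONE guard, which is
 * why the deallocation above covers cf_size plus PAGE_SIZE:
 *
 *	+--------+----------+--------+--- ... ---+------------+
 *	| header | region 0 | disp[] |           | guard page |
 *	+--------+----------+--------+--- ... ---+------------+
 *	<---------------- cf_size ---------------><-PAGE_SIZE->
 */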

/*
 * vm_map_copy_footprint_ledgers:
 *	copies any ledger that's relevant to the memory footprint of "old_task"
 *	into the forked corpse's task ("new_task")
 */
void
vm_map_copy_footprint_ledgers(
	task_t  old_task,
	task_t  new_task)
{
	vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
}

/*
 * vm_map_copy_ledger:
 *	copy a single ledger from "old_task" to "new_task"
 */
void
vm_map_copy_ledger(
	task_t  old_task,
	task_t  new_task,
	int     ledger_entry)
{
	ledger_amount_t old_balance, new_balance, delta;

	assert(new_task->map->has_corpse_footprint);
	if (!new_task->map->has_corpse_footprint) {
		return;
	}

	/* turn off sanity checks for the ledger we're about to mess with */
	ledger_disable_panic_on_negative(new_task->ledger,
	    ledger_entry);

	/* adjust "new_task" to match "old_task" */
	ledger_get_balance(old_task->ledger,
	    ledger_entry,
	    &old_balance);
	ledger_get_balance(new_task->ledger,
	    ledger_entry,
	    &new_balance);
	if (new_balance == old_balance) {
		/* new == old: done */
	} else if (new_balance > old_balance) {
		/* new > old ==> new -= new - old */
		delta = new_balance - old_balance;
		ledger_debit(new_task->ledger,
		    ledger_entry,
		    delta);
	} else {
		/* new < old ==> new += old - new */
		delta = old_balance - new_balance;
		ledger_credit(new_task->ledger,
		    ledger_entry,
		    delta);
	}
}
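
/*
 * Illustrative sketch (not part of the kernel build): the three-way
 * balance matching in vm_map_copy_ledger() above folds into a single
 * signed adjustment.  The kernel keeps two explicit branches instead,
 * computing a non-negative delta for ledger_debit() or ledger_credit()
 * respectively.
 */
#if 0   /* standalone userspace illustration only */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t ledger_amount_t;

/* make "new_bal" match "old_bal": new += (old - new) */
static void
match_balance(ledger_amount_t *new_bal, ledger_amount_t old_bal)
{
	*new_bal += old_bal - *new_bal;
}

int
main(void)
{
	ledger_amount_t corpse = 300, original = 120;

	match_balance(&corpse, original);
	printf("corpse balance now %" PRId64 "\n", corpse);    /* 120 */
	return 0;
}
#endif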

/*
 * vm_map_get_pmap:
 * returns the pmap associated with the vm_map
 */
pmap_t
vm_map_get_pmap(vm_map_t map)
{
	return vm_map_pmap(map);
}

#if MACH_ASSERT

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

#define LEDGER_DRIFT(__LEDGER)                    \
	int             __LEDGER##_over;          \
	ledger_amount_t __LEDGER##_over_total;    \
	ledger_amount_t __LEDGER##_over_max;      \
	int             __LEDGER##_under;         \
	ledger_amount_t __LEDGER##_under_total;   \
	ledger_amount_t __LEDGER##_under_max
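
/*
 * For reference, LEDGER_DRIFT(phys_footprint) expands to six fields
 * recording how often and how far that ledger drifted from zero:
 *
 *	int             phys_footprint_over;
 *	ledger_amount_t phys_footprint_over_total;
 *	ledger_amount_t phys_footprint_over_max;
 *	int             phys_footprint_under;
 *	ledger_amount_t phys_footprint_under_total;
 *	ledger_amount_t phys_footprint_under_max;
 */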

struct {
	uint64_t        num_pmaps_checked;

	LEDGER_DRIFT(phys_footprint);
	LEDGER_DRIFT(internal);
	LEDGER_DRIFT(internal_compressed);
	LEDGER_DRIFT(external);
	LEDGER_DRIFT(reusable);
	LEDGER_DRIFT(iokit_mapped);
	LEDGER_DRIFT(alternate_accounting);
	LEDGER_DRIFT(alternate_accounting_compressed);
	LEDGER_DRIFT(page_table);
	LEDGER_DRIFT(purgeable_volatile);
	LEDGER_DRIFT(purgeable_nonvolatile);
	LEDGER_DRIFT(purgeable_volatile_compressed);
	LEDGER_DRIFT(purgeable_nonvolatile_compressed);
	LEDGER_DRIFT(tagged_nofootprint);
	LEDGER_DRIFT(tagged_footprint);
	LEDGER_DRIFT(tagged_nofootprint_compressed);
	LEDGER_DRIFT(tagged_footprint_compressed);
	LEDGER_DRIFT(network_volatile);
	LEDGER_DRIFT(network_nonvolatile);
	LEDGER_DRIFT(network_volatile_compressed);
	LEDGER_DRIFT(network_nonvolatile_compressed);
	LEDGER_DRIFT(media_nofootprint);
	LEDGER_DRIFT(media_footprint);
	LEDGER_DRIFT(media_nofootprint_compressed);
	LEDGER_DRIFT(media_footprint_compressed);
	LEDGER_DRIFT(graphics_nofootprint);
	LEDGER_DRIFT(graphics_footprint);
	LEDGER_DRIFT(graphics_nofootprint_compressed);
	LEDGER_DRIFT(graphics_footprint_compressed);
	LEDGER_DRIFT(neural_nofootprint);
	LEDGER_DRIFT(neural_footprint);
	LEDGER_DRIFT(neural_nofootprint_compressed);
	LEDGER_DRIFT(neural_footprint_compressed);
} pmap_ledgers_drift;

void
vm_map_pmap_check_ledgers(
	pmap_t          pmap,
	ledger_t        ledger,
	int             pid,
	char            *procname)
{
	ledger_amount_t bal;
	boolean_t       do_panic;

	do_panic = FALSE;

	pmap_ledgers_drift.num_pmaps_checked++;

#define LEDGER_CHECK_BALANCE(__LEDGER)                                  \
MACRO_BEGIN                                                             \
	int panic_on_negative = TRUE;                                   \
	ledger_get_balance(ledger,                                      \
	                   task_ledgers.__LEDGER,                       \
	                   &bal);                                       \
	ledger_get_panic_on_negative(ledger,                            \
	                             task_ledgers.__LEDGER,             \
	                             &panic_on_negative);               \
	if (bal != 0) {                                                 \
	        if (panic_on_negative ||                                \
	            (pmap_ledgers_panic &&                              \
	             pmap_ledgers_panic_leeway > 0 &&                   \
	             (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||  \
	              bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
	                do_panic = TRUE;                                \
	        }                                                       \
	        printf("LEDGER BALANCE proc %d (%s) "                   \
	               "\"%s\" = %lld\n",                               \
	               pid, procname, #__LEDGER, bal);                  \
	        if (bal > 0) {                                          \
	                pmap_ledgers_drift.__LEDGER##_over++;           \
	                pmap_ledgers_drift.__LEDGER##_over_total += bal; \
	                if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
	                        pmap_ledgers_drift.__LEDGER##_over_max = bal; \
	                }                                               \
	        } else if (bal < 0) {                                   \
	                pmap_ledgers_drift.__LEDGER##_under++;          \
	                pmap_ledgers_drift.__LEDGER##_under_total += bal; \
	                if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
	                        pmap_ledgers_drift.__LEDGER##_under_max = bal; \
	                }                                               \
	        }                                                       \
	}                                                               \
MACRO_END

	LEDGER_CHECK_BALANCE(phys_footprint);
	LEDGER_CHECK_BALANCE(internal);
	LEDGER_CHECK_BALANCE(internal_compressed);
	LEDGER_CHECK_BALANCE(external);
	LEDGER_CHECK_BALANCE(reusable);
	LEDGER_CHECK_BALANCE(iokit_mapped);
	LEDGER_CHECK_BALANCE(alternate_accounting);
	LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
	LEDGER_CHECK_BALANCE(page_table);
	LEDGER_CHECK_BALANCE(purgeable_volatile);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
	LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(tagged_nofootprint);
	LEDGER_CHECK_BALANCE(tagged_footprint);
	LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
	LEDGER_CHECK_BALANCE(network_volatile);
	LEDGER_CHECK_BALANCE(network_nonvolatile);
	LEDGER_CHECK_BALANCE(network_volatile_compressed);
	LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(media_nofootprint);
	LEDGER_CHECK_BALANCE(media_footprint);
	LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(media_footprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_nofootprint);
	LEDGER_CHECK_BALANCE(graphics_footprint);
	LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
	LEDGER_CHECK_BALANCE(neural_nofootprint);
	LEDGER_CHECK_BALANCE(neural_footprint);
	LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(neural_footprint_compressed);

	if (do_panic) {
		if (pmap_ledgers_panic) {
			panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers",
			    pmap, pid, procname);
		} else {
			printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		}
	}
}
#endif /* MACH_ASSERT */