xref: /xnu-8019.80.24/osfmk/vm/vm_map.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_map.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Virtual memory mapping module.
64  */
65 
66 #include <mach_assert.h>
67 
68 #include <vm/vm_options.h>
69 
70 #include <libkern/OSAtomic.h>
71 
72 #include <mach/kern_return.h>
73 #include <mach/port.h>
74 #include <mach/vm_attributes.h>
75 #include <mach/vm_param.h>
76 #include <mach/vm_behavior.h>
77 #include <mach/vm_statistics.h>
78 #include <mach/memory_object.h>
79 #include <mach/mach_vm.h>
80 #include <machine/cpu_capabilities.h>
81 #include <mach/sdt.h>
82 
83 #include <kern/assert.h>
84 #include <kern/backtrace.h>
85 #include <kern/counter.h>
86 #include <kern/exc_guard.h>
87 #include <kern/kalloc.h>
88 #include <kern/zalloc_internal.h>
89 
90 #include <vm/cpm.h>
91 #include <vm/vm_compressor.h>
92 #include <vm/vm_compressor_pager.h>
93 #include <vm/vm_init.h>
94 #include <vm/vm_fault.h>
95 #include <vm/vm_map.h>
96 #include <vm/vm_object.h>
97 #include <vm/vm_page.h>
98 #include <vm/vm_pageout.h>
99 #include <vm/pmap.h>
100 #include <vm/vm_kern.h>
101 #include <ipc/ipc_port.h>
102 #include <kern/sched_prim.h>
103 #include <kern/misc_protos.h>
104 
105 #include <mach/vm_map_server.h>
106 #include <mach/mach_host_server.h>
107 #include <vm/vm_protos.h>
108 #include <vm/vm_purgeable_internal.h>
109 
110 #include <vm/vm_protos.h>
111 #include <vm/vm_shared_region.h>
112 #include <vm/vm_map_store.h>
113 
114 #include <san/kasan.h>
115 
116 #include <sys/resource.h>
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
119 #include <sys/reboot.h>
120 #include <sys/kdebug_triage.h>
121 
122 #if __LP64__
123 #define HAVE_VM_MAP_RESERVED_ENTRY_ZONE 0
124 #else
125 #define HAVE_VM_MAP_RESERVED_ENTRY_ZONE 1
126 #endif
127 
128 #include <libkern/section_keywords.h>
129 #if DEVELOPMENT || DEBUG
130 extern int proc_selfcsflags(void);
131 int panic_on_unsigned_execute = 0;
132 int panic_on_mlock_failure = 0;
133 #endif /* DEVELOPMENT || DEBUG */
134 
135 #if MACH_ASSERT
136 int debug4k_filter = 0;
137 char debug4k_proc_name[1024] = "";
138 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
139 int debug4k_panic_on_misaligned_sharing = 0;
140 const char *debug4k_category_name[] = {
141 	"error",        /* 0 */
142 	"life",         /* 1 */
143 	"load",         /* 2 */
144 	"fault",        /* 3 */
145 	"copy",         /* 4 */
146 	"share",        /* 5 */
147 	"adjust",       /* 6 */
148 	"pmap",         /* 7 */
149 	"mementry",     /* 8 */
150 	"iokit",        /* 9 */
151 	"upl",          /* 10 */
152 	"exc",          /* 11 */
153 	"vfs"           /* 12 */
154 };
155 #endif /* MACH_ASSERT */
156 int debug4k_no_cow_copyin = 0;
157 
158 
159 #if __arm64__
160 extern const int fourk_binary_compatibility_unsafe;
161 extern const int fourk_binary_compatibility_allow_wx;
162 #endif /* __arm64__ */
163 extern int proc_selfpid(void);
164 extern char *proc_name_address(void *p);
165 
166 #if VM_MAP_DEBUG_APPLE_PROTECT
167 int vm_map_debug_apple_protect = 0;
168 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
169 #if VM_MAP_DEBUG_FOURK
170 int vm_map_debug_fourk = 0;
171 #endif /* VM_MAP_DEBUG_FOURK */
172 
173 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
174 int vm_map_executable_immutable_verbose = 0;
175 
176 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
177 
178 extern u_int32_t random(void);  /* from <libkern/libkern.h> */
179 /* Internal prototypes
180  */
181 
182 static void vm_map_simplify_range(
183 	vm_map_t        map,
184 	vm_map_offset_t start,
185 	vm_map_offset_t end);   /* forward */
186 
187 static boolean_t        vm_map_range_check(
188 	vm_map_t        map,
189 	vm_map_offset_t start,
190 	vm_map_offset_t end,
191 	vm_map_entry_t  *entry);
192 
193 static vm_map_entry_t   _vm_map_entry_create(
194 	struct vm_map_header    *map_header, boolean_t map_locked);
195 
196 static void             _vm_map_entry_dispose(
197 	struct vm_map_header    *map_header,
198 	vm_map_entry_t          entry);
199 
200 static void             vm_map_pmap_enter(
201 	vm_map_t                map,
202 	vm_map_offset_t         addr,
203 	vm_map_offset_t         end_addr,
204 	vm_object_t             object,
205 	vm_object_offset_t      offset,
206 	vm_prot_t               protection);
207 
208 static void             _vm_map_clip_end(
209 	struct vm_map_header    *map_header,
210 	vm_map_entry_t          entry,
211 	vm_map_offset_t         end);
212 
213 static void             _vm_map_clip_start(
214 	struct vm_map_header    *map_header,
215 	vm_map_entry_t          entry,
216 	vm_map_offset_t         start);
217 
218 static void             vm_map_entry_delete(
219 	vm_map_t        map,
220 	vm_map_entry_t  entry);
221 
222 static kern_return_t    vm_map_delete(
223 	vm_map_t        map,
224 	vm_map_offset_t start,
225 	vm_map_offset_t end,
226 	int             flags,
227 	vm_map_t        zap_map);
228 
229 static void             vm_map_copy_insert(
230 	vm_map_t        map,
231 	vm_map_entry_t  after_where,
232 	vm_map_copy_t   copy);
233 
234 static kern_return_t    vm_map_copy_overwrite_unaligned(
235 	vm_map_t        dst_map,
236 	vm_map_entry_t  entry,
237 	vm_map_copy_t   copy,
238 	vm_map_address_t start,
239 	boolean_t       discard_on_success);
240 
241 static kern_return_t    vm_map_copy_overwrite_aligned(
242 	vm_map_t        dst_map,
243 	vm_map_entry_t  tmp_entry,
244 	vm_map_copy_t   copy,
245 	vm_map_offset_t start,
246 	pmap_t          pmap);
247 
248 static kern_return_t    vm_map_copyin_kernel_buffer(
249 	vm_map_t        src_map,
250 	vm_map_address_t src_addr,
251 	vm_map_size_t   len,
252 	boolean_t       src_destroy,
253 	vm_map_copy_t   *copy_result);  /* OUT */
254 
255 static kern_return_t    vm_map_copyout_kernel_buffer(
256 	vm_map_t        map,
257 	vm_map_address_t *addr, /* IN/OUT */
258 	vm_map_copy_t   copy,
259 	vm_map_size_t   copy_size,
260 	boolean_t       overwrite,
261 	boolean_t       consume_on_success);
262 
263 static void             vm_map_fork_share(
264 	vm_map_t        old_map,
265 	vm_map_entry_t  old_entry,
266 	vm_map_t        new_map);
267 
268 static boolean_t        vm_map_fork_copy(
269 	vm_map_t        old_map,
270 	vm_map_entry_t  *old_entry_p,
271 	vm_map_t        new_map,
272 	int             vm_map_copyin_flags);
273 
274 static kern_return_t    vm_map_wire_nested(
275 	vm_map_t                   map,
276 	vm_map_offset_t            start,
277 	vm_map_offset_t            end,
278 	vm_prot_t                  caller_prot,
279 	vm_tag_t                   tag,
280 	boolean_t                  user_wire,
281 	pmap_t                     map_pmap,
282 	vm_map_offset_t            pmap_addr,
283 	ppnum_t                    *physpage_p);
284 
285 static kern_return_t    vm_map_unwire_nested(
286 	vm_map_t                   map,
287 	vm_map_offset_t            start,
288 	vm_map_offset_t            end,
289 	boolean_t                  user_wire,
290 	pmap_t                     map_pmap,
291 	vm_map_offset_t            pmap_addr);
292 
293 static kern_return_t    vm_map_overwrite_submap_recurse(
294 	vm_map_t                   dst_map,
295 	vm_map_offset_t            dst_addr,
296 	vm_map_size_t              dst_size);
297 
298 static kern_return_t    vm_map_copy_overwrite_nested(
299 	vm_map_t                   dst_map,
300 	vm_map_offset_t            dst_addr,
301 	vm_map_copy_t              copy,
302 	boolean_t                  interruptible,
303 	pmap_t                     pmap,
304 	boolean_t                  discard_on_success);
305 
306 static kern_return_t    vm_map_remap_extract(
307 	vm_map_t                map,
308 	vm_map_offset_t         addr,
309 	vm_map_size_t           size,
310 	boolean_t               copy,
311 	struct vm_map_header    *map_header,
312 	vm_prot_t               *cur_protection,
313 	vm_prot_t               *max_protection,
314 	vm_inherit_t            inheritance,
315 	vm_map_kernel_flags_t   vmk_flags);
316 
317 static kern_return_t    vm_map_remap_range_allocate(
318 	vm_map_t                map,
319 	vm_map_address_t        *address,
320 	vm_map_size_t           size,
321 	vm_map_offset_t         mask,
322 	int                     flags,
323 	vm_map_kernel_flags_t   vmk_flags,
324 	vm_tag_t                tag,
325 	vm_map_entry_t          *map_entry);
326 
327 static void             vm_map_region_look_for_page(
328 	vm_map_t                   map,
329 	vm_map_offset_t            va,
330 	vm_object_t                object,
331 	vm_object_offset_t         offset,
332 	int                        max_refcnt,
333 	unsigned short             depth,
334 	vm_region_extended_info_t  extended,
335 	mach_msg_type_number_t count);
336 
337 static int              vm_map_region_count_obj_refs(
338 	vm_map_entry_t             entry,
339 	vm_object_t                object);
340 
341 
342 static kern_return_t    vm_map_willneed(
343 	vm_map_t        map,
344 	vm_map_offset_t start,
345 	vm_map_offset_t end);
346 
347 static kern_return_t    vm_map_reuse_pages(
348 	vm_map_t        map,
349 	vm_map_offset_t start,
350 	vm_map_offset_t end);
351 
352 static kern_return_t    vm_map_reusable_pages(
353 	vm_map_t        map,
354 	vm_map_offset_t start,
355 	vm_map_offset_t end);
356 
357 static kern_return_t    vm_map_can_reuse(
358 	vm_map_t        map,
359 	vm_map_offset_t start,
360 	vm_map_offset_t end);
361 
362 #if MACH_ASSERT
363 static kern_return_t    vm_map_pageout(
364 	vm_map_t        map,
365 	vm_map_offset_t start,
366 	vm_map_offset_t end);
367 #endif /* MACH_ASSERT */
368 
369 kern_return_t vm_map_corpse_footprint_collect(
370 	vm_map_t        old_map,
371 	vm_map_entry_t  old_entry,
372 	vm_map_t        new_map);
373 void vm_map_corpse_footprint_collect_done(
374 	vm_map_t        new_map);
375 void vm_map_corpse_footprint_destroy(
376 	vm_map_t        map);
377 kern_return_t vm_map_corpse_footprint_query_page_info(
378 	vm_map_t        map,
379 	vm_map_offset_t va,
380 	int             *disposition_p);
381 void vm_map_footprint_query_page_info(
382 	vm_map_t        map,
383 	vm_map_entry_t  map_entry,
384 	vm_map_offset_t curr_s_offset,
385 	int             *disposition_p);
386 
387 pid_t find_largest_process_vm_map_entries(void);
388 
389 extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
390     mach_exception_data_type_t subcode);
391 
392 /*
393  * Macros to copy a vm_map_entry. We must be careful to correctly
394  * manage the wired page count. vm_map_entry_copy() creates a new
395  * map entry to the same memory - the wired count in the new entry
396  * must be set to zero. vm_map_entry_copy_full() creates a new
397  * entry that is identical to the old entry.  This preserves the
398  * wire count; it's used for map splitting and zone changing in
399  * vm_map_copyout.
400  */
401 
402 static inline void
vm_map_entry_copy_pmap_cs_assoc(vm_map_t map __unused,vm_map_entry_t new __unused,vm_map_entry_t old __unused)403 vm_map_entry_copy_pmap_cs_assoc(
404 	vm_map_t map __unused,
405 	vm_map_entry_t new __unused,
406 	vm_map_entry_t old __unused)
407 {
408 	/* when pmap_cs is not enabled, assert as a sanity check */
409 	assert(new->pmap_cs_associated == FALSE);
410 }
411 
412 /*
413  * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
414  * But for security reasons on some platforms, we don't want the
415  * new mapping to be "used for jit", so we reset the flag here.
416  */
417 static inline void
vm_map_entry_copy_code_signing(vm_map_t map,vm_map_entry_t new,vm_map_entry_t old __unused)418 vm_map_entry_copy_code_signing(
419 	vm_map_t map,
420 	vm_map_entry_t new,
421 	vm_map_entry_t old __unused)
422 {
423 	if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
424 		assert(new->used_for_jit == old->used_for_jit);
425 	} else {
426 		new->used_for_jit = FALSE;
427 	}
428 }
429 
430 static inline void
vm_map_entry_copy(vm_map_t map,vm_map_entry_t new,vm_map_entry_t old)431 vm_map_entry_copy(
432 	vm_map_t map,
433 	vm_map_entry_t new,
434 	vm_map_entry_t old)
435 {
436 	*new = *old;
437 	new->is_shared = FALSE;
438 	new->needs_wakeup = FALSE;
439 	new->in_transition = FALSE;
440 	new->wired_count = 0;
441 	new->user_wired_count = 0;
442 	new->permanent = FALSE;
443 	vm_map_entry_copy_code_signing(map, new, old);
444 	vm_map_entry_copy_pmap_cs_assoc(map, new, old);
445 	if (new->iokit_acct) {
446 		assertf(!new->use_pmap, "old %p new %p\n", old, new);
447 		new->iokit_acct = FALSE;
448 		new->use_pmap = TRUE;
449 	}
450 	new->vme_resilient_codesign = FALSE;
451 	new->vme_resilient_media = FALSE;
452 	new->vme_atomic = FALSE;
453 	new->vme_no_copy_on_read = FALSE;
454 }
455 
456 static inline void
vm_map_entry_copy_full(vm_map_entry_t new,vm_map_entry_t old)457 vm_map_entry_copy_full(
458 	vm_map_entry_t new,
459 	vm_map_entry_t old)
460 {
461 	*new = *old;
462 }
463 
464 /*
465  * Normal lock_read_to_write() returns FALSE/0 on failure.
466  * These functions evaluate to zero on success and non-zero value on failure.
467  */
468 __attribute__((always_inline))
469 int
vm_map_lock_read_to_write(vm_map_t map)470 vm_map_lock_read_to_write(vm_map_t map)
471 {
472 	if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
473 		DTRACE_VM(vm_map_lock_upgrade);
474 		return 0;
475 	}
476 	return 1;
477 }
478 
479 __attribute__((always_inline))
480 boolean_t
vm_map_try_lock(vm_map_t map)481 vm_map_try_lock(vm_map_t map)
482 {
483 	if (lck_rw_try_lock_exclusive(&(map)->lock)) {
484 		DTRACE_VM(vm_map_lock_w);
485 		return TRUE;
486 	}
487 	return FALSE;
488 }
489 
490 __attribute__((always_inline))
491 boolean_t
vm_map_try_lock_read(vm_map_t map)492 vm_map_try_lock_read(vm_map_t map)
493 {
494 	if (lck_rw_try_lock_shared(&(map)->lock)) {
495 		DTRACE_VM(vm_map_lock_r);
496 		return TRUE;
497 	}
498 	return FALSE;
499 }
500 
501 /*
502  * Routines to get the page size the caller should
503  * use while inspecting the target address space.
504  * Use the "_safely" variant if the caller is dealing with a user-provided
505  * array whose size depends on the page size, to avoid any overflow or
506  * underflow of a user-allocated buffer.
507  */
508 int
vm_self_region_page_shift_safely(vm_map_t target_map)509 vm_self_region_page_shift_safely(
510 	vm_map_t target_map)
511 {
512 	int effective_page_shift = 0;
513 
514 	if (PAGE_SIZE == (4096)) {
515 		/* x86_64 and 4k watches: always use 4k */
516 		return PAGE_SHIFT;
517 	}
518 	/* did caller provide an explicit page size for this thread to use? */
519 	effective_page_shift = thread_self_region_page_shift();
520 	if (effective_page_shift) {
521 		/* use the explicitly-provided page size */
522 		return effective_page_shift;
523 	}
524 	/* no explicit page size: use the caller's page size... */
525 	effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
526 	if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
527 		/* page size match: safe to use */
528 		return effective_page_shift;
529 	}
530 	/* page size mismatch */
531 	return -1;
532 }
533 int
vm_self_region_page_shift(vm_map_t target_map)534 vm_self_region_page_shift(
535 	vm_map_t target_map)
536 {
537 	int effective_page_shift;
538 
539 	effective_page_shift = vm_self_region_page_shift_safely(target_map);
540 	if (effective_page_shift == -1) {
541 		/* no safe value but OK to guess for caller */
542 		effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
543 		    VM_MAP_PAGE_SHIFT(target_map));
544 	}
545 	return effective_page_shift;
546 }
547 
548 
549 /*
550  *	Decide if we want to allow processes to execute from their data or stack areas.
551  *	override_nx() returns true if we do.  Data/stack execution can be enabled independently
552  *	for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
553  *	or allow_stack_exec to enable data execution for that type of data area for that particular
554  *	ABI (or both by or'ing the flags together).  These are initialized in the architecture
555  *	specific pmap files since the default behavior varies according to architecture.  The
556  *	main reason it varies is because of the need to provide binary compatibility with old
557  *	applications that were written before these restrictions came into being.  In the old
558  *	days, an app could execute anything it could read, but this has slowly been tightened
559  *	up over time.  The default behavior is:
560  *
561  *	32-bit PPC apps		may execute from both stack and data areas
 562  *	32-bit Intel apps	may execute from data areas but not stack
563  *	64-bit PPC/Intel apps	may not execute from either data or stack
564  *
565  *	An application on any architecture may override these defaults by explicitly
566  *	adding PROT_EXEC permission to the page in question with the mprotect(2)
567  *	system call.  This code here just determines what happens when an app tries to
568  *      execute from a page that lacks execute permission.
569  *
570  *	Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
571  *	default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
572  *	a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
573  *	execution from data areas for a particular binary even if the arch normally permits it. As
574  *	a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
575  *	to support some complicated use cases, notably browsers with out-of-process plugins that
576  *	are not all NX-safe.
577  */
578 
579 extern int allow_data_exec, allow_stack_exec;
580 
581 int
override_nx(vm_map_t map,uint32_t user_tag)582 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
583 {
584 	int current_abi;
585 
586 	if (map->pmap == kernel_pmap) {
587 		return FALSE;
588 	}
589 
590 	/*
591 	 * Determine if the app is running in 32 or 64 bit mode.
592 	 */
593 
594 	if (vm_map_is_64bit(map)) {
595 		current_abi = VM_ABI_64;
596 	} else {
597 		current_abi = VM_ABI_32;
598 	}
599 
600 	/*
601 	 * Determine if we should allow the execution based on whether it's a
602 	 * stack or data area and the current architecture.
603 	 */
604 
605 	if (user_tag == VM_MEMORY_STACK) {
606 		return allow_stack_exec & current_abi;
607 	}
608 
609 	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
610 }
611 
612 
613 /*
614  *	Virtual memory maps provide for the mapping, protection,
615  *	and sharing of virtual memory objects.  In addition,
616  *	this module provides for an efficient virtual copy of
617  *	memory from one map to another.
618  *
619  *	Synchronization is required prior to most operations.
620  *
621  *	Maps consist of an ordered doubly-linked list of simple
622  *	entries; a single hint is used to speed up lookups.
623  *
624  *	Sharing maps have been deleted from this version of Mach.
625  *	All shared objects are now mapped directly into the respective
626  *	maps.  This requires a change in the copy on write strategy;
627  *	the asymmetric (delayed) strategy is used for shared temporary
628  *	objects instead of the symmetric (shadow) strategy.  All maps
629  *	are now "top level" maps (either task map, kernel map or submap
630  *	of the kernel map).
631  *
 632  *	Since portions of maps are specified by start/end addresses,
633  *	which may not align with existing map entries, all
634  *	routines merely "clip" entries to these start/end values.
635  *	[That is, an entry is split into two, bordering at a
636  *	start or end value.]  Note that these clippings may not
637  *	always be necessary (as the two resulting entries are then
638  *	not changed); however, the clipping is done for convenience.
639  *	No attempt is currently made to "glue back together" two
640  *	abutting entries.
641  *
642  *	The symmetric (shadow) copy strategy implements virtual copy
643  *	by copying VM object references from one map to
644  *	another, and then marking both regions as copy-on-write.
645  *	It is important to note that only one writeable reference
646  *	to a VM object region exists in any map when this strategy
647  *	is used -- this means that shadow object creation can be
648  *	delayed until a write operation occurs.  The symmetric (delayed)
649  *	strategy allows multiple maps to have writeable references to
650  *	the same region of a vm object, and hence cannot delay creating
651  *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
652  *	Copying of permanent objects is completely different; see
653  *	vm_object_copy_strategically() in vm_object.c.
654  */
655 
656 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone;       /* zone for vm_map structures */
657 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone;  /* zone for vm_map_copy structures */
658 
659 SECURITY_READ_ONLY_LATE(zone_t)        vm_map_entry_zone; /* zone for vm_map_entry structures */
660 SECURITY_READ_ONLY_LATE(zone_t)        vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
661 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
662 SECURITY_READ_ONLY_LATE(zone_t)        vm_map_entry_reserved_zone;
663 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
664 
665 #define VM_MAP_ZONE_NAME "maps"
666 #define VM_MAP_ZFLAGS ( \
667 	ZC_NOENCRYPT | \
668 	ZC_NOGZALLOC | \
669 	ZC_ALLOW_FOREIGN)
670 
671 #define VM_MAP_ENTRY_ZONE_NAME "VM map entries"
672 #define VM_MAP_ENTRY_ZFLAGS ( \
673 	ZC_NOENCRYPT | \
674 	ZC_CACHING | \
675 	ZC_NOGZALLOC | \
676 	ZC_KASAN_NOQUARANTINE | \
677 	ZC_VM_LP64 | \
678 	ZC_ALLOW_FOREIGN)
679 
680 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
681 #define VM_MAP_ENTRY_RESERVED_ZONE_NAME "Reserved VM map entries"
682 #define VM_MAP_ENTRY_RESERVED_ZFLAGS ( \
683 	ZC_NOENCRYPT | \
684 	ZC_NOCACHING | \
685 	ZC_NOGZALLOC | \
686 	ZC_KASAN_NOQUARANTINE | \
687 	ZC_VM)
688 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
689 
690 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
691 #define VM_MAP_HOLES_ZFLAGS ( \
692 	ZC_NOENCRYPT | \
693 	ZC_CACHING | \
694 	ZC_NOGZALLOC | \
695 	ZC_KASAN_NOQUARANTINE | \
696 	ZC_VM_LP64 | \
697 	ZC_ALLOW_FOREIGN)
698 
699 /*
700  * Asserts that a vm_map_copy object is coming from the
701  * vm_map_copy_zone to ensure that it isn't a fake constructed
702  * anywhere else.
703  */
704 static inline void
vm_map_copy_require(struct vm_map_copy * copy)705 vm_map_copy_require(struct vm_map_copy *copy)
706 {
707 	zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
708 }
709 
710 /*
711  *	vm_map_require:
712  *
713  *	Ensures that the argument is memory allocated from the genuine
714  *	vm map zone. (See zone_id_require_allow_foreign).
715  */
716 void
vm_map_require(vm_map_t map)717 vm_map_require(vm_map_t map)
718 {
719 	zone_id_require_allow_foreign(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
720 }
721 
722 static __startup_data vm_offset_t      map_data;
723 static __startup_data vm_size_t        map_data_size;
724 static __startup_data vm_offset_t      kentry_data;
725 static __startup_data vm_size_t        kentry_data_size;
726 static __startup_data vm_offset_t      map_holes_data;
727 static __startup_data vm_size_t        map_holes_data_size;
728 
729 #if XNU_TARGET_OS_OSX
730 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
731 #else /* XNU_TARGET_OS_OSX */
732 #define         NO_COALESCE_LIMIT  0
733 #endif /* XNU_TARGET_OS_OSX */
734 
735 /* Skip acquiring locks if we're in the midst of a kernel core dump */
736 unsigned int not_in_kdp = 1;
737 
738 unsigned int vm_map_set_cache_attr_count = 0;
739 
740 kern_return_t
vm_map_set_cache_attr(vm_map_t map,vm_map_offset_t va)741 vm_map_set_cache_attr(
742 	vm_map_t        map,
743 	vm_map_offset_t va)
744 {
745 	vm_map_entry_t  map_entry;
746 	vm_object_t     object;
747 	kern_return_t   kr = KERN_SUCCESS;
748 
749 	vm_map_lock_read(map);
750 
751 	if (!vm_map_lookup_entry(map, va, &map_entry) ||
752 	    map_entry->is_sub_map) {
753 		/*
754 		 * that memory is not properly mapped
755 		 */
756 		kr = KERN_INVALID_ARGUMENT;
757 		goto done;
758 	}
759 	object = VME_OBJECT(map_entry);
760 
761 	if (object == VM_OBJECT_NULL) {
762 		/*
763 		 * there should be a VM object here at this point
764 		 */
765 		kr = KERN_INVALID_ARGUMENT;
766 		goto done;
767 	}
768 	vm_object_lock(object);
769 	object->set_cache_attr = TRUE;
770 	vm_object_unlock(object);
771 
772 	vm_map_set_cache_attr_count++;
773 done:
774 	vm_map_unlock_read(map);
775 
776 	return kr;
777 }
778 
779 
780 #if CONFIG_CODE_DECRYPTION
781 /*
782  * vm_map_apple_protected:
783  * This remaps the requested part of the object with an object backed by
784  * the decrypting pager.
785  * crypt_info contains entry points and session data for the crypt module.
786  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
787  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
788  */
789 kern_return_t
vm_map_apple_protected(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_object_offset_t crypto_backing_offset,struct pager_crypt_info * crypt_info,uint32_t cryptid)790 vm_map_apple_protected(
791 	vm_map_t                map,
792 	vm_map_offset_t         start,
793 	vm_map_offset_t         end,
794 	vm_object_offset_t      crypto_backing_offset,
795 	struct pager_crypt_info *crypt_info,
796 	uint32_t                cryptid)
797 {
798 	boolean_t       map_locked;
799 	kern_return_t   kr;
800 	vm_map_entry_t  map_entry;
801 	struct vm_map_entry tmp_entry;
802 	memory_object_t unprotected_mem_obj;
803 	vm_object_t     protected_object;
804 	vm_map_offset_t map_addr;
805 	vm_map_offset_t start_aligned, end_aligned;
806 	vm_object_offset_t      crypto_start, crypto_end;
807 	int             vm_flags;
808 	vm_map_kernel_flags_t vmk_flags;
809 	boolean_t       cache_pager;
810 
811 	vm_flags = 0;
812 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
813 
814 	map_locked = FALSE;
815 	unprotected_mem_obj = MEMORY_OBJECT_NULL;
816 
817 	start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
818 	end_aligned = vm_map_round_page(end, PAGE_MASK_64);
819 	start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
820 	end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
821 
822 #if __arm64__
823 	/*
824 	 * "start" and "end" might be 4K-aligned but not 16K-aligned,
825 	 * so we might have to loop and establish up to 3 mappings:
826 	 *
827 	 * + the first 16K-page, which might overlap with the previous
828 	 *   4K-aligned mapping,
829 	 * + the center,
830 	 * + the last 16K-page, which might overlap with the next
831 	 *   4K-aligned mapping.
832 	 * Each of these mapping might be backed by a vnode pager (if
833 	 * properly page-aligned) or a "fourk_pager", itself backed by a
834 	 * vnode pager (if 4K-aligned but not page-aligned).
835 	 */
836 #endif /* __arm64__ */
837 
838 	map_addr = start_aligned;
839 	for (map_addr = start_aligned;
840 	    map_addr < end;
841 	    map_addr = tmp_entry.vme_end) {
842 		vm_map_lock(map);
843 		map_locked = TRUE;
844 
845 		/* lookup the protected VM object */
846 		if (!vm_map_lookup_entry(map,
847 		    map_addr,
848 		    &map_entry) ||
849 		    map_entry->is_sub_map ||
850 		    VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
851 			/* that memory is not properly mapped */
852 			kr = KERN_INVALID_ARGUMENT;
853 			goto done;
854 		}
855 
 856 		/* ensure mapped memory is mapped as executable,
 857 		 *  except for the model decryption flow */
858 		if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
859 		    !(map_entry->protection & VM_PROT_EXECUTE)) {
860 			kr = KERN_INVALID_ARGUMENT;
861 			goto done;
862 		}
863 
864 		/* get the protected object to be decrypted */
865 		protected_object = VME_OBJECT(map_entry);
866 		if (protected_object == VM_OBJECT_NULL) {
867 			/* there should be a VM object here at this point */
868 			kr = KERN_INVALID_ARGUMENT;
869 			goto done;
870 		}
871 		/* ensure protected object stays alive while map is unlocked */
872 		vm_object_reference(protected_object);
873 
874 		/* limit the map entry to the area we want to cover */
875 		vm_map_clip_start(map, map_entry, start_aligned);
876 		vm_map_clip_end(map, map_entry, end_aligned);
877 
878 		tmp_entry = *map_entry;
879 		map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
880 		vm_map_unlock(map);
881 		map_locked = FALSE;
882 
883 		/*
884 		 * This map entry might be only partially encrypted
885 		 * (if not fully "page-aligned").
886 		 */
887 		crypto_start = 0;
888 		crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
889 		if (tmp_entry.vme_start < start) {
890 			if (tmp_entry.vme_start != start_aligned) {
891 				kr = KERN_INVALID_ADDRESS;
892 			}
893 			crypto_start += (start - tmp_entry.vme_start);
894 		}
895 		if (tmp_entry.vme_end > end) {
896 			if (tmp_entry.vme_end != end_aligned) {
897 				kr = KERN_INVALID_ADDRESS;
898 			}
899 			crypto_end -= (tmp_entry.vme_end - end);
900 		}
901 
902 		/*
903 		 * This "extra backing offset" is needed to get the decryption
904 		 * routine to use the right key.  It adjusts for the possibly
905 		 * relative offset of an interposed "4K" pager...
906 		 */
907 		if (crypto_backing_offset == (vm_object_offset_t) -1) {
908 			crypto_backing_offset = VME_OFFSET(&tmp_entry);
909 		}
910 
911 		cache_pager = TRUE;
912 #if XNU_TARGET_OS_OSX
913 		if (vm_map_is_alien(map)) {
914 			cache_pager = FALSE;
915 		}
916 #endif /* XNU_TARGET_OS_OSX */
917 
918 		/*
919 		 * Lookup (and create if necessary) the protected memory object
920 		 * matching that VM object.
921 		 * If successful, this also grabs a reference on the memory object,
922 		 * to guarantee that it doesn't go away before we get a chance to map
923 		 * it.
924 		 */
925 		unprotected_mem_obj = apple_protect_pager_setup(
926 			protected_object,
927 			VME_OFFSET(&tmp_entry),
928 			crypto_backing_offset,
929 			crypt_info,
930 			crypto_start,
931 			crypto_end,
932 			cache_pager);
933 
934 		/* release extra ref on protected object */
935 		vm_object_deallocate(protected_object);
936 
937 		if (unprotected_mem_obj == NULL) {
938 			kr = KERN_FAILURE;
939 			goto done;
940 		}
941 
942 		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
943 		/* can overwrite an immutable mapping */
944 		vmk_flags.vmkf_overwrite_immutable = TRUE;
945 #if __arm64__
946 		if (tmp_entry.used_for_jit &&
947 		    (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
948 		    PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
949 		    fourk_binary_compatibility_unsafe &&
950 		    fourk_binary_compatibility_allow_wx) {
951 			printf("** FOURK_COMPAT [%d]: "
952 			    "allowing write+execute at 0x%llx\n",
953 			    proc_selfpid(), tmp_entry.vme_start);
954 			vmk_flags.vmkf_map_jit = TRUE;
955 		}
956 #endif /* __arm64__ */
957 
958 		/* map this memory object in place of the current one */
959 		map_addr = tmp_entry.vme_start;
960 		kr = vm_map_enter_mem_object(map,
961 		    &map_addr,
962 		    (tmp_entry.vme_end -
963 		    tmp_entry.vme_start),
964 		    (mach_vm_offset_t) 0,
965 		    vm_flags,
966 		    vmk_flags,
967 		    VM_KERN_MEMORY_NONE,
968 		    (ipc_port_t)(uintptr_t) unprotected_mem_obj,
969 		    0,
970 		    TRUE,
971 		    tmp_entry.protection,
972 		    tmp_entry.max_protection,
973 		    tmp_entry.inheritance);
974 		assertf(kr == KERN_SUCCESS,
975 		    "kr = 0x%x\n", kr);
976 		assertf(map_addr == tmp_entry.vme_start,
977 		    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
978 		    (uint64_t)map_addr,
979 		    (uint64_t) tmp_entry.vme_start,
980 		    &tmp_entry);
981 
982 #if VM_MAP_DEBUG_APPLE_PROTECT
983 		if (vm_map_debug_apple_protect) {
984 			printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
985 			    " backing:[object:%p,offset:0x%llx,"
986 			    "crypto_backing_offset:0x%llx,"
987 			    "crypto_start:0x%llx,crypto_end:0x%llx]\n",
988 			    map,
989 			    (uint64_t) map_addr,
990 			    (uint64_t) (map_addr + (tmp_entry.vme_end -
991 			    tmp_entry.vme_start)),
992 			    unprotected_mem_obj,
993 			    protected_object,
994 			    VME_OFFSET(&tmp_entry),
995 			    crypto_backing_offset,
996 			    crypto_start,
997 			    crypto_end);
998 		}
999 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1000 
1001 		/*
1002 		 * Release the reference obtained by
1003 		 * apple_protect_pager_setup().
1004 		 * The mapping (if it succeeded) is now holding a reference on
1005 		 * the memory object.
1006 		 */
1007 		memory_object_deallocate(unprotected_mem_obj);
1008 		unprotected_mem_obj = MEMORY_OBJECT_NULL;
1009 
1010 		/* continue with next map entry */
1011 		crypto_backing_offset += (tmp_entry.vme_end -
1012 		    tmp_entry.vme_start);
1013 		crypto_backing_offset -= crypto_start;
1014 	}
1015 	kr = KERN_SUCCESS;
1016 
1017 done:
1018 	if (map_locked) {
1019 		vm_map_unlock(map);
1020 	}
1021 	return kr;
1022 }
1023 #endif  /* CONFIG_CODE_DECRYPTION */
1024 
1025 
1026 LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
1027 LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1028 LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
1029 
1030 #if XNU_TARGET_OS_OSX
1031 int malloc_no_cow = 0;
1032 #else /* XNU_TARGET_OS_OSX */
1033 int malloc_no_cow = 1;
1034 #endif /* XNU_TARGET_OS_OSX */
1035 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
1036 #if DEBUG
1037 int vm_check_map_sanity = 0;
1038 #endif
1039 
1040 /*
1041  *	vm_map_init:
1042  *
1043  *	Initialize the vm_map module.  Must be called before
1044  *	any other vm_map routines.
1045  *
1046  *	Map and entry structures are allocated from zones -- we must
1047  *	initialize those zones.
1048  *
1049  *	There are three zones of interest:
1050  *
1051  *	vm_map_zone:		used to allocate maps.
1052  *	vm_map_entry_zone:	used to allocate map entries.
1053  *
1054  *	LP32:
1055  *	vm_map_entry_reserved_zone:     fallback zone for kernel map entries
1056  *
1057  *	The kernel allocates map entries from a special zone that is initially
1058  *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
1060  *	empty since the very act of allocating memory implies the creation
1061  *	of a new entry.
1062  */
1063 __startup_func
1064 void
vm_map_init(void)1065 vm_map_init(void)
1066 {
1067 
1068 #if MACH_ASSERT
1069 	PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1070 	    sizeof(debug4k_filter));
1071 #endif /* MACH_ASSERT */
1072 
1073 	vm_map_zone = zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1074 	    VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
1075 
1076 	/*
1077 	 * Don't quarantine because we always need elements available
1078 	 * Disallow GC on this zone... to aid the GC.
1079 	 */
1080 	vm_map_entry_zone = zone_create_ext(VM_MAP_ENTRY_ZONE_NAME,
1081 	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
1082 	    ZONE_ID_VM_MAP_ENTRY, ^(zone_t z) {
1083 		z->z_elems_rsv = (uint16_t)(32 *
1084 		(ml_early_cpu_max_number() + 1));
1085 	});
1086 #if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
1087 	vm_map_entry_reserved_zone = zone_create(VM_MAP_ENTRY_RESERVED_ZONE_NAME,
1088 	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_RESERVED_ZFLAGS);
1089 #endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
1090 
1091 	vm_map_holes_zone = zone_create_ext(VM_MAP_HOLES_ZONE_NAME,
1092 	    sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1093 	    ZONE_ID_VM_MAP_HOLES, ^(zone_t z) {
1094 		z->z_elems_rsv = (uint16_t)(16 * 1024 / zone_elem_size(z));
1095 	});
1096 
1097 	vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1098 	    ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1099 
1100 	/*
1101 	 * Add the stolen memory to zones, adjust zone size and stolen counts.
1102 	 */
1103 	zone_cram_foreign(vm_map_zone, map_data, map_data_size);
1104 	zone_cram_foreign(vm_map_entry_zone, kentry_data, kentry_data_size);
1105 	zone_cram_foreign(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1106 	printf("VM boostrap: %d maps, %d entries and %d holes available\n",
1107 	    vm_map_zone->z_elems_free,
1108 	    vm_map_entry_zone->z_elems_free,
1109 	    vm_map_holes_zone->z_elems_free);
1110 
1111 	/*
1112 	 * Since these are covered by zones, remove them from stolen page accounting.
1113 	 */
1114 	VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1115 
1116 #if VM_MAP_DEBUG_APPLE_PROTECT
1117 	PE_parse_boot_argn("vm_map_debug_apple_protect",
1118 	    &vm_map_debug_apple_protect,
1119 	    sizeof(vm_map_debug_apple_protect));
1120 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1121 #if VM_MAP_DEBUG_APPLE_FOURK
1122 	PE_parse_boot_argn("vm_map_debug_fourk",
1123 	    &vm_map_debug_fourk,
1124 	    sizeof(vm_map_debug_fourk));
1125 #endif /* VM_MAP_DEBUG_FOURK */
1126 	PE_parse_boot_argn("vm_map_executable_immutable",
1127 	    &vm_map_executable_immutable,
1128 	    sizeof(vm_map_executable_immutable));
1129 	PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1130 	    &vm_map_executable_immutable_verbose,
1131 	    sizeof(vm_map_executable_immutable_verbose));
1132 
1133 	PE_parse_boot_argn("malloc_no_cow",
1134 	    &malloc_no_cow,
1135 	    sizeof(malloc_no_cow));
1136 	if (malloc_no_cow) {
1137 		vm_memory_malloc_no_cow_mask = 0ULL;
1138 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1139 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1140 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1141 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1142 //		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1143 //		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1144 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1145 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1146 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1147 		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1148 //		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1149 		PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1150 		    &vm_memory_malloc_no_cow_mask,
1151 		    sizeof(vm_memory_malloc_no_cow_mask));
1152 	}
1153 
1154 #if DEBUG
1155 	PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1156 	if (vm_check_map_sanity) {
1157 		kprintf("VM sanity checking enabled\n");
1158 	} else {
1159 		kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1160 	}
1161 #endif /* DEBUG */
1162 
1163 #if DEVELOPMENT || DEBUG
1164 	PE_parse_boot_argn("panic_on_unsigned_execute",
1165 	    &panic_on_unsigned_execute,
1166 	    sizeof(panic_on_unsigned_execute));
1167 	PE_parse_boot_argn("panic_on_mlock_failure",
1168 	    &panic_on_mlock_failure,
1169 	    sizeof(panic_on_mlock_failure));
1170 #endif /* DEVELOPMENT || DEBUG */
1171 }
1172 
/*
 * vm_map_steal_memory:
 *
 * Carve out, very early in boot (PMAP_STEAL, STARTUP_RANK_FIRST), the
 * contiguous "foreign" memory that vm_map_init() later crams into the
 * map / map-entry / holes zones.
 */
__startup_func
static void
vm_map_steal_memory(void)
{
	uint16_t kentry_initial_pages;
	uint16_t zone_foreign_pages;
	/* set when the reserve was bumped beyond the built-in default */
	bool overloaded = false;

	/*
	 * 1 page of maps and holes is enough for early boot
	 *
	 * Those early crams are only needed to bootstrap zones
	 * until zone_init() has run (STARTUP_RANK_FIRST of ZALLOC).
	 * After that point, zones know how to allocate vm map entries,
	 * holes, and maps.
	 */
	map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
	    sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);

	map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
	    sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS, 1);

	/*
	 * kentry_initial_pages corresponds to the number of kernel map entries
	 * required during bootstrap for the duration of zone_init().
	 */
#if defined(__LP64__)
	kentry_initial_pages = (uint16_t)atop(10 * 4096);
#else
	kentry_initial_pages = 6;
#endif

#if CONFIG_GZALLOC
	/*
	 * If using the guard allocator, reserve more memory for the kernel
	 * reserved map entry pool.
	 */
	if (gzalloc_enabled()) {
		kentry_initial_pages *= 100;
		overloaded = true;
	}
#endif
	/* boot-arg override of the early-boot entry reserve */
	if (PE_parse_boot_argn("zone_foreign_pages", &zone_foreign_pages,
	    sizeof(zone_foreign_pages))) {
		kentry_initial_pages = zone_foreign_pages;
		overloaded = true;
	}

	kentry_data_size = zone_get_foreign_alloc_size(VM_MAP_ENTRY_ZONE_NAME,
	    sizeof(struct vm_map_entry), VM_MAP_ENTRY_ZFLAGS,
	    kentry_initial_pages);

	/*
	 * Steal a contiguous range of memory so that a simple range check
	 * can validate foreign addresses being freed/crammed to these
	 * zones
	 */
	vm_size_t total_size;
	if (os_add3_overflow(map_data_size, kentry_data_size,
	    map_holes_data_size, &total_size)) {
		panic("vm_map_steal_memory: overflow in amount of memory requested");
	}
	map_data = zone_foreign_mem_init(total_size, overloaded);
	/* the three regions are laid out back-to-back within the stolen range */
	kentry_data = map_data + map_data_size;
	map_holes_data = kentry_data + kentry_data_size;
}
STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1240 
/*
 * Report how much of the early-boot (crammed) map/entry/holes reserve
 * remains free once zalloc is fully up (ZALLOC, STARTUP_RANK_SECOND).
 * NOTE(review): "boostraped" appears to be a typo for "bootstrapped";
 * renaming would also touch the STARTUP registration, so it is kept.
 */
__startup_func
static void
vm_kernel_boostraped(void)
{
	printf("VM bootstrap done: %d maps, %d entries and %d holes left\n",
	    vm_map_zone->z_elems_free,
	    vm_map_entry_zone->z_elems_free,
	    vm_map_holes_zone->z_elems_free);
}
STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_kernel_boostraped);
1251 
1252 void
vm_map_disable_hole_optimization(vm_map_t map)1253 vm_map_disable_hole_optimization(vm_map_t map)
1254 {
1255 	vm_map_entry_t  head_entry, hole_entry, next_hole_entry;
1256 
1257 	if (map->holelistenabled) {
1258 		head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1259 
1260 		while (hole_entry != NULL) {
1261 			next_hole_entry = hole_entry->vme_next;
1262 
1263 			hole_entry->vme_next = NULL;
1264 			hole_entry->vme_prev = NULL;
1265 			zfree(vm_map_holes_zone, hole_entry);
1266 
1267 			if (next_hole_entry == head_entry) {
1268 				hole_entry = NULL;
1269 			} else {
1270 				hole_entry = next_hole_entry;
1271 			}
1272 		}
1273 
1274 		map->holes_list = NULL;
1275 		map->holelistenabled = FALSE;
1276 
1277 		map->first_free = vm_map_first_entry(map);
1278 		SAVE_HINT_HOLE_WRITE(map, NULL);
1279 	}
1280 }
1281 
1282 boolean_t
vm_kernel_map_is_kernel(vm_map_t map)1283 vm_kernel_map_is_kernel(vm_map_t map)
1284 {
1285 	return map->pmap == kernel_pmap;
1286 }
1287 
1288 /*
1289  *	vm_map_create:
1290  *
1291  *	Creates and returns a new empty VM map with
1292  *	the given physical map structure, and having
1293  *	the given lower and upper address bounds.
1294  */
1295 
1296 vm_map_t
vm_map_create(pmap_t pmap,vm_map_offset_t min,vm_map_offset_t max,boolean_t pageable)1297 vm_map_create(
1298 	pmap_t          pmap,
1299 	vm_map_offset_t min,
1300 	vm_map_offset_t max,
1301 	boolean_t       pageable)
1302 {
1303 	int options;
1304 
1305 	options = 0;
1306 	if (pageable) {
1307 		options |= VM_MAP_CREATE_PAGEABLE;
1308 	}
1309 	return vm_map_create_options(pmap, min, max, options);
1310 }
1311 
/*
 * vm_map_create_options:
 *
 * Allocate and initialize a new vm_map over the given pmap with
 * [min, max) address bounds.  "options" is a mask of VM_MAP_CREATE_*
 * flags; any unknown flag yields VM_MAP_NULL.  The returned map holds
 * one reference owned by the caller.
 */
vm_map_t
vm_map_create_options(
	pmap_t          pmap,
	vm_map_offset_t min,
	vm_map_offset_t max,
	int             options)
{
	vm_map_t        result;
	struct vm_map_links     *hole_entry = NULL;

	if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
		/* unknown option */
		return VM_MAP_NULL;
	}

	result = zalloc_flags(vm_map_zone, Z_WAITOK | Z_NOFAIL);

	/* empty entry list: head links point back at the map itself */
	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result)  = vm_map_to_entry(result);
	result->hdr.nentries = 0;
	if (options & VM_MAP_CREATE_PAGEABLE) {
		result->hdr.entries_pageable = TRUE;
	} else {
		result->hdr.entries_pageable = FALSE;
	}

	vm_map_store_init( &(result->hdr));

	result->hdr.page_shift = PAGE_SHIFT;

	result->size = 0;
	result->size_limit = RLIM_INFINITY;             /* default unlimited */
	result->data_limit = RLIM_INFINITY;             /* default unlimited */
	result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
	result->user_wire_size  = 0;
#if XNU_TARGET_OS_OSX
	result->vmmap_high_start = 0;
#endif
	/* single reference, owned by the caller */
	os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
	result->pmap = pmap;
	result->min_offset = min;
	result->max_offset = max;
	/* all behavior flags start out cleared */
	result->wiring_required = FALSE;
	result->no_zero_fill = FALSE;
	result->mapped_in_other_pmaps = FALSE;
	result->wait_for_space = FALSE;
	result->switch_protect = FALSE;
	result->disable_vmentry_reuse = FALSE;
	result->map_disallow_data_exec = FALSE;
	result->is_nested_map = FALSE;
	result->map_disallow_new_exec = FALSE;
	result->terminated = FALSE;
	result->cs_enforcement = FALSE;
	result->cs_debugged = FALSE;
	result->highest_entry_end = 0;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);
	result->jit_entry_exists = FALSE;
	result->is_alien = FALSE;
	result->reserved_regions = FALSE;
	result->single_jit = FALSE;

	/* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
	if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
		result->has_corpse_footprint = TRUE;
		result->holelistenabled = FALSE;
		result->vmmap_corpse_footprint = NULL;
	} else if (startup_phase >= STARTUP_SUB_ZALLOC) {
		/* seed the hole list with one hole covering the usable range */
		hole_entry = zalloc(vm_map_holes_zone);

		hole_entry->start = min;
#if defined(__arm__) || defined(__arm64__)
		hole_entry->end = result->max_offset;
#else
		hole_entry->end = MAX(max, (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
#endif
		result->holes_list = result->hole_hint = hole_entry;
		/* circular, single-element hole list */
		hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
		result->holelistenabled = TRUE;
		result->has_corpse_footprint = FALSE;
	} else {
		/* too early in boot for the holes zone; run without a hole list */
		result->holelistenabled = FALSE;
		result->has_corpse_footprint = FALSE;
	}

	vm_map_lock_init(result);
	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);

	return result;
}
1402 
1403 vm_map_size_t
vm_map_adjusted_size(vm_map_t map)1404 vm_map_adjusted_size(vm_map_t map)
1405 {
1406 	struct vm_reserved_region *regions = NULL;
1407 	size_t num_regions = 0;
1408 	mach_vm_size_t  reserved_size = 0, map_size = 0;
1409 
1410 	if (map == NULL || (map->size == 0)) {
1411 		return 0;
1412 	}
1413 
1414 	map_size = map->size;
1415 
1416 	if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1417 		/*
1418 		 * No special reserved regions or not an exotic map or the task
1419 		 * is terminating and these special regions might have already
1420 		 * been deallocated.
1421 		 */
1422 		return map_size;
1423 	}
1424 
1425 	num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1426 	assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1427 
1428 	while (num_regions) {
1429 		reserved_size += regions[--num_regions].vmrr_size;
1430 	}
1431 
1432 	/*
1433 	 * There are a few places where the map is being switched out due to
1434 	 * 'termination' without that bit being set (e.g. exec and corpse purging).
1435 	 * In those cases, we could have the map's regions being deallocated on
1436 	 * a core while some accounting process is trying to get the map's size.
1437 	 * So this assert can't be enabled till all those places are uniform in
1438 	 * their use of the 'map->terminated' bit.
1439 	 *
1440 	 * assert(map_size >= reserved_size);
1441 	 */
1442 
1443 	return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1444 }
1445 
/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion in the
 *	given map (or map copy).  No fields are filled.
 */
#define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked)                                      \
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)

/*
 * Allocate a zeroed vm_map_entry from the appropriate zone and set its
 * default behavior/inheritance.  "map_header" identifies the destination
 * map (or copy); "map_locked" says whether the caller holds that map's
 * lock.  Both may be unused depending on build configuration.
 */
static vm_map_entry_t
_vm_map_entry_create(
	struct vm_map_header    *map_header __unused,
	boolean_t               map_locked __unused)
{
	vm_map_entry_t  entry = NULL;
	zone_t zone = vm_map_entry_zone;

	/* pageable headers must not be populated while holding the map lock */
	assert(map_header->entries_pageable ? !map_locked : TRUE);

#if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
	zone_security_flags_t zsflags = zone_security_array[ZONE_ID_VM_MAP_ENTRY];
	if (map_header == &zone_submap(zsflags)->hdr) {
		/*
		 * If we are trying to allocate an entry for the submap
		 * of the vm_map_entry_zone, then this can cause recursive
		 * locking of this map.
		 *
		 * Try to allocate _without blocking_ from this zone,
		 * but if it is depleted, we need to go to the
		 * vm_map_entry_reserved_zone which is in the zalloc
		 * "VM" submap, which can grow without taking any map lock.
		 *
		 * Note: the vm_map_entry_zone has a rather high "reserve"
		 * setup in order to minimize usage of the reserved one.
		 */
		entry = zalloc_flags(vm_map_entry_zone, Z_NOWAIT | Z_ZERO);
		zone = vm_map_entry_reserved_zone;
	}
#endif
	if (entry == NULL) {
		/* blocking, zero-filled allocation from the selected zone */
		entry = zalloc_flags(zone, Z_WAITOK | Z_ZERO);
	}

	entry->behavior = VM_BEHAVIOR_DEFAULT;
	entry->inheritance = VM_INHERIT_DEFAULT;

	vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
#if     MAP_ENTRY_CREATION_DEBUG
	/* record the creating header and a backtrace for debugging */
	entry->vme_creation_maphdr = map_header;
	backtrace(&entry->vme_creation_bt[0],
	    (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL, NULL);
#endif
	return entry;
}
1502 
/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *      write map lock held so no need to
 *	do anything special to insure correctness
 *      of the stores
 */
#define vm_map_entry_dispose(map, entry)                        \
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define vm_map_copy_entry_dispose(copy, entry) \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))

/*
 * Free a vm_map_entry back to the zone that owns it.  Entries may have
 * come from the regular entry zone or the reserved fallback zone
 * (see _vm_map_entry_create), so the owning zone is looked up first.
 */
static void
_vm_map_entry_dispose(
	struct vm_map_header    *map_header __unused,
	vm_map_entry_t          entry)
{
#if HAVE_VM_MAP_RESERVED_ENTRY_ZONE
	switch (zone_id_for_native_element(entry, sizeof(*entry))) {
	case ZONE_ID_VM_MAP_ENTRY:
	case ZONE_ID_INVALID: /* foreign elements are regular entries always */
		break;
	default:
		/* owned by the reserved fallback zone */
		zfree(vm_map_entry_reserved_zone, entry);
		return;
	}
#endif /* HAVE_VM_MAP_RESERVED_ENTRY_ZONE */
	zfree(vm_map_entry_zone, entry);
}
1535 
#if MACH_ASSERT
static boolean_t first_free_check = FALSE;
/*
 * Debug-only validation of the map's "first_free" hint.
 * Checking is opt-in; when disabled, always report valid.
 */
boolean_t
first_free_is_valid(
	vm_map_t        map)
{
	return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif /* MACH_ASSERT */
1549 
1550 
/*
 * Link/unlink map entries on a vm_map_copy's entry list; the copy's
 * header stands in for a map header in the store-layer helpers.
 */
#define vm_map_copy_entry_link(copy, after_where, entry)                \
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)                           \
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1556 
/*
 *	vm_map_destroy:
 *
 *	Actually destroy a map: delete every mapping, release the hole
 *	list and corpse footprint, destroy the pmap, and free the map
 *	structure.  The caller must hold the final reference.
 */
void
vm_map_destroy(
	vm_map_t        map,
	int             flags)
{
	vm_map_lock(map);

	/* final cleanup: no need to unnest shared region */
	flags |= VM_MAP_REMOVE_NO_UNNESTING;
	/* final cleanup: ok to remove immutable mappings */
	flags |= VM_MAP_REMOVE_IMMUTABLE;
	/* final cleanup: allow gaps in range */
	flags |= VM_MAP_REMOVE_GAPS_OK;

	/* clean up regular map entries */
	(void) vm_map_delete(map, map->min_offset, map->max_offset,
	    flags, VM_MAP_NULL);
	/* clean up leftover special mappings (commpage, GPU carveout, etc...) */
#if     !defined(__arm__)
	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
	    flags, VM_MAP_NULL);
#endif /* !__arm__ */

	/* free the hole list and any corpse footprint data */
	vm_map_disable_hole_optimization(map);
	vm_map_corpse_footprint_destroy(map);

	vm_map_unlock(map);

	/* every entry must have been deleted above */
	assert(map->hdr.nentries == 0);

	if (map->pmap) {
		pmap_destroy(map->pmap);
	}

#if LOCKS_INDIRECT_ALLOW
	if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
		/*
		 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
		 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
		 * structure or kalloc'ed via lck_mtx_init.
		 * An example is s_lock_ext within struct _vm_map.
		 *
		 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
		 * can add another tag to detect embedded vs alloc'ed indirect external
		 * mutexes but that'll be additional checks in the lock path and require
		 * updating dependencies for the old vs new tag.
		 *
		 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
		 * just when lock debugging is ON, we choose to forego explicitly destroying
		 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
		 * count on vm_map_lck_grp, which has no serious side-effect.
		 */
	} else
#endif /* LOCKS_INDIRECT_ALLOW */
	{
		lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
		lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
	}

	zfree(vm_map_zone, map);
}
1623 
1624 /*
1625  * Returns pid of the task with the largest number of VM map entries.
1626  * Used in the zone-map-exhaustion jetsam path.
1627  */
1628 pid_t
find_largest_process_vm_map_entries(void)1629 find_largest_process_vm_map_entries(void)
1630 {
1631 	pid_t victim_pid = -1;
1632 	int max_vm_map_entries = 0;
1633 	task_t task = TASK_NULL;
1634 	queue_head_t *task_list = &tasks;
1635 
1636 	lck_mtx_lock(&tasks_threads_lock);
1637 	queue_iterate(task_list, task, task_t, tasks) {
1638 		if (task == kernel_task || !task->active) {
1639 			continue;
1640 		}
1641 
1642 		vm_map_t task_map = task->map;
1643 		if (task_map != VM_MAP_NULL) {
1644 			int task_vm_map_entries = task_map->hdr.nentries;
1645 			if (task_vm_map_entries > max_vm_map_entries) {
1646 				max_vm_map_entries = task_vm_map_entries;
1647 				victim_pid = pid_from_task(task);
1648 			}
1649 		}
1650 	}
1651 	lck_mtx_unlock(&tasks_threads_lock);
1652 
1653 	printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1654 	return victim_pid;
1655 }
1656 
1657 
1658 /*
1659  *	vm_map_lookup_entry:	[ internal use only ]
1660  *
1661  *	Calls into the vm map store layer to find the map
1662  *	entry containing (or immediately preceding) the
1663  *	specified address in the given map; the entry is returned
1664  *	in the "entry" parameter.  The boolean
1665  *	result indicates whether the address is
1666  *	actually contained in the map.
1667  */
1668 boolean_t
vm_map_lookup_entry(vm_map_t map,vm_map_offset_t address,vm_map_entry_t * entry)1669 vm_map_lookup_entry(
1670 	vm_map_t                map,
1671 	vm_map_offset_t address,
1672 	vm_map_entry_t          *entry)         /* OUT */
1673 {
1674 #if CONFIG_KERNEL_TBI
1675 	if (VM_KERNEL_ADDRESS(address)) {
1676 		address = VM_KERNEL_STRIP_UPTR(address);
1677 	}
1678 #endif /* CONFIG_KERNEL_TBI */
1679 	return vm_map_store_lookup_entry( map, address, entry );
1680 }
1681 
1682 /*
1683  *	Routine:	vm_map_find_space
1684  *	Purpose:
1685  *		Allocate a range in the specified virtual address map,
1686  *		returning the entry allocated for that range.
1687  *		Used by kmem_alloc, etc.
1688  *
1689  *		The map must be NOT be locked. It will be returned locked
1690  *		on KERN_SUCCESS, unlocked on failure.
1691  *
1692  *		If an entry is allocated, the object/offset fields
1693  *		are initialized to zero.
1694  *
1695  *      If VM_MAP_FIND_LAST_FREE flag is set, allocate from end of map. This
1696  *      is currently only used for allocating memory for zones backing
1697  *      one of the kalloc heaps.(rdar://65832263)
1698  */
1699 kern_return_t
vm_map_find_space(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,vm_map_entry_t * o_entry)1700 vm_map_find_space(
1701 	vm_map_t                map,
1702 	vm_map_offset_t         *address,       /* OUT */
1703 	vm_map_size_t           size,
1704 	vm_map_offset_t         mask,
1705 	int                     flags,
1706 	vm_map_kernel_flags_t   vmk_flags,
1707 	vm_tag_t                tag,
1708 	vm_map_entry_t          *o_entry)       /* OUT */
1709 {
1710 	vm_map_entry_t          entry, new_entry, hole_entry;
1711 	vm_map_offset_t         start;
1712 	vm_map_offset_t         end;
1713 
1714 	if (size == 0) {
1715 		*address = 0;
1716 		return KERN_INVALID_ARGUMENT;
1717 	}
1718 
1719 	new_entry = vm_map_entry_create(map, FALSE);
1720 	vm_map_lock(map);
1721 
1722 	if (flags & VM_MAP_FIND_LAST_FREE) {
1723 		assert(!map->disable_vmentry_reuse);
1724 		/* TODO: Make backward lookup generic and support guard pages */
1725 		assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
1726 		assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
1727 
1728 		/* Allocate space from end of map */
1729 		vm_map_store_find_last_free(map, &entry);
1730 
1731 		if (!entry) {
1732 			goto noSpace;
1733 		}
1734 
1735 		if (entry == vm_map_to_entry(map)) {
1736 			end = map->max_offset;
1737 		} else {
1738 			end = entry->vme_start;
1739 		}
1740 
1741 		while (TRUE) {
1742 			vm_map_entry_t prev;
1743 
1744 			start = end - size;
1745 
1746 			if ((start < map->min_offset) || end < start) {
1747 				goto noSpace;
1748 			}
1749 
1750 			prev = entry->vme_prev;
1751 			entry = prev;
1752 
1753 			if (prev == vm_map_to_entry(map)) {
1754 				break;
1755 			}
1756 
1757 			if (prev->vme_end <= start) {
1758 				break;
1759 			}
1760 
1761 			/*
1762 			 *	Didn't fit -- move to the next entry.
1763 			 */
1764 
1765 			end = entry->vme_start;
1766 		}
1767 	} else {
1768 		if (vmk_flags.vmkf_guard_after) {
1769 			/* account for the back guard page in the size */
1770 			size += VM_MAP_PAGE_SIZE(map);
1771 		}
1772 
1773 		/*
1774 		 *	Look for the first possible address; if there's already
1775 		 *	something at this address, we have to start after it.
1776 		 */
1777 
1778 		if (map->disable_vmentry_reuse == TRUE) {
1779 			VM_MAP_HIGHEST_ENTRY(map, entry, start);
1780 		} else {
1781 			if (map->holelistenabled) {
1782 				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1783 
1784 				if (hole_entry == NULL) {
1785 					/*
1786 					 * No more space in the map?
1787 					 */
1788 					goto noSpace;
1789 				}
1790 
1791 				entry = hole_entry;
1792 				start = entry->vme_start;
1793 			} else {
1794 				assert(first_free_is_valid(map));
1795 				if ((entry = map->first_free) == vm_map_to_entry(map)) {
1796 					start = map->min_offset;
1797 				} else {
1798 					start = entry->vme_end;
1799 				}
1800 			}
1801 		}
1802 
1803 		/*
1804 		 *	In any case, the "entry" always precedes
1805 		 *	the proposed new region throughout the loop:
1806 		 */
1807 
1808 		while (TRUE) {
1809 			vm_map_entry_t  next;
1810 
1811 			/*
1812 			 *	Find the end of the proposed new region.
1813 			 *	Be sure we didn't go beyond the end, or
1814 			 *	wrap around the address.
1815 			 */
1816 
1817 			if (vmk_flags.vmkf_guard_before) {
1818 				/* reserve space for the front guard page */
1819 				start += VM_MAP_PAGE_SIZE(map);
1820 			}
1821 			end = ((start + mask) & ~mask);
1822 
1823 			if (end < start) {
1824 				goto noSpace;
1825 			}
1826 			start = end;
1827 			assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1828 			end += size;
1829 			assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1830 
1831 			if ((end > map->max_offset) || (end < start)) {
1832 				goto noSpace;
1833 			}
1834 
1835 			next = entry->vme_next;
1836 
1837 			if (map->holelistenabled) {
1838 				if (entry->vme_end >= end) {
1839 					break;
1840 				}
1841 			} else {
1842 				/*
1843 				 *	If there are no more entries, we must win.
1844 				 *
1845 				 *	OR
1846 				 *
1847 				 *	If there is another entry, it must be
1848 				 *	after the end of the potential new region.
1849 				 */
1850 
1851 				if (next == vm_map_to_entry(map)) {
1852 					break;
1853 				}
1854 
1855 				if (next->vme_start >= end) {
1856 					break;
1857 				}
1858 			}
1859 
1860 			/*
1861 			 *	Didn't fit -- move to the next entry.
1862 			 */
1863 
1864 			entry = next;
1865 
1866 			if (map->holelistenabled) {
1867 				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1868 					/*
1869 					 * Wrapped around
1870 					 */
1871 					goto noSpace;
1872 				}
1873 				start = entry->vme_start;
1874 			} else {
1875 				start = entry->vme_end;
1876 			}
1877 		}
1878 
1879 		if (vmk_flags.vmkf_guard_before) {
1880 			/* go back for the front guard page */
1881 			start -= VM_MAP_PAGE_SIZE(map);
1882 		}
1883 	}
1884 
1885 	if (map->holelistenabled) {
1886 		if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1887 			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
1888 		}
1889 	}
1890 
1891 	/*
1892 	 *	At this point,
1893 	 *		"start" and "end" should define the endpoints of the
1894 	 *			available new range, and
1895 	 *		"entry" should refer to the region before the new
1896 	 *			range, and
1897 	 *
1898 	 *		the map should be locked.
1899 	 */
1900 
1901 	*address = start;
1902 
1903 	assert(start < end);
1904 	new_entry->vme_start = start;
1905 	new_entry->vme_end = end;
1906 	assert(page_aligned(new_entry->vme_start));
1907 	assert(page_aligned(new_entry->vme_end));
1908 	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1909 	    VM_MAP_PAGE_MASK(map)));
1910 	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1911 	    VM_MAP_PAGE_MASK(map)));
1912 
1913 	new_entry->is_shared = FALSE;
1914 	new_entry->is_sub_map = FALSE;
1915 	new_entry->use_pmap = TRUE;
1916 	VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1917 	VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1918 
1919 	new_entry->needs_copy = FALSE;
1920 
1921 	new_entry->inheritance = VM_INHERIT_DEFAULT;
1922 	new_entry->protection = VM_PROT_DEFAULT;
1923 	new_entry->max_protection = VM_PROT_ALL;
1924 	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1925 	new_entry->wired_count = 0;
1926 	new_entry->user_wired_count = 0;
1927 
1928 	new_entry->in_transition = FALSE;
1929 	new_entry->needs_wakeup = FALSE;
1930 	new_entry->no_cache = FALSE;
1931 	new_entry->permanent = FALSE;
1932 	new_entry->superpage_size = FALSE;
1933 	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1934 		new_entry->map_aligned = TRUE;
1935 	} else {
1936 		new_entry->map_aligned = FALSE;
1937 	}
1938 
1939 	new_entry->used_for_jit = FALSE;
1940 	new_entry->pmap_cs_associated = FALSE;
1941 	new_entry->zero_wired_pages = FALSE;
1942 	new_entry->iokit_acct = FALSE;
1943 	new_entry->vme_resilient_codesign = FALSE;
1944 	new_entry->vme_resilient_media = FALSE;
1945 	if (vmk_flags.vmkf_atomic_entry) {
1946 		new_entry->vme_atomic = TRUE;
1947 	} else {
1948 		new_entry->vme_atomic = FALSE;
1949 	}
1950 
1951 	VME_ALIAS_SET(new_entry, tag);
1952 
1953 	/*
1954 	 *	Insert the new entry into the list
1955 	 */
1956 
1957 	vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1958 
1959 	map->size += size;
1960 
1961 	/*
1962 	 *	Update the lookup hint
1963 	 */
1964 	SAVE_HINT_MAP_WRITE(map, new_entry);
1965 
1966 	*o_entry = new_entry;
1967 	return KERN_SUCCESS;
1968 
1969 noSpace:
1970 
1971 	vm_map_entry_dispose(map, new_entry);
1972 	vm_map_unlock(map);
1973 	return KERN_NO_SPACE;
1974 }
1975 
/*
 * Debug knobs for vm_map_pmap_enter().
 * vm_map_pmap_enter_print is checked in vm_map_pmap_enter() to log each
 * pre-entered page.  vm_map_pmap_enter_enable is not referenced in this
 * portion of the file; presumably it gates pre-population elsewhere --
 * confirm against callers.
 */
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;
1978 
1979 /*
1980  *	Routine:	vm_map_pmap_enter [internal only]
1981  *
1982  *	Description:
1983  *		Force pages from the specified object to be entered into
1984  *		the pmap at the specified address if they are present.
1985  *		As soon as a page not found in the object the scan ends.
1986  *
1987  *	Returns:
1988  *		Nothing.
1989  *
1990  *	In/out conditions:
1991  *		The source map should not be locked on entry.
1992  */
1993 __unused static void
vm_map_pmap_enter(vm_map_t map,vm_map_offset_t addr,vm_map_offset_t end_addr,vm_object_t object,vm_object_offset_t offset,vm_prot_t protection)1994 vm_map_pmap_enter(
1995 	vm_map_t                map,
1996 	vm_map_offset_t         addr,
1997 	vm_map_offset_t         end_addr,
1998 	vm_object_t             object,
1999 	vm_object_offset_t      offset,
2000 	vm_prot_t               protection)
2001 {
2002 	int                     type_of_fault;
2003 	kern_return_t           kr;
2004 	struct vm_object_fault_info fault_info = {};
2005 
2006 	if (map->pmap == 0) {
2007 		return;
2008 	}
2009 
2010 	assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
2011 
2012 	while (addr < end_addr) {
2013 		vm_page_t       m;
2014 
2015 
2016 		/*
2017 		 * TODO:
2018 		 * From vm_map_enter(), we come into this function without the map
2019 		 * lock held or the object lock held.
2020 		 * We haven't taken a reference on the object either.
2021 		 * We should do a proper lookup on the map to make sure
2022 		 * that things are sane before we go locking objects that
2023 		 * could have been deallocated from under us.
2024 		 */
2025 
2026 		vm_object_lock(object);
2027 
2028 		m = vm_page_lookup(object, offset);
2029 
2030 		if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2031 		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2032 			vm_object_unlock(object);
2033 			return;
2034 		}
2035 
2036 		if (vm_map_pmap_enter_print) {
2037 			printf("vm_map_pmap_enter:");
2038 			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2039 			    map, (unsigned long long)addr, object, (unsigned long long)offset);
2040 		}
2041 		type_of_fault = DBG_CACHE_HIT_FAULT;
2042 		kr = vm_fault_enter(m, map->pmap,
2043 		    addr,
2044 		    PAGE_SIZE, 0,
2045 		    protection, protection,
2046 		    VM_PAGE_WIRED(m),
2047 		    FALSE,                 /* change_wiring */
2048 		    VM_KERN_MEMORY_NONE,                 /* tag - not wiring */
2049 		    &fault_info,
2050 		    NULL,                  /* need_retry */
2051 		    &type_of_fault);
2052 
2053 		vm_object_unlock(object);
2054 
2055 		offset += PAGE_SIZE_64;
2056 		addr += PAGE_SIZE;
2057 	}
2058 }
2059 
2060 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2061 kern_return_t
vm_map_random_address_for_size(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size)2062 vm_map_random_address_for_size(
2063 	vm_map_t        map,
2064 	vm_map_offset_t *address,
2065 	vm_map_size_t   size)
2066 {
2067 	kern_return_t   kr = KERN_SUCCESS;
2068 	int             tries = 0;
2069 	vm_map_offset_t random_addr = 0;
2070 	vm_map_offset_t hole_end;
2071 
2072 	vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
2073 	vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
2074 	vm_map_size_t   vm_hole_size = 0;
2075 	vm_map_size_t   addr_space_size;
2076 
2077 	addr_space_size = vm_map_max(map) - vm_map_min(map);
2078 
2079 	assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
2080 
2081 	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2082 		if (startup_phase < STARTUP_SUB_ZALLOC) {
2083 			random_addr = (vm_map_offset_t)early_random();
2084 		} else {
2085 			random_addr = (vm_map_offset_t)random();
2086 		}
2087 		random_addr <<= VM_MAP_PAGE_SHIFT(map);
2088 		random_addr = vm_map_trunc_page(
2089 			vm_map_min(map) + (random_addr % addr_space_size),
2090 			VM_MAP_PAGE_MASK(map));
2091 
2092 		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2093 			if (prev_entry == vm_map_to_entry(map)) {
2094 				next_entry = vm_map_first_entry(map);
2095 			} else {
2096 				next_entry = prev_entry->vme_next;
2097 			}
2098 			if (next_entry == vm_map_to_entry(map)) {
2099 				hole_end = vm_map_max(map);
2100 			} else {
2101 				hole_end = next_entry->vme_start;
2102 			}
2103 			vm_hole_size = hole_end - random_addr;
2104 			if (vm_hole_size >= size) {
2105 				*address = random_addr;
2106 				break;
2107 			}
2108 		}
2109 		tries++;
2110 	}
2111 
2112 	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2113 		kr = KERN_NO_SPACE;
2114 	}
2115 	return kr;
2116 }
2117 
2118 static boolean_t
vm_memory_malloc_no_cow(int alias)2119 vm_memory_malloc_no_cow(
2120 	int alias)
2121 {
2122 	uint64_t alias_mask;
2123 
2124 	if (alias > 63) {
2125 		return FALSE;
2126 	}
2127 
2128 	alias_mask = 1ULL << alias;
2129 	if (alias_mask & vm_memory_malloc_no_cow_mask) {
2130 		return TRUE;
2131 	}
2132 	return FALSE;
2133 }
2134 
/*
 * Counters, presumably bumped when a mapping request runs into the
 * RLIMIT_AS / RLIMIT_DATA resource limits -- they are not referenced in
 * this portion of the file, so confirm against the code that updates them.
 */
uint64_t vm_map_enter_RLIMIT_AS_count = 0;
uint64_t vm_map_enter_RLIMIT_DATA_count = 0;
2137 /*
2138  *	Routine:	vm_map_enter
2139  *
2140  *	Description:
2141  *		Allocate a range in the specified virtual address map.
2142  *		The resulting range will refer to memory defined by
2143  *		the given memory object and offset into that object.
2144  *
2145  *		Arguments are as defined in the vm_map call.
2146  */
/*
 * Counters for VM_FLAGS_OVERWRITE handling in vm_map_enter(): after a
 * failed overwrite mapping, the original mappings saved in the "zap" map
 * are restored; these presumably track how often that restore succeeds or
 * fails -- the updating code is outside this chunk, confirm there.
 */
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
2149 kern_return_t
vm_map_enter(vm_map_t map,vm_map_offset_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t alias,vm_object_t object,vm_object_offset_t offset,boolean_t needs_copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)2150 vm_map_enter(
2151 	vm_map_t                map,
2152 	vm_map_offset_t         *address,       /* IN/OUT */
2153 	vm_map_size_t           size,
2154 	vm_map_offset_t         mask,
2155 	int                     flags,
2156 	vm_map_kernel_flags_t   vmk_flags,
2157 	vm_tag_t                alias,
2158 	vm_object_t             object,
2159 	vm_object_offset_t      offset,
2160 	boolean_t               needs_copy,
2161 	vm_prot_t               cur_protection,
2162 	vm_prot_t               max_protection,
2163 	vm_inherit_t            inheritance)
2164 {
2165 	vm_map_entry_t          entry, new_entry;
2166 	vm_map_offset_t         start, tmp_start, tmp_offset;
2167 	vm_map_offset_t         end, tmp_end;
2168 	vm_map_offset_t         tmp2_start, tmp2_end;
2169 	vm_map_offset_t         desired_empty_end;
2170 	vm_map_offset_t         step;
2171 	kern_return_t           result = KERN_SUCCESS;
2172 	vm_map_t                zap_old_map = VM_MAP_NULL;
2173 	vm_map_t                zap_new_map = VM_MAP_NULL;
2174 	boolean_t               map_locked = FALSE;
2175 	boolean_t               pmap_empty = TRUE;
2176 	boolean_t               new_mapping_established = FALSE;
2177 	boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2178 	boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2179 	boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2180 	boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2181 	boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2182 	boolean_t               is_submap = vmk_flags.vmkf_submap;
2183 	boolean_t               permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
2184 	boolean_t               no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2185 	boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
2186 	boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
2187 	boolean_t               translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
2188 	boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2189 	boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2190 	boolean_t               random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2191 	unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2192 	vm_tag_t                user_alias;
2193 	vm_map_offset_t         effective_min_offset, effective_max_offset;
2194 	kern_return_t           kr;
2195 	boolean_t               clear_map_aligned = FALSE;
2196 	vm_map_entry_t          hole_entry;
2197 	vm_map_size_t           chunk_size = 0;
2198 	vm_object_t             caller_object;
2199 
2200 	caller_object = object;
2201 
2202 	assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2203 
2204 	if (flags & VM_FLAGS_4GB_CHUNK) {
2205 #if defined(__LP64__)
2206 		chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2207 #else /* __LP64__ */
2208 		chunk_size = ANON_CHUNK_SIZE;
2209 #endif /* __LP64__ */
2210 	} else {
2211 		chunk_size = ANON_CHUNK_SIZE;
2212 	}
2213 
2214 	if (superpage_size) {
2215 		switch (superpage_size) {
2216 			/*
2217 			 * Note that the current implementation only supports
2218 			 * a single size for superpages, SUPERPAGE_SIZE, per
2219 			 * architecture. As soon as more sizes are supposed
2220 			 * to be supported, SUPERPAGE_SIZE has to be replaced
2221 			 * with a lookup of the size depending on superpage_size.
2222 			 */
2223 #ifdef __x86_64__
2224 		case SUPERPAGE_SIZE_ANY:
2225 			/* handle it like 2 MB and round up to page size */
2226 			size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2227 			OS_FALLTHROUGH;
2228 		case SUPERPAGE_SIZE_2MB:
2229 			break;
2230 #endif
2231 		default:
2232 			return KERN_INVALID_ARGUMENT;
2233 		}
2234 		mask = SUPERPAGE_SIZE - 1;
2235 		if (size & (SUPERPAGE_SIZE - 1)) {
2236 			return KERN_INVALID_ARGUMENT;
2237 		}
2238 		inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
2239 	}
2240 
2241 
2242 	if ((cur_protection & VM_PROT_WRITE) &&
2243 	    (cur_protection & VM_PROT_EXECUTE) &&
2244 #if XNU_TARGET_OS_OSX
2245 	    map->pmap != kernel_pmap &&
2246 	    (cs_process_global_enforcement() ||
2247 	    (vmk_flags.vmkf_cs_enforcement_override
2248 	    ? vmk_flags.vmkf_cs_enforcement
2249 	    : (vm_map_cs_enforcement(map)
2250 #if __arm64__
2251 	    || !VM_MAP_IS_EXOTIC(map)
2252 #endif /* __arm64__ */
2253 	    ))) &&
2254 #endif /* XNU_TARGET_OS_OSX */
2255 	    (VM_MAP_POLICY_WX_FAIL(map) ||
2256 	    VM_MAP_POLICY_WX_STRIP_X(map)) &&
2257 	    !entry_for_jit) {
2258 		boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2259 
2260 		DTRACE_VM3(cs_wx,
2261 		    uint64_t, 0,
2262 		    uint64_t, 0,
2263 		    vm_prot_t, cur_protection);
2264 		printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2265 		    proc_selfpid(),
2266 		    (current_task()->bsd_info
2267 		    ? proc_name_address(current_task()->bsd_info)
2268 		    : "?"),
2269 		    __FUNCTION__,
2270 		    (vm_protect_wx_fail ? "failing" : "turning off execute"));
2271 		cur_protection &= ~VM_PROT_EXECUTE;
2272 		if (vm_protect_wx_fail) {
2273 			return KERN_PROTECTION_FAILURE;
2274 		}
2275 	}
2276 
2277 	/*
2278 	 * If the task has requested executable lockdown,
2279 	 * deny any new executable mapping.
2280 	 */
2281 	if (map->map_disallow_new_exec == TRUE) {
2282 		if (cur_protection & VM_PROT_EXECUTE) {
2283 			return KERN_PROTECTION_FAILURE;
2284 		}
2285 	}
2286 
2287 	if (resilient_codesign) {
2288 		assert(!is_submap);
2289 		int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
2290 		if ((cur_protection | max_protection) & reject_prot) {
2291 			return KERN_PROTECTION_FAILURE;
2292 		}
2293 	}
2294 
2295 	if (resilient_media) {
2296 		assert(!is_submap);
2297 //		assert(!needs_copy);
2298 		if (object != VM_OBJECT_NULL &&
2299 		    !object->internal) {
2300 			/*
2301 			 * This mapping is directly backed by an external
2302 			 * memory manager (e.g. a vnode pager for a file):
2303 			 * we would not have any safe place to inject
2304 			 * a zero-filled page if an actual page is not
2305 			 * available, without possibly impacting the actual
2306 			 * contents of the mapped object (e.g. the file),
2307 			 * so we can't provide any media resiliency here.
2308 			 */
2309 			return KERN_INVALID_ARGUMENT;
2310 		}
2311 	}
2312 
2313 	if (is_submap) {
2314 		if (purgable) {
2315 			/* submaps can not be purgeable */
2316 			return KERN_INVALID_ARGUMENT;
2317 		}
2318 		if (object == VM_OBJECT_NULL) {
2319 			/* submaps can not be created lazily */
2320 			return KERN_INVALID_ARGUMENT;
2321 		}
2322 	}
2323 	if (vmk_flags.vmkf_already) {
2324 		/*
2325 		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2326 		 * is already present.  For it to be meaningul, the requested
2327 		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2328 		 * we shouldn't try and remove what was mapped there first
2329 		 * (!VM_FLAGS_OVERWRITE).
2330 		 */
2331 		if ((flags & VM_FLAGS_ANYWHERE) ||
2332 		    (flags & VM_FLAGS_OVERWRITE)) {
2333 			return KERN_INVALID_ARGUMENT;
2334 		}
2335 	}
2336 
2337 	effective_min_offset = map->min_offset;
2338 
2339 	if (vmk_flags.vmkf_beyond_max) {
2340 		/*
2341 		 * Allow an insertion beyond the map's max offset.
2342 		 */
2343 #if     !defined(__arm__)
2344 		if (vm_map_is_64bit(map)) {
2345 			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2346 		} else
2347 #endif  /* __arm__ */
2348 		effective_max_offset = 0x00000000FFFFF000ULL;
2349 	} else {
2350 #if XNU_TARGET_OS_OSX
2351 		if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2352 			effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2353 		} else {
2354 			effective_max_offset = map->max_offset;
2355 		}
2356 #else /* XNU_TARGET_OS_OSX */
2357 		effective_max_offset = map->max_offset;
2358 #endif /* XNU_TARGET_OS_OSX */
2359 	}
2360 
2361 	if (size == 0 ||
2362 	    (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2363 		*address = 0;
2364 		return KERN_INVALID_ARGUMENT;
2365 	}
2366 
2367 	if (map->pmap == kernel_pmap) {
2368 		user_alias = VM_KERN_MEMORY_NONE;
2369 	} else {
2370 		user_alias = alias;
2371 	}
2372 
2373 	if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2374 		chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2375 	}
2376 
2377 #define RETURN(value)   { result = value; goto BailOut; }
2378 
2379 	assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2380 	assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2381 	if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2382 		assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2383 		assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2384 	}
2385 
2386 	if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2387 	    !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2388 		/*
2389 		 * In most cases, the caller rounds the size up to the
2390 		 * map's page size.
2391 		 * If we get a size that is explicitly not map-aligned here,
2392 		 * we'll have to respect the caller's wish and mark the
2393 		 * mapping as "not map-aligned" to avoid tripping the
2394 		 * map alignment checks later.
2395 		 */
2396 		clear_map_aligned = TRUE;
2397 	}
2398 	if (!anywhere &&
2399 	    VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2400 	    !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2401 		/*
2402 		 * We've been asked to map at a fixed address and that
2403 		 * address is not aligned to the map's specific alignment.
2404 		 * The caller should know what it's doing (i.e. most likely
2405 		 * mapping some fragmented copy map, transferring memory from
2406 		 * a VM map with a different alignment), so clear map_aligned
2407 		 * for this new VM map entry and proceed.
2408 		 */
2409 		clear_map_aligned = TRUE;
2410 	}
2411 
2412 	/*
2413 	 * Only zero-fill objects are allowed to be purgable.
2414 	 * LP64todo - limit purgable objects to 32-bits for now
2415 	 */
2416 	if (purgable &&
2417 	    (offset != 0 ||
2418 	    (object != VM_OBJECT_NULL &&
2419 	    (object->vo_size != size ||
2420 	    object->purgable == VM_PURGABLE_DENY))
2421 	    || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2422 		return KERN_INVALID_ARGUMENT;
2423 	}
2424 
2425 	if (!anywhere && overwrite) {
2426 		/*
2427 		 * Create a temporary VM map to hold the old mappings in the
2428 		 * affected area while we create the new one.
2429 		 * This avoids releasing the VM map lock in
2430 		 * vm_map_entry_delete() and allows atomicity
2431 		 * when we want to replace some mappings with a new one.
2432 		 * It also allows us to restore the old VM mappings if the
2433 		 * new mapping fails.
2434 		 */
2435 		zap_old_map = vm_map_create(PMAP_NULL,
2436 		    *address,
2437 		    *address + size,
2438 		    map->hdr.entries_pageable);
2439 		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2440 		vm_map_disable_hole_optimization(zap_old_map);
2441 	}
2442 
2443 StartAgain:;
2444 
2445 	start = *address;
2446 
2447 	if (anywhere) {
2448 		vm_map_lock(map);
2449 		map_locked = TRUE;
2450 
2451 		if (entry_for_jit) {
2452 			if (map->jit_entry_exists &&
2453 			    !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2454 				result = KERN_INVALID_ARGUMENT;
2455 				goto BailOut;
2456 			}
2457 			if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2458 				random_address = TRUE;
2459 			}
2460 		}
2461 
2462 		if (random_address) {
2463 			/*
2464 			 * Get a random start address.
2465 			 */
2466 			result = vm_map_random_address_for_size(map, address, size);
2467 			if (result != KERN_SUCCESS) {
2468 				goto BailOut;
2469 			}
2470 			start = *address;
2471 		}
2472 #if XNU_TARGET_OS_OSX
2473 		else if ((start == 0 || start == vm_map_min(map)) &&
2474 		    !map->disable_vmentry_reuse &&
2475 		    map->vmmap_high_start != 0) {
2476 			start = map->vmmap_high_start;
2477 		}
2478 #endif /* XNU_TARGET_OS_OSX */
2479 
2480 
2481 		/*
2482 		 *	Calculate the first possible address.
2483 		 */
2484 
2485 		if (start < effective_min_offset) {
2486 			start = effective_min_offset;
2487 		}
2488 		if (start > effective_max_offset) {
2489 			RETURN(KERN_NO_SPACE);
2490 		}
2491 
2492 		/*
2493 		 *	Look for the first possible address;
2494 		 *	if there's already something at this
2495 		 *	address, we have to start after it.
2496 		 */
2497 
2498 		if (map->disable_vmentry_reuse == TRUE) {
2499 			VM_MAP_HIGHEST_ENTRY(map, entry, start);
2500 		} else {
2501 			if (map->holelistenabled) {
2502 				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2503 
2504 				if (hole_entry == NULL) {
2505 					/*
2506 					 * No more space in the map?
2507 					 */
2508 					result = KERN_NO_SPACE;
2509 					goto BailOut;
2510 				} else {
2511 					boolean_t found_hole = FALSE;
2512 
2513 					do {
2514 						if (hole_entry->vme_start >= start) {
2515 							start = hole_entry->vme_start;
2516 							found_hole = TRUE;
2517 							break;
2518 						}
2519 
2520 						if (hole_entry->vme_end > start) {
2521 							found_hole = TRUE;
2522 							break;
2523 						}
2524 						hole_entry = hole_entry->vme_next;
2525 					} while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2526 
2527 					if (found_hole == FALSE) {
2528 						result = KERN_NO_SPACE;
2529 						goto BailOut;
2530 					}
2531 
2532 					entry = hole_entry;
2533 
2534 					if (start == 0) {
2535 						start += PAGE_SIZE_64;
2536 					}
2537 				}
2538 			} else {
2539 				assert(first_free_is_valid(map));
2540 
2541 				entry = map->first_free;
2542 
2543 				if (entry == vm_map_to_entry(map)) {
2544 					entry = NULL;
2545 				} else {
2546 					if (entry->vme_next == vm_map_to_entry(map)) {
2547 						/*
2548 						 * Hole at the end of the map.
2549 						 */
2550 						entry = NULL;
2551 					} else {
2552 						if (start < (entry->vme_next)->vme_start) {
2553 							start = entry->vme_end;
2554 							start = vm_map_round_page(start,
2555 							    VM_MAP_PAGE_MASK(map));
2556 						} else {
2557 							/*
2558 							 * Need to do a lookup.
2559 							 */
2560 							entry = NULL;
2561 						}
2562 					}
2563 				}
2564 
2565 				if (entry == NULL) {
2566 					vm_map_entry_t  tmp_entry;
2567 					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2568 						assert(!entry_for_jit);
2569 						start = tmp_entry->vme_end;
2570 						start = vm_map_round_page(start,
2571 						    VM_MAP_PAGE_MASK(map));
2572 					}
2573 					entry = tmp_entry;
2574 				}
2575 			}
2576 		}
2577 
2578 		/*
2579 		 *	In any case, the "entry" always precedes
2580 		 *	the proposed new region throughout the
2581 		 *	loop:
2582 		 */
2583 
2584 		while (TRUE) {
2585 			vm_map_entry_t  next;
2586 
2587 			/*
2588 			 *	Find the end of the proposed new region.
2589 			 *	Be sure we didn't go beyond the end, or
2590 			 *	wrap around the address.
2591 			 */
2592 
2593 			end = ((start + mask) & ~mask);
2594 			end = vm_map_round_page(end,
2595 			    VM_MAP_PAGE_MASK(map));
2596 			if (end < start) {
2597 				RETURN(KERN_NO_SPACE);
2598 			}
2599 			start = end;
2600 			assert(VM_MAP_PAGE_ALIGNED(start,
2601 			    VM_MAP_PAGE_MASK(map)));
2602 			end += size;
2603 
2604 			/* We want an entire page of empty space, but don't increase the allocation size. */
2605 			desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2606 
2607 			if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2608 				if (map->wait_for_space) {
2609 					assert(!keep_map_locked);
2610 					if (size <= (effective_max_offset -
2611 					    effective_min_offset)) {
2612 						assert_wait((event_t)map,
2613 						    THREAD_ABORTSAFE);
2614 						vm_map_unlock(map);
2615 						map_locked = FALSE;
2616 						thread_block(THREAD_CONTINUE_NULL);
2617 						goto StartAgain;
2618 					}
2619 				}
2620 				RETURN(KERN_NO_SPACE);
2621 			}
2622 
2623 			next = entry->vme_next;
2624 
2625 			if (map->holelistenabled) {
2626 				if (entry->vme_end >= desired_empty_end) {
2627 					break;
2628 				}
2629 			} else {
2630 				/*
2631 				 *	If there are no more entries, we must win.
2632 				 *
2633 				 *	OR
2634 				 *
2635 				 *	If there is another entry, it must be
2636 				 *	after the end of the potential new region.
2637 				 */
2638 
2639 				if (next == vm_map_to_entry(map)) {
2640 					break;
2641 				}
2642 
2643 				if (next->vme_start >= desired_empty_end) {
2644 					break;
2645 				}
2646 			}
2647 
2648 			/*
2649 			 *	Didn't fit -- move to the next entry.
2650 			 */
2651 
2652 			entry = next;
2653 
2654 			if (map->holelistenabled) {
2655 				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2656 					/*
2657 					 * Wrapped around
2658 					 */
2659 					result = KERN_NO_SPACE;
2660 					goto BailOut;
2661 				}
2662 				start = entry->vme_start;
2663 			} else {
2664 				start = entry->vme_end;
2665 			}
2666 
2667 			start = vm_map_round_page(start,
2668 			    VM_MAP_PAGE_MASK(map));
2669 		}
2670 
2671 		if (map->holelistenabled) {
2672 			if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2673 				panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
2674 			}
2675 		}
2676 
2677 		*address = start;
2678 		assert(VM_MAP_PAGE_ALIGNED(*address,
2679 		    VM_MAP_PAGE_MASK(map)));
2680 	} else {
2681 		if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2682 		    !overwrite &&
2683 		    user_alias == VM_MEMORY_REALLOC) {
2684 			/*
2685 			 * Force realloc() to switch to a new allocation,
2686 			 * to prevent 4k-fragmented virtual ranges.
2687 			 */
2688 //			DEBUG4K_ERROR("no realloc in place");
2689 			return KERN_NO_SPACE;
2690 		}
2691 
2692 		/*
2693 		 *	Verify that:
2694 		 *		the address doesn't itself violate
2695 		 *		the mask requirement.
2696 		 */
2697 
2698 		vm_map_lock(map);
2699 		map_locked = TRUE;
2700 		if ((start & mask) != 0) {
2701 			RETURN(KERN_NO_SPACE);
2702 		}
2703 
2704 		/*
2705 		 *	...	the address is within bounds
2706 		 */
2707 
2708 		end = start + size;
2709 
2710 		if ((start < effective_min_offset) ||
2711 		    (end > effective_max_offset) ||
2712 		    (start >= end)) {
2713 			RETURN(KERN_INVALID_ADDRESS);
2714 		}
2715 
2716 		if (overwrite && zap_old_map != VM_MAP_NULL) {
2717 			int remove_flags;
2718 			/*
2719 			 * Fixed mapping and "overwrite" flag: attempt to
2720 			 * remove all existing mappings in the specified
2721 			 * address range, saving them in our "zap_old_map".
2722 			 */
2723 			remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2724 			remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2725 			if (vmk_flags.vmkf_overwrite_immutable) {
2726 				/* we can overwrite immutable mappings */
2727 				remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2728 			}
2729 			(void) vm_map_delete(map, start, end,
2730 			    remove_flags,
2731 			    zap_old_map);
2732 		}
2733 
2734 		/*
2735 		 *	...	the starting address isn't allocated
2736 		 */
2737 
2738 		if (vm_map_lookup_entry(map, start, &entry)) {
2739 			if (!(vmk_flags.vmkf_already)) {
2740 				RETURN(KERN_NO_SPACE);
2741 			}
2742 			/*
2743 			 * Check if what's already there is what we want.
2744 			 */
2745 			tmp_start = start;
2746 			tmp_offset = offset;
2747 			if (entry->vme_start < start) {
2748 				tmp_start -= start - entry->vme_start;
2749 				tmp_offset -= start - entry->vme_start;
2750 			}
2751 			for (; entry->vme_start < end;
2752 			    entry = entry->vme_next) {
2753 				/*
2754 				 * Check if the mapping's attributes
2755 				 * match the existing map entry.
2756 				 */
2757 				if (entry == vm_map_to_entry(map) ||
2758 				    entry->vme_start != tmp_start ||
2759 				    entry->is_sub_map != is_submap ||
2760 				    VME_OFFSET(entry) != tmp_offset ||
2761 				    entry->needs_copy != needs_copy ||
2762 				    entry->protection != cur_protection ||
2763 				    entry->max_protection != max_protection ||
2764 				    entry->inheritance != inheritance ||
2765 				    entry->iokit_acct != iokit_acct ||
2766 				    VME_ALIAS(entry) != alias) {
2767 					/* not the same mapping ! */
2768 					RETURN(KERN_NO_SPACE);
2769 				}
2770 				/*
2771 				 * Check if the same object is being mapped.
2772 				 */
2773 				if (is_submap) {
2774 					if (VME_SUBMAP(entry) !=
2775 					    (vm_map_t) object) {
2776 						/* not the same submap */
2777 						RETURN(KERN_NO_SPACE);
2778 					}
2779 				} else {
2780 					if (VME_OBJECT(entry) != object) {
2781 						/* not the same VM object... */
2782 						vm_object_t obj2;
2783 
2784 						obj2 = VME_OBJECT(entry);
2785 						if ((obj2 == VM_OBJECT_NULL ||
2786 						    obj2->internal) &&
2787 						    (object == VM_OBJECT_NULL ||
2788 						    object->internal)) {
2789 							/*
2790 							 * ... but both are
2791 							 * anonymous memory,
2792 							 * so equivalent.
2793 							 */
2794 						} else {
2795 							RETURN(KERN_NO_SPACE);
2796 						}
2797 					}
2798 				}
2799 
2800 				tmp_offset += entry->vme_end - entry->vme_start;
2801 				tmp_start += entry->vme_end - entry->vme_start;
2802 				if (entry->vme_end >= end) {
2803 					/* reached the end of our mapping */
2804 					break;
2805 				}
2806 			}
2807 			/* it all matches:  let's use what's already there ! */
2808 			RETURN(KERN_MEMORY_PRESENT);
2809 		}
2810 
2811 		/*
2812 		 *	...	the next region doesn't overlap the
2813 		 *		end point.
2814 		 */
2815 
2816 		if ((entry->vme_next != vm_map_to_entry(map)) &&
2817 		    (entry->vme_next->vme_start < end)) {
2818 			RETURN(KERN_NO_SPACE);
2819 		}
2820 	}
2821 
2822 	/*
2823 	 *	At this point,
2824 	 *		"start" and "end" should define the endpoints of the
2825 	 *			available new range, and
2826 	 *		"entry" should refer to the region before the new
2827 	 *			range, and
2828 	 *
2829 	 *		the map should be locked.
2830 	 */
2831 
2832 	/*
2833 	 *	See whether we can avoid creating a new entry (and object) by
2834 	 *	extending one of our neighbors.  [So far, we only attempt to
2835 	 *	extend from below.]  Note that we can never extend/join
2836 	 *	purgable objects because they need to remain distinct
2837 	 *	entities in order to implement their "volatile object"
2838 	 *	semantics.
2839 	 */
2840 
2841 	if (purgable ||
2842 	    entry_for_jit ||
2843 	    vm_memory_malloc_no_cow(user_alias)) {
2844 		if (object == VM_OBJECT_NULL) {
2845 			object = vm_object_allocate(size);
2846 			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2847 			object->true_share = FALSE;
2848 			if (purgable) {
2849 				task_t owner;
2850 				object->purgable = VM_PURGABLE_NONVOLATILE;
2851 				if (map->pmap == kernel_pmap) {
2852 					/*
2853 					 * Purgeable mappings made in a kernel
2854 					 * map are "owned" by the kernel itself
2855 					 * rather than the current user task
2856 					 * because they're likely to be used by
2857 					 * more than this user task (see
2858 					 * execargs_purgeable_allocate(), for
2859 					 * example).
2860 					 */
2861 					owner = kernel_task;
2862 				} else {
2863 					owner = current_task();
2864 				}
2865 				assert(object->vo_owner == NULL);
2866 				assert(object->resident_page_count == 0);
2867 				assert(object->wired_page_count == 0);
2868 				vm_object_lock(object);
2869 				vm_purgeable_nonvolatile_enqueue(object, owner);
2870 				vm_object_unlock(object);
2871 			}
2872 			offset = (vm_object_offset_t)0;
2873 		}
2874 	} else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
2875 		/* no coalescing if address space uses sub-pages */
2876 	} else if ((is_submap == FALSE) &&
2877 	    (object == VM_OBJECT_NULL) &&
2878 	    (entry != vm_map_to_entry(map)) &&
2879 	    (entry->vme_end == start) &&
2880 	    (!entry->is_shared) &&
2881 	    (!entry->is_sub_map) &&
2882 	    (!entry->in_transition) &&
2883 	    (!entry->needs_wakeup) &&
2884 	    (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2885 	    (entry->protection == cur_protection) &&
2886 	    (entry->max_protection == max_protection) &&
2887 	    (entry->inheritance == inheritance) &&
2888 	    ((user_alias == VM_MEMORY_REALLOC) ||
2889 	    (VME_ALIAS(entry) == alias)) &&
2890 	    (entry->no_cache == no_cache) &&
2891 	    (entry->permanent == permanent) &&
2892 	    /* no coalescing for immutable executable mappings */
2893 	    !((entry->protection & VM_PROT_EXECUTE) &&
2894 	    entry->permanent) &&
2895 	    (!entry->superpage_size && !superpage_size) &&
2896 	    /*
2897 	     * No coalescing if not map-aligned, to avoid propagating
2898 	     * that condition any further than needed:
2899 	     */
2900 	    (!entry->map_aligned || !clear_map_aligned) &&
2901 	    (!entry->zero_wired_pages) &&
2902 	    (!entry->used_for_jit && !entry_for_jit) &&
2903 	    (!entry->pmap_cs_associated) &&
2904 	    (entry->iokit_acct == iokit_acct) &&
2905 	    (!entry->vme_resilient_codesign) &&
2906 	    (!entry->vme_resilient_media) &&
2907 	    (!entry->vme_atomic) &&
2908 	    (entry->vme_no_copy_on_read == no_copy_on_read) &&
2909 
2910 	    ((entry->vme_end - entry->vme_start) + size <=
2911 	    (user_alias == VM_MEMORY_REALLOC ?
2912 	    ANON_CHUNK_SIZE :
2913 	    NO_COALESCE_LIMIT)) &&
2914 
2915 	    (entry->wired_count == 0)) {        /* implies user_wired_count == 0 */
2916 		if (vm_object_coalesce(VME_OBJECT(entry),
2917 		    VM_OBJECT_NULL,
2918 		    VME_OFFSET(entry),
2919 		    (vm_object_offset_t) 0,
2920 		    (vm_map_size_t)(entry->vme_end - entry->vme_start),
2921 		    (vm_map_size_t)(end - entry->vme_end))) {
2922 			/*
2923 			 *	Coalesced the two objects - can extend
2924 			 *	the previous map entry to include the
2925 			 *	new range.
2926 			 */
2927 			map->size += (end - entry->vme_end);
2928 			assert(entry->vme_start < end);
2929 			assert(VM_MAP_PAGE_ALIGNED(end,
2930 			    VM_MAP_PAGE_MASK(map)));
2931 			if (__improbable(vm_debug_events)) {
2932 				DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2933 			}
2934 			entry->vme_end = end;
2935 			if (map->holelistenabled) {
2936 				vm_map_store_update_first_free(map, entry, TRUE);
2937 			} else {
2938 				vm_map_store_update_first_free(map, map->first_free, TRUE);
2939 			}
2940 			new_mapping_established = TRUE;
2941 			RETURN(KERN_SUCCESS);
2942 		}
2943 	}
2944 
2945 	step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2946 	new_entry = NULL;
2947 
2948 	for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2949 		tmp2_end = tmp2_start + step;
2950 		/*
2951 		 *	Create a new entry
2952 		 *
2953 		 * XXX FBDP
2954 		 * The reserved "page zero" in each process's address space can
2955 		 * be arbitrarily large.  Splitting it into separate objects and
2956 		 * therefore different VM map entries serves no purpose and just
2957 		 * slows down operations on the VM map, so let's not split the
2958 		 * allocation into chunks if the max protection is NONE.  That
2959 		 * memory should never be accessible, so it will never get to the
2960 		 * default pager.
2961 		 */
2962 		tmp_start = tmp2_start;
2963 		if (object == VM_OBJECT_NULL &&
2964 		    size > chunk_size &&
2965 		    max_protection != VM_PROT_NONE &&
2966 		    superpage_size == 0) {
2967 			tmp_end = tmp_start + chunk_size;
2968 		} else {
2969 			tmp_end = tmp2_end;
2970 		}
2971 		do {
2972 			if (!is_submap &&
2973 			    object != VM_OBJECT_NULL &&
2974 			    object->internal &&
2975 			    offset + (tmp_end - tmp_start) > object->vo_size) {
2976 //				printf("FBDP object %p size 0x%llx overmapping offset 0x%llx size 0x%llx\n", object, object->vo_size, offset, (uint64_t)(tmp_end - tmp_start));
2977 				DTRACE_VM5(vm_map_enter_overmap,
2978 				    vm_map_t, map,
2979 				    vm_map_address_t, tmp_start,
2980 				    vm_map_address_t, tmp_end,
2981 				    vm_object_offset_t, offset,
2982 				    vm_object_size_t, object->vo_size);
2983 			}
2984 			new_entry = vm_map_entry_insert(map,
2985 			    entry, tmp_start, tmp_end,
2986 			    object, offset, vmk_flags,
2987 			    needs_copy, FALSE, FALSE,
2988 			    cur_protection, max_protection,
2989 			    VM_BEHAVIOR_DEFAULT,
2990 			    (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
2991 			    VM_INHERIT_NONE : inheritance),
2992 			    0,
2993 			    no_cache,
2994 			    permanent,
2995 			    no_copy_on_read,
2996 			    superpage_size,
2997 			    clear_map_aligned,
2998 			    is_submap,
2999 			    entry_for_jit,
3000 			    alias,
3001 			    translated_allow_execute);
3002 
3003 			assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3004 
3005 			if (resilient_codesign) {
3006 				int reject_prot = (needs_copy ? VM_PROT_ALLEXEC : (VM_PROT_WRITE | VM_PROT_ALLEXEC));
3007 				if (!((cur_protection | max_protection) & reject_prot)) {
3008 					new_entry->vme_resilient_codesign = TRUE;
3009 				}
3010 			}
3011 
3012 			if (resilient_media &&
3013 			    (object == VM_OBJECT_NULL ||
3014 			    object->internal)) {
3015 				new_entry->vme_resilient_media = TRUE;
3016 			}
3017 
3018 			assert(!new_entry->iokit_acct);
3019 			if (!is_submap &&
3020 			    object != VM_OBJECT_NULL &&
3021 			    (object->purgable != VM_PURGABLE_DENY ||
3022 			    object->vo_ledger_tag)) {
3023 				assert(new_entry->use_pmap);
3024 				assert(!new_entry->iokit_acct);
3025 				/*
3026 				 * Turn off pmap accounting since
3027 				 * purgeable (or tagged) objects have their
3028 				 * own ledgers.
3029 				 */
3030 				new_entry->use_pmap = FALSE;
3031 			} else if (!is_submap &&
3032 			    iokit_acct &&
3033 			    object != VM_OBJECT_NULL &&
3034 			    object->internal) {
3035 				/* alternate accounting */
3036 				assert(!new_entry->iokit_acct);
3037 				assert(new_entry->use_pmap);
3038 				new_entry->iokit_acct = TRUE;
3039 				new_entry->use_pmap = FALSE;
3040 				DTRACE_VM4(
3041 					vm_map_iokit_mapped_region,
3042 					vm_map_t, map,
3043 					vm_map_offset_t, new_entry->vme_start,
3044 					vm_map_offset_t, new_entry->vme_end,
3045 					int, VME_ALIAS(new_entry));
3046 				vm_map_iokit_mapped_region(
3047 					map,
3048 					(new_entry->vme_end -
3049 					new_entry->vme_start));
3050 			} else if (!is_submap) {
3051 				assert(!new_entry->iokit_acct);
3052 				assert(new_entry->use_pmap);
3053 			}
3054 
3055 			if (is_submap) {
3056 				vm_map_t        submap;
3057 				boolean_t       submap_is_64bit;
3058 				boolean_t       use_pmap;
3059 
3060 				assert(new_entry->is_sub_map);
3061 				assert(!new_entry->use_pmap);
3062 				assert(!new_entry->iokit_acct);
3063 				submap = (vm_map_t) object;
3064 				submap_is_64bit = vm_map_is_64bit(submap);
3065 				use_pmap = vmk_flags.vmkf_nested_pmap;
3066 #ifndef NO_NESTED_PMAP
3067 				if (use_pmap && submap->pmap == NULL) {
3068 					ledger_t ledger = map->pmap->ledger;
3069 					/* we need a sub pmap to nest... */
3070 					submap->pmap = pmap_create_options(ledger, 0,
3071 					    submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3072 					if (submap->pmap == NULL) {
3073 						/* let's proceed without nesting... */
3074 					}
3075 #if     defined(__arm__) || defined(__arm64__)
3076 					else {
3077 						pmap_set_nested(submap->pmap);
3078 					}
3079 #endif
3080 				}
3081 				if (use_pmap && submap->pmap != NULL) {
3082 					if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3083 						DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3084 						kr = KERN_FAILURE;
3085 					} else {
3086 						kr = pmap_nest(map->pmap,
3087 						    submap->pmap,
3088 						    tmp_start,
3089 						    tmp_end - tmp_start);
3090 					}
3091 					if (kr != KERN_SUCCESS) {
3092 						printf("vm_map_enter: "
3093 						    "pmap_nest(0x%llx,0x%llx) "
3094 						    "error 0x%x\n",
3095 						    (long long)tmp_start,
3096 						    (long long)tmp_end,
3097 						    kr);
3098 					} else {
3099 						/* we're now nested ! */
3100 						new_entry->use_pmap = TRUE;
3101 						pmap_empty = FALSE;
3102 					}
3103 				}
3104 #endif /* NO_NESTED_PMAP */
3105 			}
3106 			entry = new_entry;
3107 
3108 			if (superpage_size) {
3109 				vm_page_t pages, m;
3110 				vm_object_t sp_object;
3111 				vm_object_offset_t sp_offset;
3112 
3113 				VME_OFFSET_SET(entry, 0);
3114 
3115 				/* allocate one superpage */
3116 				kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3117 				if (kr != KERN_SUCCESS) {
3118 					/* deallocate whole range... */
3119 					new_mapping_established = TRUE;
3120 					/* ... but only up to "tmp_end" */
3121 					size -= end - tmp_end;
3122 					RETURN(kr);
3123 				}
3124 
3125 				/* create one vm_object per superpage */
3126 				sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3127 				sp_object->phys_contiguous = TRUE;
3128 				sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3129 				VME_OBJECT_SET(entry, sp_object);
3130 				assert(entry->use_pmap);
3131 
3132 				/* enter the base pages into the object */
3133 				vm_object_lock(sp_object);
3134 				for (sp_offset = 0;
3135 				    sp_offset < SUPERPAGE_SIZE;
3136 				    sp_offset += PAGE_SIZE) {
3137 					m = pages;
3138 					pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3139 					pages = NEXT_PAGE(m);
3140 					*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3141 					vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3142 				}
3143 				vm_object_unlock(sp_object);
3144 			}
3145 		} while (tmp_end != tmp2_end &&
3146 		    (tmp_start = tmp_end) &&
3147 		    (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3148 		    tmp_end + chunk_size : tmp2_end));
3149 	}
3150 
3151 	new_mapping_established = TRUE;
3152 
3153 BailOut:
3154 	assert(map_locked == TRUE);
3155 
3156 	/*
3157 	 * Address space limit enforcement (RLIMIT_AS and RLIMIT_DATA):
3158 	 * If we have identified and possibly established the new mapping(s),
3159 	 * make sure we did not go beyond the address space limit.
3160 	 */
3161 	if (result == KERN_SUCCESS) {
3162 		if (map->size_limit != RLIM_INFINITY &&
3163 		    map->size > map->size_limit) {
3164 			/*
3165 			 * Establishing the requested mappings would exceed
3166 			 * the process's RLIMIT_AS limit: fail with
3167 			 * KERN_NO_SPACE.
3168 			 */
3169 			result = KERN_NO_SPACE;
3170 			printf("%d[%s] %s: map size 0x%llx over RLIMIT_AS 0x%llx\n",
3171 			    proc_selfpid(),
3172 			    (current_task()->bsd_info
3173 			    ? proc_name_address(current_task()->bsd_info)
3174 			    : "?"),
3175 			    __FUNCTION__,
3176 			    (uint64_t) map->size,
3177 			    (uint64_t) map->size_limit);
3178 			DTRACE_VM2(vm_map_enter_RLIMIT_AS,
3179 			    vm_map_size_t, map->size,
3180 			    uint64_t, map->size_limit);
3181 			vm_map_enter_RLIMIT_AS_count++;
3182 		} else if (map->data_limit != RLIM_INFINITY &&
3183 		    map->size > map->data_limit) {
3184 			/*
3185 			 * Establishing the requested mappings would exceed
3186 			 * the process's RLIMIT_DATA limit: fail with
3187 			 * KERN_NO_SPACE.
3188 			 */
3189 			result = KERN_NO_SPACE;
3190 			printf("%d[%s] %s: map size 0x%llx over RLIMIT_DATA 0x%llx\n",
3191 			    proc_selfpid(),
3192 			    (current_task()->bsd_info
3193 			    ? proc_name_address(current_task()->bsd_info)
3194 			    : "?"),
3195 			    __FUNCTION__,
3196 			    (uint64_t) map->size,
3197 			    (uint64_t) map->data_limit);
3198 			DTRACE_VM2(vm_map_enter_RLIMIT_DATA,
3199 			    vm_map_size_t, map->size,
3200 			    uint64_t, map->data_limit);
3201 			vm_map_enter_RLIMIT_DATA_count++;
3202 		}
3203 	}
3204 
3205 	if (result == KERN_SUCCESS) {
3206 		vm_prot_t pager_prot;
3207 		memory_object_t pager;
3208 
3209 #if DEBUG
3210 		if (pmap_empty &&
3211 		    !(vmk_flags.vmkf_no_pmap_check)) {
3212 			assert(pmap_is_empty(map->pmap,
3213 			    *address,
3214 			    *address + size));
3215 		}
3216 #endif /* DEBUG */
3217 
3218 		/*
3219 		 * For "named" VM objects, let the pager know that the
3220 		 * memory object is being mapped.  Some pagers need to keep
3221 		 * track of this, to know when they can reclaim the memory
3222 		 * object, for example.
3223 		 * VM calls memory_object_map() for each mapping (specifying
3224 		 * the protection of each mapping) and calls
3225 		 * memory_object_last_unmap() when all the mappings are gone.
3226 		 */
3227 		pager_prot = max_protection;
3228 		if (needs_copy) {
3229 			/*
3230 			 * Copy-On-Write mapping: won't modify
3231 			 * the memory object.
3232 			 */
3233 			pager_prot &= ~VM_PROT_WRITE;
3234 		}
3235 		if (!is_submap &&
3236 		    object != VM_OBJECT_NULL &&
3237 		    object->named &&
3238 		    object->pager != MEMORY_OBJECT_NULL) {
3239 			vm_object_lock(object);
3240 			pager = object->pager;
3241 			if (object->named &&
3242 			    pager != MEMORY_OBJECT_NULL) {
3243 				assert(object->pager_ready);
3244 				vm_object_mapping_wait(object, THREAD_UNINT);
3245 				vm_object_mapping_begin(object);
3246 				vm_object_unlock(object);
3247 
3248 				kr = memory_object_map(pager, pager_prot);
3249 				assert(kr == KERN_SUCCESS);
3250 
3251 				vm_object_lock(object);
3252 				vm_object_mapping_end(object);
3253 			}
3254 			vm_object_unlock(object);
3255 		}
3256 	}
3257 
3258 	assert(map_locked == TRUE);
3259 
3260 	if (!keep_map_locked) {
3261 		vm_map_unlock(map);
3262 		map_locked = FALSE;
3263 	}
3264 
3265 	/*
3266 	 * We can't hold the map lock if we enter this block.
3267 	 */
3268 
3269 	if (result == KERN_SUCCESS) {
3270 		/*	Wire down the new entry if the user
3271 		 *	requested all new map entries be wired.
3272 		 */
3273 		if ((map->wiring_required) || (superpage_size)) {
3274 			assert(!keep_map_locked);
3275 			pmap_empty = FALSE; /* pmap won't be empty */
3276 			kr = vm_map_wire_kernel(map, start, end,
3277 			    new_entry->protection, VM_KERN_MEMORY_MLOCK,
3278 			    TRUE);
3279 			result = kr;
3280 		}
3281 
3282 	}
3283 
3284 	if (result != KERN_SUCCESS) {
3285 		if (new_mapping_established) {
3286 			/*
3287 			 * The caller had an extra reference on the VM object
3288 			 * it gave us.
3289 			 * We've transferred that reference to the mapping we
3290 			 * just established but we're about to undo that mapping
3291 			 * and release that reference.
3292 			 * The caller expects its reference to be consumed on
3293 			 * success only, so we have to get the extra reference
3294 			 * back for the caller.
3295 			 */
3296 			vm_object_reference(caller_object);
3297 
3298 			/*
3299 			 * We have to get rid of the new mappings since we
3300 			 * won't make them available to the user.
3301 			 * Try and do that atomically, to minimize the risk
3302 			 * that someone else creates new mappings in that range.
3303 			 */
3304 			zap_new_map = vm_map_create(PMAP_NULL,
3305 			    *address,
3306 			    *address + size,
3307 			    map->hdr.entries_pageable);
3308 			vm_map_set_page_shift(zap_new_map,
3309 			    VM_MAP_PAGE_SHIFT(map));
3310 			vm_map_disable_hole_optimization(zap_new_map);
3311 
3312 			if (!map_locked) {
3313 				vm_map_lock(map);
3314 				map_locked = TRUE;
3315 			}
3316 			(void) vm_map_delete(map, *address, *address + size,
3317 			    (VM_MAP_REMOVE_SAVE_ENTRIES |
3318 			    VM_MAP_REMOVE_NO_MAP_ALIGN),
3319 			    zap_new_map);
3320 		}
3321 		if (zap_old_map != VM_MAP_NULL &&
3322 		    zap_old_map->hdr.nentries != 0) {
3323 			vm_map_entry_t  entry1, entry2;
3324 
3325 			/*
3326 			 * The new mapping failed.  Attempt to restore
3327 			 * the old mappings, saved in the "zap_old_map".
3328 			 */
3329 			if (!map_locked) {
3330 				vm_map_lock(map);
3331 				map_locked = TRUE;
3332 			}
3333 
3334 			/* first check if the coast is still clear */
3335 			start = vm_map_first_entry(zap_old_map)->vme_start;
3336 			end = vm_map_last_entry(zap_old_map)->vme_end;
3337 			if (vm_map_lookup_entry(map, start, &entry1) ||
3338 			    vm_map_lookup_entry(map, end, &entry2) ||
3339 			    entry1 != entry2) {
3340 				/*
3341 				 * Part of that range has already been
3342 				 * re-mapped:  we can't restore the old
3343 				 * mappings...
3344 				 */
3345 				vm_map_enter_restore_failures++;
3346 			} else {
3347 				/*
3348 				 * Transfer the saved map entries from
3349 				 * "zap_old_map" to the original "map",
3350 				 * inserting them all after "entry1".
3351 				 */
3352 				for (entry2 = vm_map_first_entry(zap_old_map);
3353 				    entry2 != vm_map_to_entry(zap_old_map);
3354 				    entry2 = vm_map_first_entry(zap_old_map)) {
3355 					vm_map_size_t entry_size;
3356 
3357 					entry_size = (entry2->vme_end -
3358 					    entry2->vme_start);
3359 					vm_map_store_entry_unlink(zap_old_map,
3360 					    entry2);
3361 					zap_old_map->size -= entry_size;
3362 					vm_map_store_entry_link(map, entry1, entry2,
3363 					    VM_MAP_KERNEL_FLAGS_NONE);
3364 					map->size += entry_size;
3365 					entry1 = entry2;
3366 				}
3367 				if (map->wiring_required) {
3368 					/*
3369 					 * XXX TODO: we should rewire the
3370 					 * old pages here...
3371 					 */
3372 				}
3373 				vm_map_enter_restore_successes++;
3374 			}
3375 		}
3376 	}
3377 
3378 	/*
3379 	 * The caller is responsible for releasing the lock if it requested to
3380 	 * keep the map locked.
3381 	 */
3382 	if (map_locked && !keep_map_locked) {
3383 		vm_map_unlock(map);
3384 	}
3385 
3386 	/*
3387 	 * Get rid of the "zap_maps" and all the map entries that
3388 	 * they may still contain.
3389 	 */
3390 	if (zap_old_map != VM_MAP_NULL) {
3391 		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3392 		zap_old_map = VM_MAP_NULL;
3393 	}
3394 	if (zap_new_map != VM_MAP_NULL) {
3395 		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3396 		zap_new_map = VM_MAP_NULL;
3397 	}
3398 
3399 	return result;
3400 
3401 #undef  RETURN
3402 }
3403 
3404 #if __arm64__
3405 extern const struct memory_object_pager_ops fourk_pager_ops;
3406 kern_return_t
3407 vm_map_enter_fourk(
3408 	vm_map_t                map,
3409 	vm_map_offset_t         *address,       /* IN/OUT */
3410 	vm_map_size_t           size,
3411 	vm_map_offset_t         mask,
3412 	int                     flags,
3413 	vm_map_kernel_flags_t   vmk_flags,
3414 	vm_tag_t                alias,
3415 	vm_object_t             object,
3416 	vm_object_offset_t      offset,
3417 	boolean_t               needs_copy,
3418 	vm_prot_t               cur_protection,
3419 	vm_prot_t               max_protection,
3420 	vm_inherit_t            inheritance)
3421 {
3422 	vm_map_entry_t          entry, new_entry;
3423 	vm_map_offset_t         start, fourk_start;
3424 	vm_map_offset_t         end, fourk_end;
3425 	vm_map_size_t           fourk_size;
3426 	kern_return_t           result = KERN_SUCCESS;
3427 	vm_map_t                zap_old_map = VM_MAP_NULL;
3428 	vm_map_t                zap_new_map = VM_MAP_NULL;
3429 	boolean_t               map_locked = FALSE;
3430 	boolean_t               pmap_empty = TRUE;
3431 	boolean_t               new_mapping_established = FALSE;
3432 	boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3433 	boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3434 	boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3435 	boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3436 	boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3437 	boolean_t               is_submap = vmk_flags.vmkf_submap;
3438 	boolean_t               permanent = vmk_flags.vmkf_permanent;
3439 	boolean_t               no_copy_on_read = vmk_flags.vmkf_permanent;
3440 	boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
3441 //	boolean_t		iokit_acct = vmk_flags.vmkf_iokit_acct;
3442 	boolean_t               translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
3443 	unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3444 	vm_map_offset_t         effective_min_offset, effective_max_offset;
3445 	kern_return_t           kr;
3446 	boolean_t               clear_map_aligned = FALSE;
3447 	memory_object_t         fourk_mem_obj;
3448 	vm_object_t             fourk_object;
3449 	vm_map_offset_t         fourk_pager_offset;
3450 	int                     fourk_pager_index_start, fourk_pager_index_num;
3451 	int                     cur_idx;
3452 	boolean_t               fourk_copy;
3453 	vm_object_t             copy_object;
3454 	vm_object_offset_t      copy_offset;
3455 
3456 	if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3457 		panic("%s:%d", __FUNCTION__, __LINE__);
3458 	}
3459 	fourk_mem_obj = MEMORY_OBJECT_NULL;
3460 	fourk_object = VM_OBJECT_NULL;
3461 
3462 	if (superpage_size) {
3463 		return KERN_NOT_SUPPORTED;
3464 	}
3465 
3466 	if ((cur_protection & VM_PROT_WRITE) &&
3467 	    (cur_protection & VM_PROT_EXECUTE) &&
3468 #if XNU_TARGET_OS_OSX
3469 	    map->pmap != kernel_pmap &&
3470 	    (vm_map_cs_enforcement(map)
3471 #if __arm64__
3472 	    || !VM_MAP_IS_EXOTIC(map)
3473 #endif /* __arm64__ */
3474 	    ) &&
3475 #endif /* XNU_TARGET_OS_OSX */
3476 	    !entry_for_jit) {
3477 		DTRACE_VM3(cs_wx,
3478 		    uint64_t, 0,
3479 		    uint64_t, 0,
3480 		    vm_prot_t, cur_protection);
3481 		printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3482 		    "turning off execute\n",
3483 		    proc_selfpid(),
3484 		    (current_task()->bsd_info
3485 		    ? proc_name_address(current_task()->bsd_info)
3486 		    : "?"),
3487 		    __FUNCTION__);
3488 		cur_protection &= ~VM_PROT_EXECUTE;
3489 	}
3490 
3491 	/*
3492 	 * If the task has requested executable lockdown,
3493 	 * deny any new executable mapping.
3494 	 */
3495 	if (map->map_disallow_new_exec == TRUE) {
3496 		if (cur_protection & VM_PROT_EXECUTE) {
3497 			return KERN_PROTECTION_FAILURE;
3498 		}
3499 	}
3500 
3501 	if (is_submap) {
3502 		return KERN_NOT_SUPPORTED;
3503 	}
3504 	if (vmk_flags.vmkf_already) {
3505 		return KERN_NOT_SUPPORTED;
3506 	}
3507 	if (purgable || entry_for_jit) {
3508 		return KERN_NOT_SUPPORTED;
3509 	}
3510 
3511 	effective_min_offset = map->min_offset;
3512 
3513 	if (vmk_flags.vmkf_beyond_max) {
3514 		return KERN_NOT_SUPPORTED;
3515 	} else {
3516 		effective_max_offset = map->max_offset;
3517 	}
3518 
3519 	if (size == 0 ||
3520 	    (offset & FOURK_PAGE_MASK) != 0) {
3521 		*address = 0;
3522 		return KERN_INVALID_ARGUMENT;
3523 	}
3524 
3525 #define RETURN(value)   { result = value; goto BailOut; }
3526 
3527 	assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3528 	assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3529 
3530 	if (!anywhere && overwrite) {
3531 		return KERN_NOT_SUPPORTED;
3532 	}
3533 	if (!anywhere && overwrite) {
3534 		/*
3535 		 * Create a temporary VM map to hold the old mappings in the
3536 		 * affected area while we create the new one.
3537 		 * This avoids releasing the VM map lock in
3538 		 * vm_map_entry_delete() and allows atomicity
3539 		 * when we want to replace some mappings with a new one.
3540 		 * It also allows us to restore the old VM mappings if the
3541 		 * new mapping fails.
3542 		 */
3543 		zap_old_map = vm_map_create(PMAP_NULL,
3544 		    *address,
3545 		    *address + size,
3546 		    map->hdr.entries_pageable);
3547 		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3548 		vm_map_disable_hole_optimization(zap_old_map);
3549 	}
3550 
3551 	fourk_start = *address;
3552 	fourk_size = size;
3553 	fourk_end = fourk_start + fourk_size;
3554 
3555 	start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3556 	end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3557 	size = end - start;
3558 
3559 	if (anywhere) {
3560 		return KERN_NOT_SUPPORTED;
3561 	} else {
3562 		/*
3563 		 *	Verify that:
3564 		 *		the address doesn't itself violate
3565 		 *		the mask requirement.
3566 		 */
3567 
3568 		vm_map_lock(map);
3569 		map_locked = TRUE;
3570 		if ((start & mask) != 0) {
3571 			RETURN(KERN_NO_SPACE);
3572 		}
3573 
3574 		/*
3575 		 *	...	the address is within bounds
3576 		 */
3577 
3578 		end = start + size;
3579 
3580 		if ((start < effective_min_offset) ||
3581 		    (end > effective_max_offset) ||
3582 		    (start >= end)) {
3583 			RETURN(KERN_INVALID_ADDRESS);
3584 		}
3585 
3586 		if (overwrite && zap_old_map != VM_MAP_NULL) {
3587 			/*
3588 			 * Fixed mapping and "overwrite" flag: attempt to
3589 			 * remove all existing mappings in the specified
3590 			 * address range, saving them in our "zap_old_map".
3591 			 */
3592 			(void) vm_map_delete(map, start, end,
3593 			    (VM_MAP_REMOVE_SAVE_ENTRIES |
3594 			    VM_MAP_REMOVE_NO_MAP_ALIGN),
3595 			    zap_old_map);
3596 		}
3597 
3598 		/*
3599 		 *	...	the starting address isn't allocated
3600 		 */
3601 		if (vm_map_lookup_entry(map, start, &entry)) {
3602 			vm_object_t cur_object, shadow_object;
3603 
3604 			/*
3605 			 * We might already have some 4K mappings
3606 			 * in a 16K page here.
3607 			 */
3608 
3609 			if (entry->vme_end - entry->vme_start
3610 			    != SIXTEENK_PAGE_SIZE) {
3611 				RETURN(KERN_NO_SPACE);
3612 			}
3613 			if (entry->is_sub_map) {
3614 				RETURN(KERN_NO_SPACE);
3615 			}
3616 			if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3617 				RETURN(KERN_NO_SPACE);
3618 			}
3619 
3620 			/* go all the way down the shadow chain */
3621 			cur_object = VME_OBJECT(entry);
3622 			vm_object_lock(cur_object);
3623 			while (cur_object->shadow != VM_OBJECT_NULL) {
3624 				shadow_object = cur_object->shadow;
3625 				vm_object_lock(shadow_object);
3626 				vm_object_unlock(cur_object);
3627 				cur_object = shadow_object;
3628 				shadow_object = VM_OBJECT_NULL;
3629 			}
3630 			if (cur_object->internal ||
3631 			    cur_object->pager == NULL) {
3632 				vm_object_unlock(cur_object);
3633 				RETURN(KERN_NO_SPACE);
3634 			}
3635 			if (cur_object->pager->mo_pager_ops
3636 			    != &fourk_pager_ops) {
3637 				vm_object_unlock(cur_object);
3638 				RETURN(KERN_NO_SPACE);
3639 			}
3640 			fourk_object = cur_object;
3641 			fourk_mem_obj = fourk_object->pager;
3642 
3643 			/* keep the "4K" object alive */
3644 			vm_object_reference_locked(fourk_object);
3645 			memory_object_reference(fourk_mem_obj);
3646 			vm_object_unlock(fourk_object);
3647 
3648 			/* merge permissions */
3649 			entry->protection |= cur_protection;
3650 			entry->max_protection |= max_protection;
3651 
3652 			if ((entry->protection & VM_PROT_WRITE) &&
3653 			    (entry->protection & VM_PROT_ALLEXEC) &&
3654 			    fourk_binary_compatibility_unsafe &&
3655 			    fourk_binary_compatibility_allow_wx) {
3656 				/* write+execute: need to be "jit" */
3657 				entry->used_for_jit = TRUE;
3658 			}
3659 			goto map_in_fourk_pager;
3660 		}
3661 
3662 		/*
3663 		 *	...	the next region doesn't overlap the
3664 		 *		end point.
3665 		 */
3666 
3667 		if ((entry->vme_next != vm_map_to_entry(map)) &&
3668 		    (entry->vme_next->vme_start < end)) {
3669 			RETURN(KERN_NO_SPACE);
3670 		}
3671 	}
3672 
3673 	/*
3674 	 *	At this point,
3675 	 *		"start" and "end" should define the endpoints of the
3676 	 *			available new range, and
3677 	 *		"entry" should refer to the region before the new
3678 	 *			range, and
3679 	 *
3680 	 *		the map should be locked.
3681 	 */
3682 
3683 	/* create a new "4K" pager */
3684 	fourk_mem_obj = fourk_pager_create();
3685 	fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3686 	assert(fourk_object);
3687 
3688 	/* keep the "4K" object alive */
3689 	vm_object_reference(fourk_object);
3690 
3691 	/* create a "copy" object, to map the "4K" object copy-on-write */
3692 	fourk_copy = TRUE;
3693 	result = vm_object_copy_strategically(fourk_object,
3694 	    0,
3695 	    end - start,
3696 	    &copy_object,
3697 	    &copy_offset,
3698 	    &fourk_copy);
3699 	assert(result == KERN_SUCCESS);
3700 	assert(copy_object != VM_OBJECT_NULL);
3701 	assert(copy_offset == 0);
3702 
3703 	/* map the "4K" pager's copy object */
3704 	new_entry =
3705 	    vm_map_entry_insert(map, entry,
3706 	    vm_map_trunc_page(start,
3707 	    VM_MAP_PAGE_MASK(map)),
3708 	    vm_map_round_page(end,
3709 	    VM_MAP_PAGE_MASK(map)),
3710 	    copy_object,
3711 	    0,                         /* offset */
3712 	    vmk_flags,
3713 	    FALSE,                         /* needs_copy */
3714 	    FALSE,
3715 	    FALSE,
3716 	    cur_protection, max_protection,
3717 	    VM_BEHAVIOR_DEFAULT,
3718 	    (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3719 	    VM_INHERIT_NONE : inheritance),
3720 	    0,
3721 	    no_cache,
3722 	    permanent,
3723 	    no_copy_on_read,
3724 	    superpage_size,
3725 	    clear_map_aligned,
3726 	    is_submap,
3727 	    FALSE,                         /* jit */
3728 	    alias,
3729 	    translated_allow_execute);
3730 	entry = new_entry;
3731 
3732 #if VM_MAP_DEBUG_FOURK
3733 	if (vm_map_debug_fourk) {
3734 		printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3735 		    map,
3736 		    (uint64_t) entry->vme_start,
3737 		    (uint64_t) entry->vme_end,
3738 		    fourk_mem_obj);
3739 	}
3740 #endif /* VM_MAP_DEBUG_FOURK */
3741 
3742 	new_mapping_established = TRUE;
3743 
3744 map_in_fourk_pager:
3745 	/* "map" the original "object" where it belongs in the "4K" pager */
3746 	fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3747 	fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3748 	if (fourk_size > SIXTEENK_PAGE_SIZE) {
3749 		fourk_pager_index_num = 4;
3750 	} else {
3751 		fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3752 	}
3753 	if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3754 		fourk_pager_index_num = 4 - fourk_pager_index_start;
3755 	}
3756 	for (cur_idx = 0;
3757 	    cur_idx < fourk_pager_index_num;
3758 	    cur_idx++) {
3759 		vm_object_t             old_object;
3760 		vm_object_offset_t      old_offset;
3761 
3762 		kr = fourk_pager_populate(fourk_mem_obj,
3763 		    TRUE,                       /* overwrite */
3764 		    fourk_pager_index_start + cur_idx,
3765 		    object,
3766 		    (object
3767 		    ? (offset +
3768 		    (cur_idx * FOURK_PAGE_SIZE))
3769 		    : 0),
3770 		    &old_object,
3771 		    &old_offset);
3772 #if VM_MAP_DEBUG_FOURK
3773 		if (vm_map_debug_fourk) {
3774 			if (old_object == (vm_object_t) -1 &&
3775 			    old_offset == (vm_object_offset_t) -1) {
3776 				printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3777 				    "pager [%p:0x%llx] "
3778 				    "populate[%d] "
3779 				    "[object:%p,offset:0x%llx]\n",
3780 				    map,
3781 				    (uint64_t) entry->vme_start,
3782 				    (uint64_t) entry->vme_end,
3783 				    fourk_mem_obj,
3784 				    VME_OFFSET(entry),
3785 				    fourk_pager_index_start + cur_idx,
3786 				    object,
3787 				    (object
3788 				    ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3789 				    : 0));
3790 			} else {
3791 				printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3792 				    "pager [%p:0x%llx] "
3793 				    "populate[%d] [object:%p,offset:0x%llx] "
3794 				    "old [%p:0x%llx]\n",
3795 				    map,
3796 				    (uint64_t) entry->vme_start,
3797 				    (uint64_t) entry->vme_end,
3798 				    fourk_mem_obj,
3799 				    VME_OFFSET(entry),
3800 				    fourk_pager_index_start + cur_idx,
3801 				    object,
3802 				    (object
3803 				    ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3804 				    : 0),
3805 				    old_object,
3806 				    old_offset);
3807 			}
3808 		}
3809 #endif /* VM_MAP_DEBUG_FOURK */
3810 
3811 		assert(kr == KERN_SUCCESS);
3812 		if (object != old_object &&
3813 		    object != VM_OBJECT_NULL &&
3814 		    object != (vm_object_t) -1) {
3815 			vm_object_reference(object);
3816 		}
3817 		if (object != old_object &&
3818 		    old_object != VM_OBJECT_NULL &&
3819 		    old_object != (vm_object_t) -1) {
3820 			vm_object_deallocate(old_object);
3821 		}
3822 	}
3823 
3824 BailOut:
3825 	assert(map_locked == TRUE);
3826 
3827 	if (result == KERN_SUCCESS) {
3828 		vm_prot_t pager_prot;
3829 		memory_object_t pager;
3830 
3831 #if DEBUG
3832 		if (pmap_empty &&
3833 		    !(vmk_flags.vmkf_no_pmap_check)) {
3834 			assert(pmap_is_empty(map->pmap,
3835 			    *address,
3836 			    *address + size));
3837 		}
3838 #endif /* DEBUG */
3839 
3840 		/*
3841 		 * For "named" VM objects, let the pager know that the
3842 		 * memory object is being mapped.  Some pagers need to keep
3843 		 * track of this, to know when they can reclaim the memory
3844 		 * object, for example.
3845 		 * VM calls memory_object_map() for each mapping (specifying
3846 		 * the protection of each mapping) and calls
3847 		 * memory_object_last_unmap() when all the mappings are gone.
3848 		 */
3849 		pager_prot = max_protection;
3850 		if (needs_copy) {
3851 			/*
3852 			 * Copy-On-Write mapping: won't modify
3853 			 * the memory object.
3854 			 */
3855 			pager_prot &= ~VM_PROT_WRITE;
3856 		}
3857 		if (!is_submap &&
3858 		    object != VM_OBJECT_NULL &&
3859 		    object->named &&
3860 		    object->pager != MEMORY_OBJECT_NULL) {
3861 			vm_object_lock(object);
3862 			pager = object->pager;
3863 			if (object->named &&
3864 			    pager != MEMORY_OBJECT_NULL) {
3865 				assert(object->pager_ready);
3866 				vm_object_mapping_wait(object, THREAD_UNINT);
3867 				vm_object_mapping_begin(object);
3868 				vm_object_unlock(object);
3869 
3870 				kr = memory_object_map(pager, pager_prot);
3871 				assert(kr == KERN_SUCCESS);
3872 
3873 				vm_object_lock(object);
3874 				vm_object_mapping_end(object);
3875 			}
3876 			vm_object_unlock(object);
3877 		}
3878 		if (!is_submap &&
3879 		    fourk_object != VM_OBJECT_NULL &&
3880 		    fourk_object->named &&
3881 		    fourk_object->pager != MEMORY_OBJECT_NULL) {
3882 			vm_object_lock(fourk_object);
3883 			pager = fourk_object->pager;
3884 			if (fourk_object->named &&
3885 			    pager != MEMORY_OBJECT_NULL) {
3886 				assert(fourk_object->pager_ready);
3887 				vm_object_mapping_wait(fourk_object,
3888 				    THREAD_UNINT);
3889 				vm_object_mapping_begin(fourk_object);
3890 				vm_object_unlock(fourk_object);
3891 
3892 				kr = memory_object_map(pager, VM_PROT_READ);
3893 				assert(kr == KERN_SUCCESS);
3894 
3895 				vm_object_lock(fourk_object);
3896 				vm_object_mapping_end(fourk_object);
3897 			}
3898 			vm_object_unlock(fourk_object);
3899 		}
3900 	}
3901 
3902 	if (fourk_object != VM_OBJECT_NULL) {
3903 		vm_object_deallocate(fourk_object);
3904 		fourk_object = VM_OBJECT_NULL;
3905 		memory_object_deallocate(fourk_mem_obj);
3906 		fourk_mem_obj = MEMORY_OBJECT_NULL;
3907 	}
3908 
3909 	assert(map_locked == TRUE);
3910 
3911 	if (!keep_map_locked) {
3912 		vm_map_unlock(map);
3913 		map_locked = FALSE;
3914 	}
3915 
3916 	/*
3917 	 * We can't hold the map lock if we enter this block.
3918 	 */
3919 
3920 	if (result == KERN_SUCCESS) {
3921 		/*	Wire down the new entry if the user
3922 		 *	requested all new map entries be wired.
3923 		 */
3924 		if ((map->wiring_required) || (superpage_size)) {
3925 			assert(!keep_map_locked);
3926 			pmap_empty = FALSE; /* pmap won't be empty */
3927 			kr = vm_map_wire_kernel(map, start, end,
3928 			    new_entry->protection, VM_KERN_MEMORY_MLOCK,
3929 			    TRUE);
3930 			result = kr;
3931 		}
3932 
3933 	}
3934 
3935 	if (result != KERN_SUCCESS) {
3936 		if (new_mapping_established) {
3937 			/*
3938 			 * We have to get rid of the new mappings since we
3939 			 * won't make them available to the user.
3940 			 * Try and do that atomically, to minimize the risk
3941 			 * that someone else create new mappings that range.
3942 			 */
3943 			zap_new_map = vm_map_create(PMAP_NULL,
3944 			    *address,
3945 			    *address + size,
3946 			    map->hdr.entries_pageable);
3947 			vm_map_set_page_shift(zap_new_map,
3948 			    VM_MAP_PAGE_SHIFT(map));
3949 			vm_map_disable_hole_optimization(zap_new_map);
3950 
3951 			if (!map_locked) {
3952 				vm_map_lock(map);
3953 				map_locked = TRUE;
3954 			}
3955 			(void) vm_map_delete(map, *address, *address + size,
3956 			    (VM_MAP_REMOVE_SAVE_ENTRIES |
3957 			    VM_MAP_REMOVE_NO_MAP_ALIGN),
3958 			    zap_new_map);
3959 		}
3960 		if (zap_old_map != VM_MAP_NULL &&
3961 		    zap_old_map->hdr.nentries != 0) {
3962 			vm_map_entry_t  entry1, entry2;
3963 
3964 			/*
3965 			 * The new mapping failed.  Attempt to restore
3966 			 * the old mappings, saved in the "zap_old_map".
3967 			 */
3968 			if (!map_locked) {
3969 				vm_map_lock(map);
3970 				map_locked = TRUE;
3971 			}
3972 
3973 			/* first check if the coast is still clear */
3974 			start = vm_map_first_entry(zap_old_map)->vme_start;
3975 			end = vm_map_last_entry(zap_old_map)->vme_end;
3976 			if (vm_map_lookup_entry(map, start, &entry1) ||
3977 			    vm_map_lookup_entry(map, end, &entry2) ||
3978 			    entry1 != entry2) {
3979 				/*
3980 				 * Part of that range has already been
3981 				 * re-mapped:  we can't restore the old
3982 				 * mappings...
3983 				 */
3984 				vm_map_enter_restore_failures++;
3985 			} else {
3986 				/*
3987 				 * Transfer the saved map entries from
3988 				 * "zap_old_map" to the original "map",
3989 				 * inserting them all after "entry1".
3990 				 */
3991 				for (entry2 = vm_map_first_entry(zap_old_map);
3992 				    entry2 != vm_map_to_entry(zap_old_map);
3993 				    entry2 = vm_map_first_entry(zap_old_map)) {
3994 					vm_map_size_t entry_size;
3995 
3996 					entry_size = (entry2->vme_end -
3997 					    entry2->vme_start);
3998 					vm_map_store_entry_unlink(zap_old_map,
3999 					    entry2);
4000 					zap_old_map->size -= entry_size;
4001 					vm_map_store_entry_link(map, entry1, entry2,
4002 					    VM_MAP_KERNEL_FLAGS_NONE);
4003 					map->size += entry_size;
4004 					entry1 = entry2;
4005 				}
4006 				if (map->wiring_required) {
4007 					/*
4008 					 * XXX TODO: we should rewire the
4009 					 * old pages here...
4010 					 */
4011 				}
4012 				vm_map_enter_restore_successes++;
4013 			}
4014 		}
4015 	}
4016 
4017 	/*
4018 	 * The caller is responsible for releasing the lock if it requested to
4019 	 * keep the map locked.
4020 	 */
4021 	if (map_locked && !keep_map_locked) {
4022 		vm_map_unlock(map);
4023 	}
4024 
4025 	/*
4026 	 * Get rid of the "zap_maps" and all the map entries that
4027 	 * they may still contain.
4028 	 */
4029 	if (zap_old_map != VM_MAP_NULL) {
4030 		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4031 		zap_old_map = VM_MAP_NULL;
4032 	}
4033 	if (zap_new_map != VM_MAP_NULL) {
4034 		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4035 		zap_new_map = VM_MAP_NULL;
4036 	}
4037 
4038 	return result;
4039 
4040 #undef  RETURN
4041 }
4042 #endif /* __arm64__ */
4043 
4044 /*
4045  * Counters for the prefault optimization (mapping + prefault of a
4046  * caller-supplied UPL page list, see vm_map_enter_mem_object_helper).
4047  * vm_prefault_nb_pages presumably counts pages prefaulted into the pmap;
4048  * vm_prefault_nb_bailout presumably counts prefault attempts abandoned
4049  * early (e.g. when a pmap_enter fails and further attempts are likely
4050  * to fail too).  NOTE(review): the increment sites are not visible in
4051  * this chunk — confirm against the prefault loop before relying on the
4052  * exact semantics.  Not atomic: best-effort statistics only.
4053  */
4047 int64_t vm_prefault_nb_pages = 0;
4048 int64_t vm_prefault_nb_bailout = 0;
4049 
4050 static kern_return_t
vm_map_enter_mem_object_helper(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance,upl_page_list_ptr_t page_list,unsigned int page_list_count)4051 vm_map_enter_mem_object_helper(
4052 	vm_map_t                target_map,
4053 	vm_map_offset_t         *address,
4054 	vm_map_size_t           initial_size,
4055 	vm_map_offset_t         mask,
4056 	int                     flags,
4057 	vm_map_kernel_flags_t   vmk_flags,
4058 	vm_tag_t                tag,
4059 	ipc_port_t              port,
4060 	vm_object_offset_t      offset,
4061 	boolean_t               copy,
4062 	vm_prot_t               cur_protection,
4063 	vm_prot_t               max_protection,
4064 	vm_inherit_t            inheritance,
4065 	upl_page_list_ptr_t     page_list,
4066 	unsigned int            page_list_count)
4067 {
4068 	vm_map_address_t        map_addr;
4069 	vm_map_size_t           map_size;
4070 	vm_object_t             object;
4071 	vm_object_size_t        size;
4072 	kern_return_t           result;
4073 	boolean_t               mask_cur_protection, mask_max_protection;
4074 	boolean_t               kernel_prefault, try_prefault = (page_list_count != 0);
4075 	vm_map_offset_t         offset_in_mapping = 0;
4076 #if __arm64__
4077 	boolean_t               fourk = vmk_flags.vmkf_fourk;
4078 #endif /* __arm64__ */
4079 
4080 	if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4081 		/* XXX TODO4K prefaulting depends on page size... */
4082 		try_prefault = FALSE;
4083 	}
4084 
4085 	assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4086 
4087 	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4088 	mask_max_protection = max_protection & VM_PROT_IS_MASK;
4089 	cur_protection &= ~VM_PROT_IS_MASK;
4090 	max_protection &= ~VM_PROT_IS_MASK;
4091 
4092 	/*
4093 	 * Check arguments for validity
4094 	 */
4095 	if ((target_map == VM_MAP_NULL) ||
4096 	    (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4097 	    (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
4098 	    (inheritance > VM_INHERIT_LAST_VALID) ||
4099 	    (try_prefault && (copy || !page_list)) ||
4100 	    initial_size == 0) {
4101 		return KERN_INVALID_ARGUMENT;
4102 	}
4103 
4104 #if __arm64__
4105 	if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4106 		/* no "fourk" if map is using a sub-page page size */
4107 		fourk = FALSE;
4108 	}
4109 	if (fourk) {
4110 		map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4111 		map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4112 	} else
4113 #endif /* __arm64__ */
4114 	{
4115 		map_addr = vm_map_trunc_page(*address,
4116 		    VM_MAP_PAGE_MASK(target_map));
4117 		map_size = vm_map_round_page(initial_size,
4118 		    VM_MAP_PAGE_MASK(target_map));
4119 	}
4120 	size = vm_object_round_page(initial_size);
4121 
4122 	/*
4123 	 * Find the vm object (if any) corresponding to this port.
4124 	 */
4125 	if (!IP_VALID(port)) {
4126 		object = VM_OBJECT_NULL;
4127 		offset = 0;
4128 		copy = FALSE;
4129 	} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4130 		vm_named_entry_t        named_entry;
4131 		vm_object_offset_t      data_offset;
4132 
4133 		named_entry = mach_memory_entry_from_port(port);
4134 
4135 		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4136 		    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4137 			data_offset = named_entry->data_offset;
4138 			offset += named_entry->data_offset;
4139 		} else {
4140 			data_offset = 0;
4141 		}
4142 
4143 		/* a few checks to make sure user is obeying rules */
4144 		if (size == 0) {
4145 			if (offset >= named_entry->size) {
4146 				return KERN_INVALID_RIGHT;
4147 			}
4148 			size = named_entry->size - offset;
4149 		}
4150 		if (mask_max_protection) {
4151 			max_protection &= named_entry->protection;
4152 		}
4153 		if (mask_cur_protection) {
4154 			cur_protection &= named_entry->protection;
4155 		}
4156 		if ((named_entry->protection & max_protection) !=
4157 		    max_protection) {
4158 			return KERN_INVALID_RIGHT;
4159 		}
4160 		if ((named_entry->protection & cur_protection) !=
4161 		    cur_protection) {
4162 			return KERN_INVALID_RIGHT;
4163 		}
4164 		if (offset + size < offset) {
4165 			/* overflow */
4166 			return KERN_INVALID_ARGUMENT;
4167 		}
4168 		if (named_entry->size < (offset + initial_size)) {
4169 			return KERN_INVALID_ARGUMENT;
4170 		}
4171 
4172 		if (named_entry->is_copy) {
4173 			/* for a vm_map_copy, we can only map it whole */
4174 			if ((size != named_entry->size) &&
4175 			    (vm_map_round_page(size,
4176 			    VM_MAP_PAGE_MASK(target_map)) ==
4177 			    named_entry->size)) {
4178 				/* XXX FBDP use the rounded size... */
4179 				size = vm_map_round_page(
4180 					size,
4181 					VM_MAP_PAGE_MASK(target_map));
4182 			}
4183 		}
4184 
4185 		/* the callers parameter offset is defined to be the */
4186 		/* offset from beginning of named entry offset in object */
4187 		offset = offset + named_entry->offset;
4188 
4189 		if (!VM_MAP_PAGE_ALIGNED(size,
4190 		    VM_MAP_PAGE_MASK(target_map))) {
4191 			/*
4192 			 * Let's not map more than requested;
4193 			 * vm_map_enter() will handle this "not map-aligned"
4194 			 * case.
4195 			 */
4196 			map_size = size;
4197 		}
4198 
4199 		named_entry_lock(named_entry);
4200 		if (named_entry->is_sub_map) {
4201 			vm_map_t                submap;
4202 
4203 			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4204 			    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4205 				panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4206 			}
4207 
4208 			submap = named_entry->backing.map;
4209 			vm_map_reference(submap);
4210 			named_entry_unlock(named_entry);
4211 
4212 			vmk_flags.vmkf_submap = TRUE;
4213 
4214 			result = vm_map_enter(target_map,
4215 			    &map_addr,
4216 			    map_size,
4217 			    mask,
4218 			    flags,
4219 			    vmk_flags,
4220 			    tag,
4221 			    (vm_object_t)(uintptr_t) submap,
4222 			    offset,
4223 			    copy,
4224 			    cur_protection,
4225 			    max_protection,
4226 			    inheritance);
4227 			if (result != KERN_SUCCESS) {
4228 				vm_map_deallocate(submap);
4229 			} else {
4230 				/*
4231 				 * No need to lock "submap" just to check its
4232 				 * "mapped" flag: that flag is never reset
4233 				 * once it's been set and if we race, we'll
4234 				 * just end up setting it twice, which is OK.
4235 				 */
4236 				if (submap->mapped_in_other_pmaps == FALSE &&
4237 				    vm_map_pmap(submap) != PMAP_NULL &&
4238 				    vm_map_pmap(submap) !=
4239 				    vm_map_pmap(target_map)) {
4240 					/*
4241 					 * This submap is being mapped in a map
4242 					 * that uses a different pmap.
4243 					 * Set its "mapped_in_other_pmaps" flag
4244 					 * to indicate that we now need to
4245 					 * remove mappings from all pmaps rather
4246 					 * than just the submap's pmap.
4247 					 */
4248 					vm_map_lock(submap);
4249 					submap->mapped_in_other_pmaps = TRUE;
4250 					vm_map_unlock(submap);
4251 				}
4252 				*address = map_addr;
4253 			}
4254 			return result;
4255 		} else if (named_entry->is_copy) {
4256 			kern_return_t   kr;
4257 			vm_map_copy_t   copy_map;
4258 			vm_map_entry_t  copy_entry;
4259 			vm_map_offset_t copy_addr;
4260 			vm_map_copy_t   target_copy_map;
4261 			vm_map_offset_t overmap_start, overmap_end;
4262 			vm_map_offset_t trimmed_start;
4263 			vm_map_size_t   target_size;
4264 
4265 			if (flags & ~(VM_FLAGS_FIXED |
4266 			    VM_FLAGS_ANYWHERE |
4267 			    VM_FLAGS_OVERWRITE |
4268 			    VM_FLAGS_RETURN_4K_DATA_ADDR |
4269 			    VM_FLAGS_RETURN_DATA_ADDR |
4270 			    VM_FLAGS_ALIAS_MASK)) {
4271 				named_entry_unlock(named_entry);
4272 				return KERN_INVALID_ARGUMENT;
4273 			}
4274 
4275 			copy_map = named_entry->backing.copy;
4276 			assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4277 			if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4278 				/* unsupported type; should not happen */
4279 				printf("vm_map_enter_mem_object: "
4280 				    "memory_entry->backing.copy "
4281 				    "unsupported type 0x%x\n",
4282 				    copy_map->type);
4283 				named_entry_unlock(named_entry);
4284 				return KERN_INVALID_ARGUMENT;
4285 			}
4286 
4287 			if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4288 				DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4289 			}
4290 
4291 			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4292 			    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4293 				offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4294 				if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4295 					offset_in_mapping &= ~((signed)(0xFFF));
4296 				}
4297 			}
4298 
4299 			target_copy_map = VM_MAP_COPY_NULL;
4300 			target_size = copy_map->size;
4301 			overmap_start = 0;
4302 			overmap_end = 0;
4303 			trimmed_start = 0;
4304 			if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4305 				DEBUG4K_ADJUST("adjusting...\n");
4306 				kr = vm_map_copy_adjust_to_target(
4307 					copy_map,
4308 					offset /* includes data_offset */,
4309 					initial_size,
4310 					target_map,
4311 					copy,
4312 					&target_copy_map,
4313 					&overmap_start,
4314 					&overmap_end,
4315 					&trimmed_start);
4316 				if (kr != KERN_SUCCESS) {
4317 					named_entry_unlock(named_entry);
4318 					return kr;
4319 				}
4320 				target_size = target_copy_map->size;
4321 				if (trimmed_start >= data_offset) {
4322 					data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4323 				} else {
4324 					data_offset -= trimmed_start;
4325 				}
4326 			} else {
4327 				target_copy_map = copy_map;
4328 			}
4329 
4330 			/* reserve a contiguous range */
4331 			kr = vm_map_enter(target_map,
4332 			    &map_addr,
4333 			    vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4334 			    mask,
4335 			    flags & (VM_FLAGS_ANYWHERE |
4336 			    VM_FLAGS_OVERWRITE |
4337 			    VM_FLAGS_RETURN_4K_DATA_ADDR |
4338 			    VM_FLAGS_RETURN_DATA_ADDR),
4339 			    vmk_flags,
4340 			    tag,
4341 			    VM_OBJECT_NULL,
4342 			    0,
4343 			    FALSE,               /* copy */
4344 			    cur_protection,
4345 			    max_protection,
4346 			    inheritance);
4347 			if (kr != KERN_SUCCESS) {
4348 				DEBUG4K_ERROR("kr 0x%x\n", kr);
4349 				if (target_copy_map != copy_map) {
4350 					vm_map_copy_discard(target_copy_map);
4351 					target_copy_map = VM_MAP_COPY_NULL;
4352 				}
4353 				named_entry_unlock(named_entry);
4354 				return kr;
4355 			}
4356 
4357 			copy_addr = map_addr;
4358 
4359 			for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4360 			    copy_entry != vm_map_copy_to_entry(target_copy_map);
4361 			    copy_entry = copy_entry->vme_next) {
4362 				int                     remap_flags;
4363 				vm_map_kernel_flags_t   vmk_remap_flags;
4364 				vm_map_t                copy_submap;
4365 				vm_object_t             copy_object;
4366 				vm_map_size_t           copy_size;
4367 				vm_object_offset_t      copy_offset;
4368 				int                     copy_vm_alias;
4369 
4370 				remap_flags = 0;
4371 				vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4372 
4373 				copy_object = VME_OBJECT(copy_entry);
4374 				copy_offset = VME_OFFSET(copy_entry);
4375 				copy_size = (copy_entry->vme_end -
4376 				    copy_entry->vme_start);
4377 				VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4378 				if (copy_vm_alias == 0) {
4379 					/*
4380 					 * Caller does not want a specific
4381 					 * alias for this new mapping:  use
4382 					 * the alias of the original mapping.
4383 					 */
4384 					copy_vm_alias = VME_ALIAS(copy_entry);
4385 				}
4386 
4387 				/* sanity check */
4388 				if ((copy_addr + copy_size) >
4389 				    (map_addr +
4390 				    overmap_start + overmap_end +
4391 				    named_entry->size /* XXX full size */)) {
4392 					/* over-mapping too much !? */
4393 					kr = KERN_INVALID_ARGUMENT;
4394 					DEBUG4K_ERROR("kr 0x%x\n", kr);
4395 					/* abort */
4396 					break;
4397 				}
4398 
4399 				/* take a reference on the object */
4400 				if (copy_entry->is_sub_map) {
4401 					vmk_remap_flags.vmkf_submap = TRUE;
4402 					copy_submap = VME_SUBMAP(copy_entry);
4403 					vm_map_lock(copy_submap);
4404 					vm_map_reference(copy_submap);
4405 					vm_map_unlock(copy_submap);
4406 					copy_object = (vm_object_t)(uintptr_t) copy_submap;
4407 				} else if (!copy &&
4408 				    copy_object != VM_OBJECT_NULL &&
4409 				    (copy_entry->needs_copy ||
4410 				    copy_object->shadowed ||
4411 				    (!copy_object->true_share &&
4412 				    !copy_entry->is_shared &&
4413 				    copy_object->vo_size > copy_size))) {
4414 					/*
4415 					 * We need to resolve our side of this
4416 					 * "symmetric" copy-on-write now; we
4417 					 * need a new object to map and share,
4418 					 * instead of the current one which
4419 					 * might still be shared with the
4420 					 * original mapping.
4421 					 *
4422 					 * Note: A "vm_map_copy_t" does not
4423 					 * have a lock but we're protected by
4424 					 * the named entry's lock here.
4425 					 */
4426 					// assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4427 					VME_OBJECT_SHADOW(copy_entry, copy_size);
4428 					if (!copy_entry->needs_copy &&
4429 					    copy_entry->protection & VM_PROT_WRITE) {
4430 						vm_prot_t prot;
4431 
4432 						prot = copy_entry->protection & ~VM_PROT_WRITE;
4433 						vm_object_pmap_protect(copy_object,
4434 						    copy_offset,
4435 						    copy_size,
4436 						    PMAP_NULL,
4437 						    PAGE_SIZE,
4438 						    0,
4439 						    prot);
4440 					}
4441 
4442 					copy_entry->needs_copy = FALSE;
4443 					copy_entry->is_shared = TRUE;
4444 					copy_object = VME_OBJECT(copy_entry);
4445 					copy_offset = VME_OFFSET(copy_entry);
4446 					vm_object_lock(copy_object);
4447 					vm_object_reference_locked(copy_object);
4448 					if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4449 						/* we're about to make a shared mapping of this object */
4450 						copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4451 						copy_object->true_share = TRUE;
4452 					}
4453 					vm_object_unlock(copy_object);
4454 				} else {
4455 					/*
4456 					 * We already have the right object
4457 					 * to map.
4458 					 */
4459 					copy_object = VME_OBJECT(copy_entry);
4460 					vm_object_reference(copy_object);
4461 				}
4462 
4463 				/* over-map the object into destination */
4464 				remap_flags |= flags;
4465 				remap_flags |= VM_FLAGS_FIXED;
4466 				remap_flags |= VM_FLAGS_OVERWRITE;
4467 				remap_flags &= ~VM_FLAGS_ANYWHERE;
4468 				if (!copy && !copy_entry->is_sub_map) {
4469 					/*
4470 					 * copy-on-write should have been
4471 					 * resolved at this point, or we would
4472 					 * end up sharing instead of copying.
4473 					 */
4474 					assert(!copy_entry->needs_copy);
4475 				}
4476 #if XNU_TARGET_OS_OSX
4477 				if (copy_entry->used_for_jit) {
4478 					vmk_remap_flags.vmkf_map_jit = TRUE;
4479 				}
4480 #endif /* XNU_TARGET_OS_OSX */
4481 
4482 				assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4483 				    "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4484 				kr = vm_map_enter(target_map,
4485 				    &copy_addr,
4486 				    copy_size,
4487 				    (vm_map_offset_t) 0,
4488 				    remap_flags,
4489 				    vmk_remap_flags,
4490 				    (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4491 				    copy_object,
4492 				    copy_offset,
4493 				    ((copy_object == NULL)
4494 				    ? FALSE
4495 				    : (copy || copy_entry->needs_copy)),
4496 				    cur_protection,
4497 				    max_protection,
4498 				    inheritance);
4499 				if (kr != KERN_SUCCESS) {
4500 					DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4501 					if (copy_entry->is_sub_map) {
4502 						vm_map_deallocate(copy_submap);
4503 					} else {
4504 						vm_object_deallocate(copy_object);
4505 					}
4506 					/* abort */
4507 					break;
4508 				}
4509 
4510 				/* next mapping */
4511 				copy_addr += copy_size;
4512 			}
4513 
4514 			if (kr == KERN_SUCCESS) {
4515 				if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4516 				    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4517 					*address = map_addr + offset_in_mapping;
4518 				} else {
4519 					*address = map_addr;
4520 				}
4521 				if (overmap_start) {
4522 					*address += overmap_start;
4523 					DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4524 				}
4525 			}
4526 			named_entry_unlock(named_entry);
4527 			if (target_copy_map != copy_map) {
4528 				vm_map_copy_discard(target_copy_map);
4529 				target_copy_map = VM_MAP_COPY_NULL;
4530 			}
4531 
4532 			if (kr != KERN_SUCCESS) {
4533 				if (!(flags & VM_FLAGS_OVERWRITE)) {
4534 					/* deallocate the contiguous range */
4535 					(void) vm_deallocate(target_map,
4536 					    map_addr,
4537 					    map_size);
4538 				}
4539 			}
4540 
4541 			return kr;
4542 		}
4543 
4544 		if (named_entry->is_object) {
4545 			unsigned int    access;
4546 			vm_prot_t       protections;
4547 			unsigned int    wimg_mode;
4548 
4549 			/* we are mapping a VM object */
4550 
4551 			protections = named_entry->protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
4552 			access = GET_MAP_MEM(named_entry->protection);
4553 
4554 			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4555 			    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4556 				offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4557 				if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4558 					offset_in_mapping &= ~((signed)(0xFFF));
4559 				}
4560 				offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4561 				map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4562 			}
4563 
4564 			object = vm_named_entry_to_vm_object(named_entry);
4565 			assert(object != VM_OBJECT_NULL);
4566 			vm_object_lock(object);
4567 			named_entry_unlock(named_entry);
4568 
4569 			vm_object_reference_locked(object);
4570 
4571 			wimg_mode = object->wimg_bits;
4572 			vm_prot_to_wimg(access, &wimg_mode);
4573 			if (object->wimg_bits != wimg_mode) {
4574 				vm_object_change_wimg_mode(object, wimg_mode);
4575 			}
4576 
4577 			vm_object_unlock(object);
4578 		} else {
4579 			panic("invalid VM named entry %p", named_entry);
4580 		}
4581 	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4582 		/*
4583 		 * JMM - This is temporary until we unify named entries
4584 		 * and raw memory objects.
4585 		 *
4586 		 * Detected fake ip_kotype for a memory object.  In
4587 		 * this case, the port isn't really a port at all, but
4588 		 * instead is just a raw memory object.
4589 		 */
4590 		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4591 		    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4592 			panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4593 		}
4594 
4595 		object = memory_object_to_vm_object((memory_object_t)port);
4596 		if (object == VM_OBJECT_NULL) {
4597 			return KERN_INVALID_OBJECT;
4598 		}
4599 		vm_object_reference(object);
4600 
4601 		/* wait for object (if any) to be ready */
4602 		if (object != VM_OBJECT_NULL) {
4603 			if (object == kernel_object) {
4604 				printf("Warning: Attempt to map kernel object"
4605 				    " by a non-private kernel entity\n");
4606 				return KERN_INVALID_OBJECT;
4607 			}
4608 			if (!object->pager_ready) {
4609 				vm_object_lock(object);
4610 
4611 				while (!object->pager_ready) {
4612 					vm_object_wait(object,
4613 					    VM_OBJECT_EVENT_PAGER_READY,
4614 					    THREAD_UNINT);
4615 					vm_object_lock(object);
4616 				}
4617 				vm_object_unlock(object);
4618 			}
4619 		}
4620 	} else {
4621 		return KERN_INVALID_OBJECT;
4622 	}
4623 
4624 	if (object != VM_OBJECT_NULL &&
4625 	    object->named &&
4626 	    object->pager != MEMORY_OBJECT_NULL &&
4627 	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4628 		memory_object_t pager;
4629 		vm_prot_t       pager_prot;
4630 		kern_return_t   kr;
4631 
4632 		/*
4633 		 * For "named" VM objects, let the pager know that the
4634 		 * memory object is being mapped.  Some pagers need to keep
4635 		 * track of this, to know when they can reclaim the memory
4636 		 * object, for example.
4637 		 * VM calls memory_object_map() for each mapping (specifying
4638 		 * the protection of each mapping) and calls
4639 		 * memory_object_last_unmap() when all the mappings are gone.
4640 		 */
4641 		pager_prot = max_protection;
4642 		if (copy) {
4643 			/*
4644 			 * Copy-On-Write mapping: won't modify the
4645 			 * memory object.
4646 			 */
4647 			pager_prot &= ~VM_PROT_WRITE;
4648 		}
4649 		vm_object_lock(object);
4650 		pager = object->pager;
4651 		if (object->named &&
4652 		    pager != MEMORY_OBJECT_NULL &&
4653 		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4654 			assert(object->pager_ready);
4655 			vm_object_mapping_wait(object, THREAD_UNINT);
4656 			vm_object_mapping_begin(object);
4657 			vm_object_unlock(object);
4658 
4659 			kr = memory_object_map(pager, pager_prot);
4660 			assert(kr == KERN_SUCCESS);
4661 
4662 			vm_object_lock(object);
4663 			vm_object_mapping_end(object);
4664 		}
4665 		vm_object_unlock(object);
4666 	}
4667 
4668 	/*
4669 	 *	Perform the copy if requested
4670 	 */
4671 
4672 	if (copy) {
4673 		vm_object_t             new_object;
4674 		vm_object_offset_t      new_offset;
4675 
4676 		result = vm_object_copy_strategically(object, offset,
4677 		    map_size,
4678 		    &new_object, &new_offset,
4679 		    &copy);
4680 
4681 
4682 		if (result == KERN_MEMORY_RESTART_COPY) {
4683 			boolean_t success;
4684 			boolean_t src_needs_copy;
4685 
4686 			/*
4687 			 * XXX
4688 			 * We currently ignore src_needs_copy.
4689 			 * This really is the issue of how to make
4690 			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4691 			 * non-kernel users to use. Solution forthcoming.
4692 			 * In the meantime, since we don't allow non-kernel
4693 			 * memory managers to specify symmetric copy,
4694 			 * we won't run into problems here.
4695 			 */
4696 			new_object = object;
4697 			new_offset = offset;
4698 			success = vm_object_copy_quickly(&new_object,
4699 			    new_offset,
4700 			    map_size,
4701 			    &src_needs_copy,
4702 			    &copy);
4703 			assert(success);
4704 			result = KERN_SUCCESS;
4705 		}
4706 		/*
4707 		 *	Throw away the reference to the
4708 		 *	original object, as it won't be mapped.
4709 		 */
4710 
4711 		vm_object_deallocate(object);
4712 
4713 		if (result != KERN_SUCCESS) {
4714 			return result;
4715 		}
4716 
4717 		object = new_object;
4718 		offset = new_offset;
4719 	}
4720 
4721 	/*
4722 	 * If non-kernel users want to try to prefault pages, the mapping and prefault
4723 	 * needs to be atomic.
4724 	 */
4725 	kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4726 	vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4727 
4728 #if __arm64__
4729 	if (fourk) {
4730 		/* map this object in a "4K" pager */
4731 		result = vm_map_enter_fourk(target_map,
4732 		    &map_addr,
4733 		    map_size,
4734 		    (vm_map_offset_t) mask,
4735 		    flags,
4736 		    vmk_flags,
4737 		    tag,
4738 		    object,
4739 		    offset,
4740 		    copy,
4741 		    cur_protection,
4742 		    max_protection,
4743 		    inheritance);
4744 	} else
4745 #endif /* __arm64__ */
4746 	{
4747 		result = vm_map_enter(target_map,
4748 		    &map_addr, map_size,
4749 		    (vm_map_offset_t)mask,
4750 		    flags,
4751 		    vmk_flags,
4752 		    tag,
4753 		    object, offset,
4754 		    copy,
4755 		    cur_protection, max_protection,
4756 		    inheritance);
4757 	}
4758 	if (result != KERN_SUCCESS) {
4759 		vm_object_deallocate(object);
4760 	}
4761 
4762 	/*
4763 	 * Try to prefault, and do not forget to release the vm map lock.
4764 	 */
4765 	if (result == KERN_SUCCESS && try_prefault) {
4766 		mach_vm_address_t va = map_addr;
4767 		kern_return_t kr = KERN_SUCCESS;
4768 		unsigned int i = 0;
4769 		int pmap_options;
4770 
4771 		pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4772 		if (object->internal) {
4773 			pmap_options |= PMAP_OPTIONS_INTERNAL;
4774 		}
4775 
4776 		for (i = 0; i < page_list_count; ++i) {
4777 			if (!UPL_VALID_PAGE(page_list, i)) {
4778 				if (kernel_prefault) {
4779 					assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4780 					result = KERN_MEMORY_ERROR;
4781 					break;
4782 				}
4783 			} else {
4784 				/*
4785 				 * If this function call failed, we should stop
4786 				 * trying to optimize, other calls are likely
4787 				 * going to fail too.
4788 				 *
4789 				 * We are not gonna report an error for such
4790 				 * failure though. That's an optimization, not
4791 				 * something critical.
4792 				 */
4793 				kr = pmap_enter_options(target_map->pmap,
4794 				    va, UPL_PHYS_PAGE(page_list, i),
4795 				    cur_protection, VM_PROT_NONE,
4796 				    0, TRUE, pmap_options, NULL);
4797 				if (kr != KERN_SUCCESS) {
4798 					OSIncrementAtomic64(&vm_prefault_nb_bailout);
4799 					if (kernel_prefault) {
4800 						result = kr;
4801 					}
4802 					break;
4803 				}
4804 				OSIncrementAtomic64(&vm_prefault_nb_pages);
4805 			}
4806 
4807 			/* Next virtual address */
4808 			va += PAGE_SIZE;
4809 		}
4810 		if (vmk_flags.vmkf_keep_map_locked) {
4811 			vm_map_unlock(target_map);
4812 		}
4813 	}
4814 
4815 	if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4816 	    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4817 		*address = map_addr + offset_in_mapping;
4818 	} else {
4819 		*address = map_addr;
4820 	}
4821 	return result;
4822 }
4823 
4824 kern_return_t
vm_map_enter_mem_object(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,boolean_t copy,vm_prot_t cur_protection,vm_prot_t max_protection,vm_inherit_t inheritance)4825 vm_map_enter_mem_object(
4826 	vm_map_t                target_map,
4827 	vm_map_offset_t         *address,
4828 	vm_map_size_t           initial_size,
4829 	vm_map_offset_t         mask,
4830 	int                     flags,
4831 	vm_map_kernel_flags_t   vmk_flags,
4832 	vm_tag_t                tag,
4833 	ipc_port_t              port,
4834 	vm_object_offset_t      offset,
4835 	boolean_t               copy,
4836 	vm_prot_t               cur_protection,
4837 	vm_prot_t               max_protection,
4838 	vm_inherit_t            inheritance)
4839 {
4840 	kern_return_t ret;
4841 
4842 	ret = vm_map_enter_mem_object_helper(target_map,
4843 	    address,
4844 	    initial_size,
4845 	    mask,
4846 	    flags,
4847 	    vmk_flags,
4848 	    tag,
4849 	    port,
4850 	    offset,
4851 	    copy,
4852 	    cur_protection,
4853 	    max_protection,
4854 	    inheritance,
4855 	    NULL,
4856 	    0);
4857 
4858 #if KASAN
4859 	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4860 		kasan_notify_address(*address, initial_size);
4861 	}
4862 #endif
4863 
4864 	return ret;
4865 }
4866 
4867 kern_return_t
vm_map_enter_mem_object_prefault(vm_map_t target_map,vm_map_offset_t * address,vm_map_size_t initial_size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,ipc_port_t port,vm_object_offset_t offset,vm_prot_t cur_protection,vm_prot_t max_protection,upl_page_list_ptr_t page_list,unsigned int page_list_count)4868 vm_map_enter_mem_object_prefault(
4869 	vm_map_t                target_map,
4870 	vm_map_offset_t         *address,
4871 	vm_map_size_t           initial_size,
4872 	vm_map_offset_t         mask,
4873 	int                     flags,
4874 	vm_map_kernel_flags_t   vmk_flags,
4875 	vm_tag_t                tag,
4876 	ipc_port_t              port,
4877 	vm_object_offset_t      offset,
4878 	vm_prot_t               cur_protection,
4879 	vm_prot_t               max_protection,
4880 	upl_page_list_ptr_t     page_list,
4881 	unsigned int            page_list_count)
4882 {
4883 	kern_return_t ret;
4884 
4885 	ret = vm_map_enter_mem_object_helper(target_map,
4886 	    address,
4887 	    initial_size,
4888 	    mask,
4889 	    flags,
4890 	    vmk_flags,
4891 	    tag,
4892 	    port,
4893 	    offset,
4894 	    FALSE,
4895 	    cur_protection,
4896 	    max_protection,
4897 	    VM_INHERIT_DEFAULT,
4898 	    page_list,
4899 	    page_list_count);
4900 
4901 #if KASAN
4902 	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4903 		kasan_notify_address(*address, initial_size);
4904 	}
4905 #endif
4906 
4907 	return ret;
4908 }
4909 
4910 
/*
 * vm_map_enter_mem_object_control:
 *
 *	Map the VM object backing the given memory object control
 *	into "target_map" with the requested protections and
 *	inheritance.  If "copy" is TRUE, a copy of the object's
 *	contents is mapped instead of the object itself.
 *	On success, "*address" is set to the address of the new
 *	mapping.
 */
kern_return_t
vm_map_enter_mem_object_control(
	vm_map_t                target_map,
	vm_map_offset_t         *address,
	vm_map_size_t           initial_size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	vm_tag_t                tag,
	memory_object_control_t control,
	vm_object_offset_t      offset,
	boolean_t               copy,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_address_t        map_addr;
	vm_map_size_t           map_size;
	vm_object_t             object;
	vm_object_size_t        size;
	kern_return_t           result;
	memory_object_t         pager;
	vm_prot_t               pager_prot;
	kern_return_t           kr;
#if __arm64__
	boolean_t               fourk = vmk_flags.vmkf_fourk;
#endif /* __arm64__ */

	/*
	 * Check arguments for validity
	 */
	if ((target_map == VM_MAP_NULL) ||
	    (cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
	    (max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) ||
	    (inheritance > VM_INHERIT_LAST_VALID) ||
	    initial_size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

#if __arm64__
	/* a "4K" mapping only makes sense if the map uses larger pages */
	if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
		fourk = FALSE;
	}

	if (fourk) {
		map_addr = vm_map_trunc_page(*address,
		    FOURK_PAGE_MASK);
		map_size = vm_map_round_page(initial_size,
		    FOURK_PAGE_MASK);
	} else
#endif /* __arm64__ */
	{
		map_addr = vm_map_trunc_page(*address,
		    VM_MAP_PAGE_MASK(target_map));
		map_size = vm_map_round_page(initial_size,
		    VM_MAP_PAGE_MASK(target_map));
	}
	size = vm_object_round_page(initial_size);

	object = memory_object_control_to_vm_object(control);

	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_OBJECT;
	}

	if (object == kernel_object) {
		printf("Warning: Attempt to map kernel object"
		    " by a non-private kernel entity\n");
		return KERN_INVALID_OBJECT;
	}

	/* take an extra reference on the object for this mapping */
	vm_object_lock(object);
	object->ref_count++;

	/*
	 * For "named" VM objects, let the pager know that the
	 * memory object is being mapped.  Some pagers need to keep
	 * track of this, to know when they can reclaim the memory
	 * object, for example.
	 * VM calls memory_object_map() for each mapping (specifying
	 * the protection of each mapping) and calls
	 * memory_object_last_unmap() when all the mappings are gone.
	 */
	pager_prot = max_protection;
	if (copy) {
		/* don't request write access from the pager when mapping a copy */
		pager_prot &= ~VM_PROT_WRITE;
	}
	pager = object->pager;
	if (object->named &&
	    pager != MEMORY_OBJECT_NULL &&
	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
		assert(object->pager_ready);
		vm_object_mapping_wait(object, THREAD_UNINT);
		vm_object_mapping_begin(object);
		/* drop the object lock around the upcall to the pager */
		vm_object_unlock(object);

		kr = memory_object_map(pager, pager_prot);
		assert(kr == KERN_SUCCESS);

		vm_object_lock(object);
		vm_object_mapping_end(object);
	}
	vm_object_unlock(object);

	/*
	 *	Perform the copy if requested
	 */

	if (copy) {
		vm_object_t             new_object;
		vm_object_offset_t      new_offset;

		result = vm_object_copy_strategically(object, offset, size,
		    &new_object, &new_offset,
		    &copy);


		if (result == KERN_MEMORY_RESTART_COPY) {
			boolean_t success;
			boolean_t src_needs_copy;

			/*
			 * XXX
			 * We currently ignore src_needs_copy.
			 * This really is the issue of how to make
			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
			 * non-kernel users to use. Solution forthcoming.
			 * In the meantime, since we don't allow non-kernel
			 * memory managers to specify symmetric copy,
			 * we won't run into problems here.
			 */
			new_object = object;
			new_offset = offset;
			success = vm_object_copy_quickly(&new_object,
			    new_offset, size,
			    &src_needs_copy,
			    &copy);
			assert(success);
			result = KERN_SUCCESS;
		}
		/*
		 *	Throw away the reference to the
		 *	original object, as it won't be mapped.
		 */

		vm_object_deallocate(object);

		if (result != KERN_SUCCESS) {
			return result;
		}

		/* map the copy instead of the original object */
		object = new_object;
		offset = new_offset;
	}

#if __arm64__
	if (fourk) {
		result = vm_map_enter_fourk(target_map,
		    &map_addr,
		    map_size,
		    (vm_map_offset_t)mask,
		    flags,
		    vmk_flags,
		    tag,
		    object, offset,
		    copy,
		    cur_protection, max_protection,
		    inheritance);
	} else
#endif /* __arm64__ */
	{
		result = vm_map_enter(target_map,
		    &map_addr, map_size,
		    (vm_map_offset_t)mask,
		    flags,
		    vmk_flags,
		    tag,
		    object, offset,
		    copy,
		    cur_protection, max_protection,
		    inheritance);
	}
	if (result != KERN_SUCCESS) {
		/* drop the reference taken above: the object wasn't mapped */
		vm_object_deallocate(object);
	}
	*address = map_addr;

	return result;
}
5100 
5101 
5102 #if     VM_CPM
5103 
5104 #ifdef MACH_ASSERT
5105 extern pmap_paddr_t     avail_start, avail_end;
5106 #endif
5107 
5108 /*
5109  *	Allocate memory in the specified map, with the caveat that
5110  *	the memory is physically contiguous.  This call may fail
5111  *	if the system can't find sufficient contiguous memory.
5112  *	This call may cause or lead to heart-stopping amounts of
5113  *	paging activity.
5114  *
5115  *	Memory obtained from this call should be freed in the
5116  *	normal way, viz., via vm_deallocate.
5117  */
5118 kern_return_t
vm_map_enter_cpm(vm_map_t map,vm_map_offset_t * addr,vm_map_size_t size,int flags)5119 vm_map_enter_cpm(
5120 	vm_map_t                map,
5121 	vm_map_offset_t *addr,
5122 	vm_map_size_t           size,
5123 	int                     flags)
5124 {
5125 	vm_object_t             cpm_obj;
5126 	pmap_t                  pmap;
5127 	vm_page_t               m, pages;
5128 	kern_return_t           kr;
5129 	vm_map_offset_t         va, start, end, offset;
5130 #if     MACH_ASSERT
5131 	vm_map_offset_t         prev_addr = 0;
5132 #endif  /* MACH_ASSERT */
5133 
5134 	boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5135 	vm_tag_t tag;
5136 
5137 	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5138 		/* XXX TODO4K do we need to support this? */
5139 		*addr = 0;
5140 		return KERN_NOT_SUPPORTED;
5141 	}
5142 
5143 	VM_GET_FLAGS_ALIAS(flags, tag);
5144 
5145 	if (size == 0) {
5146 		*addr = 0;
5147 		return KERN_SUCCESS;
5148 	}
5149 	if (anywhere) {
5150 		*addr = vm_map_min(map);
5151 	} else {
5152 		*addr = vm_map_trunc_page(*addr,
5153 		    VM_MAP_PAGE_MASK(map));
5154 	}
5155 	size = vm_map_round_page(size,
5156 	    VM_MAP_PAGE_MASK(map));
5157 
5158 	/*
5159 	 * LP64todo - cpm_allocate should probably allow
5160 	 * allocations of >4GB, but not with the current
5161 	 * algorithm, so just cast down the size for now.
5162 	 */
5163 	if (size > VM_MAX_ADDRESS) {
5164 		return KERN_RESOURCE_SHORTAGE;
5165 	}
5166 	if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5167 	    &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5168 		return kr;
5169 	}
5170 
5171 	cpm_obj = vm_object_allocate((vm_object_size_t)size);
5172 	assert(cpm_obj != VM_OBJECT_NULL);
5173 	assert(cpm_obj->internal);
5174 	assert(cpm_obj->vo_size == (vm_object_size_t)size);
5175 	assert(cpm_obj->can_persist == FALSE);
5176 	assert(cpm_obj->pager_created == FALSE);
5177 	assert(cpm_obj->pageout == FALSE);
5178 	assert(cpm_obj->shadow == VM_OBJECT_NULL);
5179 
5180 	/*
5181 	 *	Insert pages into object.
5182 	 */
5183 
5184 	vm_object_lock(cpm_obj);
5185 	for (offset = 0; offset < size; offset += PAGE_SIZE) {
5186 		m = pages;
5187 		pages = NEXT_PAGE(m);
5188 		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5189 
5190 		assert(!m->vmp_gobbled);
5191 		assert(!m->vmp_wanted);
5192 		assert(!m->vmp_pageout);
5193 		assert(!m->vmp_tabled);
5194 		assert(VM_PAGE_WIRED(m));
5195 		assert(m->vmp_busy);
5196 		assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5197 
5198 		m->vmp_busy = FALSE;
5199 		vm_page_insert(m, cpm_obj, offset);
5200 	}
5201 	assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5202 	vm_object_unlock(cpm_obj);
5203 
5204 	/*
5205 	 *	Hang onto a reference on the object in case a
5206 	 *	multi-threaded application for some reason decides
5207 	 *	to deallocate the portion of the address space into
5208 	 *	which we will insert this object.
5209 	 *
5210 	 *	Unfortunately, we must insert the object now before
5211 	 *	we can talk to the pmap module about which addresses
5212 	 *	must be wired down.  Hence, the race with a multi-
5213 	 *	threaded app.
5214 	 */
5215 	vm_object_reference(cpm_obj);
5216 
5217 	/*
5218 	 *	Insert object into map.
5219 	 */
5220 
5221 	kr = vm_map_enter(
5222 		map,
5223 		addr,
5224 		size,
5225 		(vm_map_offset_t)0,
5226 		flags,
5227 		VM_MAP_KERNEL_FLAGS_NONE,
5228 		cpm_obj,
5229 		(vm_object_offset_t)0,
5230 		FALSE,
5231 		VM_PROT_ALL,
5232 		VM_PROT_ALL,
5233 		VM_INHERIT_DEFAULT);
5234 
5235 	if (kr != KERN_SUCCESS) {
5236 		/*
5237 		 *	A CPM object doesn't have can_persist set,
5238 		 *	so all we have to do is deallocate it to
5239 		 *	free up these pages.
5240 		 */
5241 		assert(cpm_obj->pager_created == FALSE);
5242 		assert(cpm_obj->can_persist == FALSE);
5243 		assert(cpm_obj->pageout == FALSE);
5244 		assert(cpm_obj->shadow == VM_OBJECT_NULL);
5245 		vm_object_deallocate(cpm_obj); /* kill acquired ref */
5246 		vm_object_deallocate(cpm_obj); /* kill creation ref */
5247 	}
5248 
5249 	/*
5250 	 *	Inform the physical mapping system that the
5251 	 *	range of addresses may not fault, so that
5252 	 *	page tables and such can be locked down as well.
5253 	 */
5254 	start = *addr;
5255 	end = start + size;
5256 	pmap = vm_map_pmap(map);
5257 	pmap_pageable(pmap, start, end, FALSE);
5258 
5259 	/*
5260 	 *	Enter each page into the pmap, to avoid faults.
5261 	 *	Note that this loop could be coded more efficiently,
5262 	 *	if the need arose, rather than looking up each page
5263 	 *	again.
5264 	 */
5265 	for (offset = 0, va = start; offset < size;
5266 	    va += PAGE_SIZE, offset += PAGE_SIZE) {
5267 		int type_of_fault;
5268 
5269 		vm_object_lock(cpm_obj);
5270 		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5271 		assert(m != VM_PAGE_NULL);
5272 
5273 		vm_page_zero_fill(m);
5274 
5275 		type_of_fault = DBG_ZERO_FILL_FAULT;
5276 
5277 		vm_fault_enter(m, pmap, va,
5278 		    PAGE_SIZE, 0,
5279 		    VM_PROT_ALL, VM_PROT_WRITE,
5280 		    VM_PAGE_WIRED(m),
5281 		    FALSE,                             /* change_wiring */
5282 		    VM_KERN_MEMORY_NONE,                             /* tag - not wiring */
5283 		    FALSE,                             /* no_cache */
5284 		    FALSE,                             /* cs_bypass */
5285 		    0,                                 /* user_tag */
5286 		    0,                             /* pmap_options */
5287 		    NULL,                              /* need_retry */
5288 		    &type_of_fault);
5289 
5290 		vm_object_unlock(cpm_obj);
5291 	}
5292 
5293 #if     MACH_ASSERT
5294 	/*
5295 	 *	Verify ordering in address space.
5296 	 */
5297 	for (offset = 0; offset < size; offset += PAGE_SIZE) {
5298 		vm_object_lock(cpm_obj);
5299 		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5300 		vm_object_unlock(cpm_obj);
5301 		if (m == VM_PAGE_NULL) {
5302 			panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
5303 			    cpm_obj, (uint64_t)offset);
5304 		}
5305 		assert(m->vmp_tabled);
5306 		assert(!m->vmp_busy);
5307 		assert(!m->vmp_wanted);
5308 		assert(!m->vmp_fictitious);
5309 		assert(!m->vmp_private);
5310 		assert(!m->vmp_absent);
5311 		assert(!m->vmp_error);
5312 		assert(!m->vmp_cleaning);
5313 		assert(!m->vmp_laundry);
5314 		assert(!m->vmp_precious);
5315 		assert(!m->vmp_clustered);
5316 		if (offset != 0) {
5317 			if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5318 				printf("start 0x%llx end 0x%llx va 0x%llx\n",
5319 				    (uint64_t)start, (uint64_t)end, (uint64_t)va);
5320 				printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5321 				printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5322 				panic("vm_allocate_cpm:  pages not contig!");
5323 			}
5324 		}
5325 		prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5326 	}
5327 #endif  /* MACH_ASSERT */
5328 
5329 	vm_object_deallocate(cpm_obj); /* kill extra ref */
5330 
5331 	return kr;
5332 }
5333 
5334 
5335 #else   /* VM_CPM */
5336 
5337 /*
5338  *	Interface is defined in all cases, but unless the kernel
5339  *	is built explicitly for this option, the interface does
5340  *	nothing.
5341  */
5342 
kern_return_t
vm_map_enter_cpm(
	__unused vm_map_t       map,
	__unused vm_map_offset_t        *addr,
	__unused vm_map_size_t  size,
	__unused int            flags)
{
	/* CPM support is not compiled in (!VM_CPM): always fail */
	return KERN_FAILURE;
}
5352 #endif /* VM_CPM */
5353 
5354 /* Not used without nested pmaps */
5355 #ifndef NO_NESTED_PMAP
5356 /*
5357  * Clip and unnest a portion of a nested submap mapping.
5358  */
5359 
5360 
static void
vm_map_clip_unnest(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t start_unnest,
	vm_map_offset_t end_unnest)
{
	/* remember the caller's range for diagnostics below */
	vm_map_offset_t old_start_unnest = start_unnest;
	vm_map_offset_t old_end_unnest = end_unnest;

	/* only meaningful for nested ("use_pmap") submap entries */
	assert(entry->is_sub_map);
	assert(VME_SUBMAP(entry) != NULL);
	assert(entry->use_pmap);

	/*
	 * Query the platform for the optimal unnest range.
	 * DRK: There's some duplication of effort here, since
	 * callers may have adjusted the range to some extent. This
	 * routine was introduced to support 1GiB subtree nesting
	 * for x86 platforms, which can also nest on 2MiB boundaries
	 * depending on size/alignment.
	 */
	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
		assert(VME_SUBMAP(entry)->is_nested_map);
		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
		log_unnest_badness(map,
		    old_start_unnest,
		    old_end_unnest,
		    VME_SUBMAP(entry)->is_nested_map,
		    (entry->vme_start +
		    VME_SUBMAP(entry)->lowest_unnestable_start -
		    VME_OFFSET(entry)));
	}

	/* the unnest range must lie entirely within this entry */
	if (entry->vme_start > start_unnest ||
	    entry->vme_end < end_unnest) {
		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
		    "bad nested entry: start=0x%llx end=0x%llx\n",
		    (long long)start_unnest, (long long)end_unnest,
		    (long long)entry->vme_start, (long long)entry->vme_end);
	}

	/*
	 * Clip the entry down to exactly [start_unnest, end_unnest),
	 * updating the map's free-space hint after each split.
	 */
	if (start_unnest > entry->vme_start) {
		_vm_map_clip_start(&map->hdr,
		    entry,
		    start_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
	if (entry->vme_end > end_unnest) {
		_vm_map_clip_end(&map->hdr,
		    entry,
		    end_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}

	/* tear down the nested pmap association for this entry's range */
	pmap_unnest(map->pmap,
	    entry->vme_start,
	    entry->vme_end - entry->vme_start);
	if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
		/* clean up parent map/maps */
		vm_map_submap_pmap_clean(
			map, entry->vme_start,
			entry->vme_end,
			VME_SUBMAP(entry),
			VME_OFFSET(entry));
	}
	/* this entry no longer shares the submap's pmap */
	entry->use_pmap = FALSE;
	if ((map->pmap != kernel_pmap) &&
	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
		/* record that this range no longer uses the shared pmap */
		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
	}
}
5441 #endif  /* NO_NESTED_PMAP */
5442 
5443 /*
5444  *	vm_map_clip_start:	[ internal use only ]
5445  *
5446  *	Asserts that the given entry begins at or after
5447  *	the specified address; if necessary,
5448  *	it splits the entry into two.
5449  */
void
vm_map_clip_start(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t startaddr)
{
#ifndef NO_NESTED_PMAP
	/*
	 * A nested ("use_pmap") submap entry can't be clipped while
	 * its range is still nested in the pmap: un-nest the
	 * shared-region-sized block around "startaddr" first.
	 */
	if (entry->is_sub_map &&
	    entry->use_pmap &&
	    startaddr >= entry->vme_start) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure "startaddr" is no longer in a nested range
		 * before we clip.  Unnest only the minimum range the platform
		 * can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
		end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	/* no-op if the entry already starts at or after "startaddr" */
	if (startaddr > entry->vme_start) {
		/*
		 * NOTE(review): mappings of phys_contiguous objects are
		 * torn down from the pmap before clipping -- presumably
		 * because they were entered as one contiguous block that
		 * can't be partially removed; confirm before relying on it.
		 */
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		/* atomic entries must never be split */
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)", map, entry);
		}

		DTRACE_VM5(
			vm_map_clip_start,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, startaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_start(&map->hdr, entry, startaddr);
		/* the entry list changed: recompute the free-space hint */
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
5502 
5503 
/*
 * vm_map_copy_clip_start:
 *	Split "entry" within the given vm_map_copy's entry list so
 *	that it starts at "startaddr"; no-op if it already does.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END
5509 
5510 /*
5511  *	This routine is called only when it is known that
5512  *	the entry must be split.
5513  */
static void
_vm_map_clip_start(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         start)
{
	vm_map_entry_t  new_entry;

	/*
	 *	Split off the front portion --
	 *	note that we must insert the new
	 *	entry BEFORE this one, so that
	 *	this entry has the specified starting
	 *	address.
	 */

	/* clip addresses must be page-aligned in map-aligned headers */
	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	/* the new entry starts as a full copy of the old one... */
	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	/* ...truncated to [vme_start, start)... */
	new_entry->vme_end = start;
	assert(new_entry->vme_start < new_entry->vme_end);
	/* ...while the original entry now covers [start, vme_end) */
	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
	assert(start < entry->vme_end);
	entry->vme_start = start;

	/* link the new (front) entry immediately before the original */
	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

	/*
	 * Both entries now reference the same submap or object:
	 * take an extra reference for the new entry.
	 */
	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
5552 
5553 
5554 /*
5555  *	vm_map_clip_end:	[ internal use only ]
5556  *
5557  *	Asserts that the given entry ends at or before
5558  *	the specified address; if necessary,
5559  *	it splits the entry into two.
5560  */
void
vm_map_clip_end(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t endaddr)
{
	if (endaddr > entry->vme_end) {
		/*
		 * Within the scope of this clipping, limit "endaddr" to
		 * the end of this map entry...
		 */
		endaddr = entry->vme_end;
	}
#ifndef NO_NESTED_PMAP
	/*
	 * A nested ("use_pmap") submap entry can't be clipped while
	 * its range is still nested in the pmap: un-nest first.
	 */
	if (entry->is_sub_map && entry->use_pmap) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure the range between the start of this entry and
		 * the new "endaddr" is no longer nested before we clip.
		 * Unnest only the minimum range the platform can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = entry->vme_start;
		end_unnest =
		    (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
		    ~(pmap_shared_region_size_min(map->pmap) - 1);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	/* no-op if the entry already ends at or before "endaddr" */
	if (endaddr < entry->vme_end) {
		/*
		 * NOTE(review): mappings of phys_contiguous objects are
		 * torn down from the pmap before clipping -- presumably
		 * because they were entered as one contiguous block that
		 * can't be partially removed; confirm before relying on it.
		 */
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		/* atomic entries must never be split */
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)", map, entry);
		}
		DTRACE_VM5(
			vm_map_clip_end,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, endaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_end(&map->hdr, entry, endaddr);
		/* the entry list changed: recompute the free-space hint */
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
5619 
5620 
/*
 * vm_map_copy_clip_end:
 *	Split "entry" within the given vm_map_copy's entry list so
 *	that it ends at "endaddr"; no-op if it already does.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END
5626 
5627 /*
5628  *	This routine is called only when it is known that
5629  *	the entry must be split.
5630  */
static void
_vm_map_clip_end(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         end)
{
	vm_map_entry_t  new_entry;

	/*
	 *	Create a new entry and insert it
	 *	AFTER the specified entry
	 */

	/* clip addresses must be page-aligned in map-aligned headers */
	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(end,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	/* the new entry starts as a full copy of the old one */
	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	/* original keeps [vme_start, end); the new entry covers [end, vme_end) */
	assert(entry->vme_start < end);
	new_entry->vme_start = entry->vme_end = end;
	VME_OFFSET_SET(new_entry,
	    VME_OFFSET(new_entry) + (end - entry->vme_start));
	assert(new_entry->vme_start < new_entry->vme_end);

	/* link the new (tail) entry immediately after the original */
	_vm_map_store_entry_link(map_header, entry, new_entry);

	/*
	 * Both entries now reference the same submap or object:
	 * take an extra reference for the new entry.
	 */
	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
5666 
5667 
5668 /*
5669  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
5670  *
 *	Clamps the starting and ending region
 *	addresses into the valid range of the map
 *	and forces start <= end (modifies its arguments in place).
5673  */
/* Note: "start" and "end" are lvalues and may be modified in place. */
#define VM_MAP_RANGE_CHECK(map, start, end)     \
	MACRO_BEGIN                             \
	if (start < vm_map_min(map))            \
	        start = vm_map_min(map);        \
	if (end > vm_map_max(map))              \
	        end = vm_map_max(map);          \
	if (start > end)                        \
	        start = end;                    \
	MACRO_END
5683 
5684 /*
5685  *	vm_map_range_check:	[ internal use only ]
5686  *
5687  *	Check that the region defined by the specified start and
5688  *	end addresses are wholly contained within a single map
 *	entry or set of adjacent map entries of the specified map,
5690  *	i.e. the specified region contains no unmapped space.
5691  *	If any or all of the region is unmapped, FALSE is returned.
5692  *	Otherwise, TRUE is returned and if the output argument 'entry'
5693  *	is not NULL it points to the map entry containing the start
5694  *	of the region.
5695  *
5696  *	The map is locked for reading on entry and is left locked.
5697  */
5698 static boolean_t
vm_map_range_check(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_map_entry_t * entry)5699 vm_map_range_check(
5700 	vm_map_t                map,
5701 	vm_map_offset_t         start,
5702 	vm_map_offset_t         end,
5703 	vm_map_entry_t          *entry)
5704 {
5705 	vm_map_entry_t          cur;
5706 	vm_map_offset_t         prev;
5707 
5708 	/*
5709 	 *      Basic sanity checks first
5710 	 */
5711 	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5712 		return FALSE;
5713 	}
5714 
5715 	/*
5716 	 *      Check first if the region starts within a valid
5717 	 *	mapping for the map.
5718 	 */
5719 	if (!vm_map_lookup_entry(map, start, &cur)) {
5720 		return FALSE;
5721 	}
5722 
5723 	/*
5724 	 *	Optimize for the case that the region is contained
5725 	 *	in a single map entry.
5726 	 */
5727 	if (entry != (vm_map_entry_t *) NULL) {
5728 		*entry = cur;
5729 	}
5730 	if (end <= cur->vme_end) {
5731 		return TRUE;
5732 	}
5733 
5734 	/*
5735 	 *      If the region is not wholly contained within a
5736 	 *      single entry, walk the entries looking for holes.
5737 	 */
5738 	prev = cur->vme_end;
5739 	cur = cur->vme_next;
5740 	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5741 		if (end <= cur->vme_end) {
5742 			return TRUE;
5743 		}
5744 		prev = cur->vme_end;
5745 		cur = cur->vme_next;
5746 	}
5747 	return FALSE;
5748 }
5749 
5750 /*
5751  *	vm_map_submap:		[ kernel use only ]
5752  *
5753  *	Mark the given range as handled by a subordinate map.
5754  *
5755  *	This range must have been created with vm_map_find using
5756  *	the vm_submap_object, and no other operations may have been
5757  *	performed on this range prior to calling vm_map_submap.
5758  *
5759  *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
5761  *		vm_fault
5762  *	[Don't try vm_map_copyin!]
5763  *
5764  *	To remove a submapping, one must first remove the
5765  *	range from the superior map, and then destroy the
5766  *	submap (if desired).  [Better yet, don't try it.]
5767  */
kern_return_t
vm_map_submap(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t        submap,
	vm_map_offset_t offset,
#ifdef NO_NESTED_PMAP
	__unused
#endif  /* NO_NESTED_PMAP */
	boolean_t       use_pmap)
{
	vm_map_entry_t          entry;
	kern_return_t           result = KERN_INVALID_ARGUMENT;
	vm_object_t             object;

	vm_map_lock(map);

	/* if "start" falls in a hole, move to the next entry after it */
	if (!vm_map_lookup_entry(map, start, &entry)) {
		entry = entry->vme_next;
	}

	if (entry == vm_map_to_entry(map) ||
	    entry->is_sub_map) {
		vm_map_unlock(map);
		return KERN_INVALID_ARGUMENT;
	}

	/* clip the entry to exactly [start, end) */
	vm_map_clip_start(map, entry, start);
	vm_map_clip_end(map, entry, end);

	/*
	 * The range must now be a single entry still backed by the
	 * pristine vm_submap_object placeholder: no resident pages,
	 * no copy/shadow objects, no pager.  Anything else means
	 * operations were performed on the range after vm_map_find(),
	 * which the vm_map_submap() contract forbids.
	 */
	if ((entry->vme_start == start) && (entry->vme_end == end) &&
	    (!entry->is_sub_map) &&
	    ((object = VME_OBJECT(entry)) == vm_submap_object) &&
	    (object->resident_page_count == 0) &&
	    (object->copy == VM_OBJECT_NULL) &&
	    (object->shadow == VM_OBJECT_NULL) &&
	    (!object->pager_created)) {
		/* swap the placeholder object for a submap reference */
		VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
		VME_OBJECT_SET(entry, VM_OBJECT_NULL);
		vm_object_deallocate(object);
		entry->is_sub_map = TRUE;
		entry->use_pmap = FALSE;
		VME_SUBMAP_SET(entry, submap);
		vm_map_reference(submap);
		if (submap->mapped_in_other_pmaps == FALSE &&
		    vm_map_pmap(submap) != PMAP_NULL &&
		    vm_map_pmap(submap) != vm_map_pmap(map)) {
			/*
			 * This submap is being mapped in a map
			 * that uses a different pmap.
			 * Set its "mapped_in_other_pmaps" flag
			 * to indicate that we now need to
			 * remove mappings from all pmaps rather
			 * than just the submap's pmap.
			 */
			submap->mapped_in_other_pmaps = TRUE;
		}

#ifndef NO_NESTED_PMAP
		if (use_pmap) {
			/* nest if platform code will allow */
			if (submap->pmap == NULL) {
				/* lazily create the submap's pmap, billed to this map's ledger */
				ledger_t ledger = map->pmap->ledger;
				submap->pmap = pmap_create_options(ledger,
				    (vm_map_size_t) 0, 0);
				if (submap->pmap == PMAP_NULL) {
					vm_map_unlock(map);
					return KERN_NO_SPACE;
				}
#if     defined(__arm__) || defined(__arm64__)
				pmap_set_nested(submap->pmap);
#endif
			}
			result = pmap_nest(map->pmap,
			    (VME_SUBMAP(entry))->pmap,
			    (addr64_t)start,
			    (uint64_t)(end - start));
			if (result) {
				panic("vm_map_submap: pmap_nest failed, rc = %08X", result);
			}
			entry->use_pmap = TRUE;
		}
#else   /* NO_NESTED_PMAP */
		/* no pmap nesting: drop any existing translations for the range */
		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
#endif  /* NO_NESTED_PMAP */
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	return result;
}
5860 
5861 /*
5862  *	vm_map_protect:
5863  *
5864  *	Sets the protection of the specified address
5865  *	region in the target map.  If "set_max" is
5866  *	specified, the maximum protection is to be set;
5867  *	otherwise, only the current protection is affected.
5868  */
5869 kern_return_t
vm_map_protect(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t new_prot,boolean_t set_max)5870 vm_map_protect(
5871 	vm_map_t        map,
5872 	vm_map_offset_t start,
5873 	vm_map_offset_t end,
5874 	vm_prot_t       new_prot,
5875 	boolean_t       set_max)
5876 {
5877 	vm_map_entry_t                  current;
5878 	vm_map_offset_t                 prev;
5879 	vm_map_entry_t                  entry;
5880 	vm_prot_t                       new_max;
5881 	int                             pmap_options = 0;
5882 	kern_return_t                   kr;
5883 
5884 	if (new_prot & VM_PROT_COPY) {
5885 		vm_map_offset_t         new_start;
5886 		vm_prot_t               cur_prot, max_prot;
5887 		vm_map_kernel_flags_t   kflags;
5888 
5889 		/* LP64todo - see below */
5890 		if (start >= map->max_offset) {
5891 			return KERN_INVALID_ADDRESS;
5892 		}
5893 
5894 		if ((new_prot & VM_PROT_ALLEXEC) &&
5895 		    map->pmap != kernel_pmap &&
5896 		    (vm_map_cs_enforcement(map)
5897 #if XNU_TARGET_OS_OSX && __arm64__
5898 		    || !VM_MAP_IS_EXOTIC(map)
5899 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
5900 		    ) &&
5901 		    VM_MAP_POLICY_WX_FAIL(map)) {
5902 			DTRACE_VM3(cs_wx,
5903 			    uint64_t, (uint64_t) start,
5904 			    uint64_t, (uint64_t) end,
5905 			    vm_prot_t, new_prot);
5906 			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5907 			    proc_selfpid(),
5908 			    (current_task()->bsd_info
5909 			    ? proc_name_address(current_task()->bsd_info)
5910 			    : "?"),
5911 			    __FUNCTION__);
5912 			return KERN_PROTECTION_FAILURE;
5913 		}
5914 
5915 		/*
5916 		 * Let vm_map_remap_extract() know that it will need to:
5917 		 * + make a copy of the mapping
5918 		 * + add VM_PROT_WRITE to the max protections
5919 		 * + remove any protections that are no longer allowed from the
5920 		 *   max protections (to avoid any WRITE/EXECUTE conflict, for
5921 		 *   example).
5922 		 * Note that "max_prot" is an IN/OUT parameter only for this
5923 		 * specific (VM_PROT_COPY) case.  It's usually an OUT parameter
5924 		 * only.
5925 		 */
5926 		max_prot = new_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC);
5927 		cur_prot = VM_PROT_NONE;
5928 		kflags = VM_MAP_KERNEL_FLAGS_NONE;
5929 		kflags.vmkf_remap_prot_copy = TRUE;
5930 		kflags.vmkf_overwrite_immutable = TRUE;
5931 		new_start = start;
5932 		kr = vm_map_remap(map,
5933 		    &new_start,
5934 		    end - start,
5935 		    0, /* mask */
5936 		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5937 		    kflags,
5938 		    0,
5939 		    map,
5940 		    start,
5941 		    TRUE, /* copy-on-write remapping! */
5942 		    &cur_prot, /* IN/OUT */
5943 		    &max_prot, /* IN/OUT */
5944 		    VM_INHERIT_DEFAULT);
5945 		if (kr != KERN_SUCCESS) {
5946 			return kr;
5947 		}
5948 		new_prot &= ~VM_PROT_COPY;
5949 	}
5950 
5951 	vm_map_lock(map);
5952 
5953 	/* LP64todo - remove this check when vm_map_commpage64()
5954 	 * no longer has to stuff in a map_entry for the commpage
5955 	 * above the map's max_offset.
5956 	 */
5957 	if (start >= map->max_offset) {
5958 		vm_map_unlock(map);
5959 		return KERN_INVALID_ADDRESS;
5960 	}
5961 
5962 	while (1) {
5963 		/*
5964 		 *      Lookup the entry.  If it doesn't start in a valid
5965 		 *	entry, return an error.
5966 		 */
5967 		if (!vm_map_lookup_entry(map, start, &entry)) {
5968 			vm_map_unlock(map);
5969 			return KERN_INVALID_ADDRESS;
5970 		}
5971 
5972 		if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5973 			start = SUPERPAGE_ROUND_DOWN(start);
5974 			continue;
5975 		}
5976 		break;
5977 	}
5978 	if (entry->superpage_size) {
5979 		end = SUPERPAGE_ROUND_UP(end);
5980 	}
5981 
5982 	/*
5983 	 *	Make a first pass to check for protection and address
5984 	 *	violations.
5985 	 */
5986 
5987 	current = entry;
5988 	prev = current->vme_start;
5989 	while ((current != vm_map_to_entry(map)) &&
5990 	    (current->vme_start < end)) {
5991 		/*
5992 		 * If there is a hole, return an error.
5993 		 */
5994 		if (current->vme_start != prev) {
5995 			vm_map_unlock(map);
5996 			return KERN_INVALID_ADDRESS;
5997 		}
5998 
5999 		new_max = current->max_protection;
6000 
6001 #if defined(__x86_64__)
6002 		/* Allow max mask to include execute prot bits if this map doesn't enforce CS */
6003 		if (set_max && (new_prot & VM_PROT_ALLEXEC) && !vm_map_cs_enforcement(map)) {
6004 			new_max = (new_max & ~VM_PROT_ALLEXEC) | (new_prot & VM_PROT_ALLEXEC);
6005 		}
6006 #endif
6007 		if ((new_prot & new_max) != new_prot) {
6008 			vm_map_unlock(map);
6009 			return KERN_PROTECTION_FAILURE;
6010 		}
6011 
6012 		if (current->used_for_jit &&
6013 		    pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
6014 			vm_map_unlock(map);
6015 			return KERN_PROTECTION_FAILURE;
6016 		}
6017 
6018 		if ((new_prot & VM_PROT_WRITE) &&
6019 		    (new_prot & VM_PROT_ALLEXEC) &&
6020 #if XNU_TARGET_OS_OSX
6021 		    map->pmap != kernel_pmap &&
6022 		    (vm_map_cs_enforcement(map)
6023 #if __arm64__
6024 		    || !VM_MAP_IS_EXOTIC(map)
6025 #endif /* __arm64__ */
6026 		    ) &&
6027 #endif /* XNU_TARGET_OS_OSX */
6028 		    !(current->used_for_jit)) {
6029 			DTRACE_VM3(cs_wx,
6030 			    uint64_t, (uint64_t) current->vme_start,
6031 			    uint64_t, (uint64_t) current->vme_end,
6032 			    vm_prot_t, new_prot);
6033 			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6034 			    proc_selfpid(),
6035 			    (current_task()->bsd_info
6036 			    ? proc_name_address(current_task()->bsd_info)
6037 			    : "?"),
6038 			    __FUNCTION__);
6039 			new_prot &= ~VM_PROT_ALLEXEC;
6040 			if (VM_MAP_POLICY_WX_FAIL(map)) {
6041 				vm_map_unlock(map);
6042 				return KERN_PROTECTION_FAILURE;
6043 			}
6044 		}
6045 
6046 		/*
6047 		 * If the task has requested executable lockdown,
6048 		 * deny both:
6049 		 * - adding executable protections OR
6050 		 * - adding write protections to an existing executable mapping.
6051 		 */
6052 		if (map->map_disallow_new_exec == TRUE) {
6053 			if ((new_prot & VM_PROT_ALLEXEC) ||
6054 			    ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
6055 				vm_map_unlock(map);
6056 				return KERN_PROTECTION_FAILURE;
6057 			}
6058 		}
6059 
6060 		prev = current->vme_end;
6061 		current = current->vme_next;
6062 	}
6063 
6064 #if __arm64__
6065 	if (end > prev &&
6066 	    end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
6067 		vm_map_entry_t prev_entry;
6068 
6069 		prev_entry = current->vme_prev;
6070 		if (prev_entry != vm_map_to_entry(map) &&
6071 		    !prev_entry->map_aligned &&
6072 		    (vm_map_round_page(prev_entry->vme_end,
6073 		    VM_MAP_PAGE_MASK(map))
6074 		    == end)) {
6075 			/*
6076 			 * The last entry in our range is not "map-aligned"
6077 			 * but it would have reached all the way to "end"
6078 			 * if it had been map-aligned, so this is not really
6079 			 * a hole in the range and we can proceed.
6080 			 */
6081 			prev = end;
6082 		}
6083 	}
6084 #endif /* __arm64__ */
6085 
6086 	if (end > prev) {
6087 		vm_map_unlock(map);
6088 		return KERN_INVALID_ADDRESS;
6089 	}
6090 
6091 	/*
6092 	 *	Go back and fix up protections.
6093 	 *	Clip to start here if the range starts within
6094 	 *	the entry.
6095 	 */
6096 
6097 	current = entry;
6098 	if (current != vm_map_to_entry(map)) {
6099 		/* clip and unnest if necessary */
6100 		vm_map_clip_start(map, current, start);
6101 	}
6102 
6103 	while ((current != vm_map_to_entry(map)) &&
6104 	    (current->vme_start < end)) {
6105 		vm_prot_t       old_prot;
6106 
6107 		vm_map_clip_end(map, current, end);
6108 
6109 		if (current->is_sub_map) {
6110 			/* clipping did unnest if needed */
6111 			assert(!current->use_pmap);
6112 		}
6113 
6114 		old_prot = current->protection;
6115 
6116 		if (set_max) {
6117 			current->max_protection = new_prot;
6118 			/* Consider either EXECUTE or UEXEC as EXECUTE for this masking */
6119 			current->protection = (new_prot & old_prot);
6120 		} else {
6121 			current->protection = new_prot;
6122 		}
6123 
6124 		/*
6125 		 *	Update physical map if necessary.
6126 		 *	If the request is to turn off write protection,
6127 		 *	we won't do it for real (in pmap). This is because
6128 		 *	it would cause copy-on-write to fail.  We've already
6129 		 *	set, the new protection in the map, so if a
6130 		 *	write-protect fault occurred, it will be fixed up
6131 		 *	properly, COW or not.
6132 		 */
6133 		if (current->protection != old_prot) {
6134 			/* Look one level in we support nested pmaps */
6135 			/* from mapped submaps which are direct entries */
6136 			/* in our map */
6137 
6138 			vm_prot_t prot;
6139 
6140 			prot = current->protection;
6141 			if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
6142 				prot &= ~VM_PROT_WRITE;
6143 			} else {
6144 				assert(!VME_OBJECT(current)->code_signed);
6145 				assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6146 			}
6147 
6148 			if (override_nx(map, VME_ALIAS(current)) && prot) {
6149 				prot |= VM_PROT_EXECUTE;
6150 			}
6151 
6152 #if DEVELOPMENT || DEBUG
6153 			if (!(old_prot & VM_PROT_EXECUTE) &&
6154 			    (prot & VM_PROT_EXECUTE) &&
6155 			    panic_on_unsigned_execute &&
6156 			    (proc_selfcsflags() & CS_KILL)) {
6157 				panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6158 			}
6159 #endif /* DEVELOPMENT || DEBUG */
6160 
6161 			if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
6162 				if (current->wired_count) {
6163 					panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x",
6164 					    map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6165 				}
6166 
6167 				/* If the pmap layer cares about this
6168 				 * protection type, force a fault for
6169 				 * each page so that vm_fault will
6170 				 * repopulate the page with the full
6171 				 * set of protections.
6172 				 */
6173 				/*
6174 				 * TODO: We don't seem to need this,
6175 				 * but this is due to an internal
6176 				 * implementation detail of
6177 				 * pmap_protect.  Do we want to rely
6178 				 * on this?
6179 				 */
6180 				prot = VM_PROT_NONE;
6181 			}
6182 
6183 			if (current->is_sub_map && current->use_pmap) {
6184 				pmap_protect(VME_SUBMAP(current)->pmap,
6185 				    current->vme_start,
6186 				    current->vme_end,
6187 				    prot);
6188 			} else {
6189 				if (prot & VM_PROT_WRITE) {
6190 					if (VME_OBJECT(current) == compressor_object) {
6191 						/*
6192 						 * For write requests on the
6193 						 * compressor, we wil ask the
6194 						 * pmap layer to prevent us from
6195 						 * taking a write fault when we
6196 						 * attempt to access the mapping
6197 						 * next.
6198 						 */
6199 						pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6200 					}
6201 				}
6202 
6203 				pmap_protect_options(map->pmap,
6204 				    current->vme_start,
6205 				    current->vme_end,
6206 				    prot,
6207 				    pmap_options,
6208 				    NULL);
6209 			}
6210 		}
6211 		current = current->vme_next;
6212 	}
6213 
6214 	current = entry;
6215 	while ((current != vm_map_to_entry(map)) &&
6216 	    (current->vme_start <= end)) {
6217 		vm_map_simplify_entry(map, current);
6218 		current = current->vme_next;
6219 	}
6220 
6221 	vm_map_unlock(map);
6222 	return KERN_SUCCESS;
6223 }
6224 
6225 /*
6226  *	vm_map_inherit:
6227  *
6228  *	Sets the inheritance of the specified address
6229  *	range in the target map.  Inheritance
6230  *	affects how the map will be shared with
6231  *	child maps at the time of vm_map_fork.
6232  */
6233 kern_return_t
vm_map_inherit(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_inherit_t new_inheritance)6234 vm_map_inherit(
6235 	vm_map_t        map,
6236 	vm_map_offset_t start,
6237 	vm_map_offset_t end,
6238 	vm_inherit_t    new_inheritance)
6239 {
6240 	vm_map_entry_t  entry;
6241 	vm_map_entry_t  temp_entry;
6242 
6243 	vm_map_lock(map);
6244 
6245 	VM_MAP_RANGE_CHECK(map, start, end);
6246 
6247 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
6248 		entry = temp_entry;
6249 	} else {
6250 		temp_entry = temp_entry->vme_next;
6251 		entry = temp_entry;
6252 	}
6253 
6254 	/* first check entire range for submaps which can't support the */
6255 	/* given inheritance. */
6256 	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6257 		if (entry->is_sub_map) {
6258 			if (new_inheritance == VM_INHERIT_COPY) {
6259 				vm_map_unlock(map);
6260 				return KERN_INVALID_ARGUMENT;
6261 			}
6262 		}
6263 
6264 		entry = entry->vme_next;
6265 	}
6266 
6267 	entry = temp_entry;
6268 	if (entry != vm_map_to_entry(map)) {
6269 		/* clip and unnest if necessary */
6270 		vm_map_clip_start(map, entry, start);
6271 	}
6272 
6273 	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6274 		vm_map_clip_end(map, entry, end);
6275 		if (entry->is_sub_map) {
6276 			/* clip did unnest if needed */
6277 			assert(!entry->use_pmap);
6278 		}
6279 
6280 		entry->inheritance = new_inheritance;
6281 
6282 		entry = entry->vme_next;
6283 	}
6284 
6285 	vm_map_unlock(map);
6286 	return KERN_SUCCESS;
6287 }
6288 
6289 /*
6290  * Update the accounting for the amount of wired memory in this map.  If the user has
6291  * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
6292  */
6293 
6294 static kern_return_t
add_wire_counts(vm_map_t map,vm_map_entry_t entry,boolean_t user_wire)6295 add_wire_counts(
6296 	vm_map_t        map,
6297 	vm_map_entry_t  entry,
6298 	boolean_t       user_wire)
6299 {
6300 	vm_map_size_t   size;
6301 
6302 	if (user_wire) {
6303 		unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
6304 
6305 		/*
6306 		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
6307 		 * this map entry.
6308 		 */
6309 
6310 		if (entry->user_wired_count == 0) {
6311 			size = entry->vme_end - entry->vme_start;
6312 
6313 			/*
6314 			 * Since this is the first time the user is wiring this map entry, check to see if we're
6315 			 * exceeding the user wire limits.  There is a per map limit which is the smaller of either
6316 			 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value.  There is also
6317 			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
6318 			 * limit, then we fail.
6319 			 */
6320 
6321 			if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6322 			    size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6323 				if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6324 #if DEVELOPMENT || DEBUG
6325 					if (panic_on_mlock_failure) {
6326 						panic("mlock: Over global wire limit. %llu bytes wired and requested to wire %llu bytes more", ptoa_64(total_wire_count), (uint64_t) size);
6327 					}
6328 #endif /* DEVELOPMENT || DEBUG */
6329 					os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6330 				} else {
6331 					os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6332 #if DEVELOPMENT || DEBUG
6333 					if (panic_on_mlock_failure) {
6334 						panic("mlock: Over process wire limit. %llu bytes wired and requested to wire %llu bytes more", (uint64_t) map->user_wire_size, (uint64_t) size);
6335 					}
6336 #endif /* DEVELOPMENT || DEBUG */
6337 				}
6338 				return KERN_RESOURCE_SHORTAGE;
6339 			}
6340 
6341 			/*
6342 			 * The first time the user wires an entry, we also increment the wired_count and add this to
6343 			 * the total that has been wired in the map.
6344 			 */
6345 
6346 			if (entry->wired_count >= MAX_WIRE_COUNT) {
6347 				return KERN_FAILURE;
6348 			}
6349 
6350 			entry->wired_count++;
6351 			map->user_wire_size += size;
6352 		}
6353 
6354 		if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6355 			return KERN_FAILURE;
6356 		}
6357 
6358 		entry->user_wired_count++;
6359 	} else {
6360 		/*
6361 		 * The kernel's wiring the memory.  Just bump the count and continue.
6362 		 */
6363 
6364 		if (entry->wired_count >= MAX_WIRE_COUNT) {
6365 			panic("vm_map_wire: too many wirings");
6366 		}
6367 
6368 		entry->wired_count++;
6369 	}
6370 
6371 	return KERN_SUCCESS;
6372 }
6373 
6374 /*
6375  * Update the memory wiring accounting now that the given map entry is being unwired.
6376  */
6377 
6378 static void
subtract_wire_counts(vm_map_t map,vm_map_entry_t entry,boolean_t user_wire)6379 subtract_wire_counts(
6380 	vm_map_t        map,
6381 	vm_map_entry_t  entry,
6382 	boolean_t       user_wire)
6383 {
6384 	if (user_wire) {
6385 		/*
6386 		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
6387 		 */
6388 
6389 		if (entry->user_wired_count == 1) {
6390 			/*
6391 			 * We're removing the last user wire reference.  Decrement the wired_count and the total
6392 			 * user wired memory for this map.
6393 			 */
6394 
6395 			assert(entry->wired_count >= 1);
6396 			entry->wired_count--;
6397 			map->user_wire_size -= entry->vme_end - entry->vme_start;
6398 		}
6399 
6400 		assert(entry->user_wired_count >= 1);
6401 		entry->user_wired_count--;
6402 	} else {
6403 		/*
6404 		 * The kernel is unwiring the memory.   Just update the count.
6405 		 */
6406 
6407 		assert(entry->wired_count >= 1);
6408 		entry->wired_count--;
6409 	}
6410 }
6411 
int cs_executable_wire = 0;     /* NOTE(review): stat counter, presumably bumped when wiring executable memory — its users are not visible in this section; confirm against references to this symbol */
6413 
6414 /*
6415  *	vm_map_wire:
6416  *
6417  *	Sets the pageability of the specified address range in the
6418  *	target map as wired.  Regions specified as not pageable require
6419  *	locked-down physical memory and physical page maps.  The
6420  *	access_type variable indicates types of accesses that must not
6421  *	generate page faults.  This is checked against protection of
6422  *	memory being locked-down.
6423  *
6424  *	The map must not be locked, but a reference must remain to the
6425  *	map throughout the call.
6426  */
6427 static kern_return_t
vm_map_wire_nested(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire,pmap_t map_pmap,vm_map_offset_t pmap_addr,ppnum_t * physpage_p)6428 vm_map_wire_nested(
6429 	vm_map_t                map,
6430 	vm_map_offset_t         start,
6431 	vm_map_offset_t         end,
6432 	vm_prot_t               caller_prot,
6433 	vm_tag_t                tag,
6434 	boolean_t               user_wire,
6435 	pmap_t                  map_pmap,
6436 	vm_map_offset_t         pmap_addr,
6437 	ppnum_t                 *physpage_p)
6438 {
6439 	vm_map_entry_t          entry;
6440 	vm_prot_t               access_type;
6441 	struct vm_map_entry     *first_entry, tmp_entry;
6442 	vm_map_t                real_map;
6443 	vm_map_offset_t         s, e;
6444 	kern_return_t           rc;
6445 	boolean_t               need_wakeup;
6446 	boolean_t               main_map = FALSE;
6447 	wait_interrupt_t        interruptible_state;
6448 	thread_t                cur_thread;
6449 	unsigned int            last_timestamp;
6450 	vm_map_size_t           size;
6451 	boolean_t               wire_and_extract;
6452 	vm_prot_t               extra_prots;
6453 
6454 	extra_prots = VM_PROT_COPY;
6455 	extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6456 #if XNU_TARGET_OS_OSX
6457 	if (map->pmap == kernel_pmap ||
6458 	    !vm_map_cs_enforcement(map)) {
6459 		extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6460 	}
6461 #endif /* XNU_TARGET_OS_OSX */
6462 
6463 	access_type = (caller_prot & (VM_PROT_ALL | VM_PROT_ALLEXEC));
6464 
6465 	wire_and_extract = FALSE;
6466 	if (physpage_p != NULL) {
6467 		/*
6468 		 * The caller wants the physical page number of the
6469 		 * wired page.  We return only one physical page number
6470 		 * so this works for only one page at a time.
6471 		 */
6472 		if ((end - start) != PAGE_SIZE) {
6473 			return KERN_INVALID_ARGUMENT;
6474 		}
6475 		wire_and_extract = TRUE;
6476 		*physpage_p = 0;
6477 	}
6478 
6479 	vm_map_lock(map);
6480 	if (map_pmap == NULL) {
6481 		main_map = TRUE;
6482 	}
6483 	last_timestamp = map->timestamp;
6484 
6485 	VM_MAP_RANGE_CHECK(map, start, end);
6486 	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6487 	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6488 
6489 	if (start == end) {
6490 		/* We wired what the caller asked for, zero pages */
6491 		vm_map_unlock(map);
6492 		return KERN_SUCCESS;
6493 	}
6494 
6495 	need_wakeup = FALSE;
6496 	cur_thread = current_thread();
6497 
6498 	s = start;
6499 	rc = KERN_SUCCESS;
6500 
6501 	if (vm_map_lookup_entry(map, s, &first_entry)) {
6502 		entry = first_entry;
6503 		/*
6504 		 * vm_map_clip_start will be done later.
6505 		 * We don't want to unnest any nested submaps here !
6506 		 */
6507 	} else {
6508 		/* Start address is not in map */
6509 		rc = KERN_INVALID_ADDRESS;
6510 		goto done;
6511 	}
6512 
6513 	while ((entry != vm_map_to_entry(map)) && (s < end)) {
6514 		/*
6515 		 * At this point, we have wired from "start" to "s".
6516 		 * We still need to wire from "s" to "end".
6517 		 *
6518 		 * "entry" hasn't been clipped, so it could start before "s"
6519 		 * and/or end after "end".
6520 		 */
6521 
6522 		/* "e" is how far we want to wire in this entry */
6523 		e = entry->vme_end;
6524 		if (e > end) {
6525 			e = end;
6526 		}
6527 
6528 		/*
6529 		 * If another thread is wiring/unwiring this entry then
6530 		 * block after informing other thread to wake us up.
6531 		 */
6532 		if (entry->in_transition) {
6533 			wait_result_t wait_result;
6534 
6535 			/*
6536 			 * We have not clipped the entry.  Make sure that
6537 			 * the start address is in range so that the lookup
6538 			 * below will succeed.
6539 			 * "s" is the current starting point: we've already
6540 			 * wired from "start" to "s" and we still have
6541 			 * to wire from "s" to "end".
6542 			 */
6543 
6544 			entry->needs_wakeup = TRUE;
6545 
6546 			/*
6547 			 * wake up anybody waiting on entries that we have
6548 			 * already wired.
6549 			 */
6550 			if (need_wakeup) {
6551 				vm_map_entry_wakeup(map);
6552 				need_wakeup = FALSE;
6553 			}
6554 			/*
6555 			 * User wiring is interruptible
6556 			 */
6557 			wait_result = vm_map_entry_wait(map,
6558 			    (user_wire) ? THREAD_ABORTSAFE :
6559 			    THREAD_UNINT);
6560 			if (user_wire && wait_result == THREAD_INTERRUPTED) {
6561 				/*
6562 				 * undo the wirings we have done so far
6563 				 * We do not clear the needs_wakeup flag,
6564 				 * because we cannot tell if we were the
6565 				 * only one waiting.
6566 				 */
6567 				rc = KERN_FAILURE;
6568 				goto done;
6569 			}
6570 
6571 			/*
6572 			 * Cannot avoid a lookup here. reset timestamp.
6573 			 */
6574 			last_timestamp = map->timestamp;
6575 
6576 			/*
6577 			 * The entry could have been clipped, look it up again.
6578 			 * Worse that can happen is, it may not exist anymore.
6579 			 */
6580 			if (!vm_map_lookup_entry(map, s, &first_entry)) {
6581 				/*
6582 				 * User: undo everything upto the previous
6583 				 * entry.  let vm_map_unwire worry about
6584 				 * checking the validity of the range.
6585 				 */
6586 				rc = KERN_FAILURE;
6587 				goto done;
6588 			}
6589 			entry = first_entry;
6590 			continue;
6591 		}
6592 
6593 		if (entry->is_sub_map) {
6594 			vm_map_offset_t sub_start;
6595 			vm_map_offset_t sub_end;
6596 			vm_map_offset_t local_start;
6597 			vm_map_offset_t local_end;
6598 			pmap_t          pmap;
6599 
6600 			if (wire_and_extract) {
6601 				/*
6602 				 * Wiring would result in copy-on-write
6603 				 * which would not be compatible with
6604 				 * the sharing we have with the original
6605 				 * provider of this memory.
6606 				 */
6607 				rc = KERN_INVALID_ARGUMENT;
6608 				goto done;
6609 			}
6610 
6611 			vm_map_clip_start(map, entry, s);
6612 			vm_map_clip_end(map, entry, end);
6613 
6614 			sub_start = VME_OFFSET(entry);
6615 			sub_end = entry->vme_end;
6616 			sub_end += VME_OFFSET(entry) - entry->vme_start;
6617 
6618 			local_end = entry->vme_end;
6619 			if (map_pmap == NULL) {
6620 				vm_object_t             object;
6621 				vm_object_offset_t      offset;
6622 				vm_prot_t               prot;
6623 				boolean_t               wired;
6624 				vm_map_entry_t          local_entry;
6625 				vm_map_version_t         version;
6626 				vm_map_t                lookup_map;
6627 
6628 				if (entry->use_pmap) {
6629 					pmap = VME_SUBMAP(entry)->pmap;
6630 					/* ppc implementation requires that */
6631 					/* submaps pmap address ranges line */
6632 					/* up with parent map */
6633 #ifdef notdef
6634 					pmap_addr = sub_start;
6635 #endif
6636 					pmap_addr = s;
6637 				} else {
6638 					pmap = map->pmap;
6639 					pmap_addr = s;
6640 				}
6641 
6642 				if (entry->wired_count) {
6643 					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6644 						goto done;
6645 					}
6646 
6647 					/*
6648 					 * The map was not unlocked:
6649 					 * no need to goto re-lookup.
6650 					 * Just go directly to next entry.
6651 					 */
6652 					entry = entry->vme_next;
6653 					s = entry->vme_start;
6654 					continue;
6655 				}
6656 
6657 				/* call vm_map_lookup_locked to */
6658 				/* cause any needs copy to be   */
6659 				/* evaluated */
6660 				local_start = entry->vme_start;
6661 				lookup_map = map;
6662 				vm_map_lock_write_to_read(map);
6663 				rc = vm_map_lookup_locked(
6664 					&lookup_map, local_start,
6665 					(access_type | extra_prots),
6666 					OBJECT_LOCK_EXCLUSIVE,
6667 					&version, &object,
6668 					&offset, &prot, &wired,
6669 					NULL,
6670 					&real_map, NULL);
6671 				if (rc != KERN_SUCCESS) {
6672 					vm_map_unlock_read(lookup_map);
6673 					assert(map_pmap == NULL);
6674 					vm_map_unwire(map, start,
6675 					    s, user_wire);
6676 					return rc;
6677 				}
6678 				vm_object_unlock(object);
6679 				if (real_map != lookup_map) {
6680 					vm_map_unlock(real_map);
6681 				}
6682 				vm_map_unlock_read(lookup_map);
6683 				vm_map_lock(map);
6684 
6685 				/* we unlocked, so must re-lookup */
6686 				if (!vm_map_lookup_entry(map,
6687 				    local_start,
6688 				    &local_entry)) {
6689 					rc = KERN_FAILURE;
6690 					goto done;
6691 				}
6692 
6693 				/*
6694 				 * entry could have been "simplified",
6695 				 * so re-clip
6696 				 */
6697 				entry = local_entry;
6698 				assert(s == local_start);
6699 				vm_map_clip_start(map, entry, s);
6700 				vm_map_clip_end(map, entry, end);
6701 				/* re-compute "e" */
6702 				e = entry->vme_end;
6703 				if (e > end) {
6704 					e = end;
6705 				}
6706 
6707 				/* did we have a change of type? */
6708 				if (!entry->is_sub_map) {
6709 					last_timestamp = map->timestamp;
6710 					continue;
6711 				}
6712 			} else {
6713 				local_start = entry->vme_start;
6714 				pmap = map_pmap;
6715 			}
6716 
6717 			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6718 				goto done;
6719 			}
6720 
6721 			entry->in_transition = TRUE;
6722 
6723 			vm_map_unlock(map);
6724 			rc = vm_map_wire_nested(VME_SUBMAP(entry),
6725 			    sub_start, sub_end,
6726 			    caller_prot, tag,
6727 			    user_wire, pmap, pmap_addr,
6728 			    NULL);
6729 			vm_map_lock(map);
6730 
6731 			/*
6732 			 * Find the entry again.  It could have been clipped
6733 			 * after we unlocked the map.
6734 			 */
6735 			if (!vm_map_lookup_entry(map, local_start,
6736 			    &first_entry)) {
6737 				panic("vm_map_wire: re-lookup failed");
6738 			}
6739 			entry = first_entry;
6740 
6741 			assert(local_start == s);
6742 			/* re-compute "e" */
6743 			e = entry->vme_end;
6744 			if (e > end) {
6745 				e = end;
6746 			}
6747 
6748 			last_timestamp = map->timestamp;
6749 			while ((entry != vm_map_to_entry(map)) &&
6750 			    (entry->vme_start < e)) {
6751 				assert(entry->in_transition);
6752 				entry->in_transition = FALSE;
6753 				if (entry->needs_wakeup) {
6754 					entry->needs_wakeup = FALSE;
6755 					need_wakeup = TRUE;
6756 				}
6757 				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6758 					subtract_wire_counts(map, entry, user_wire);
6759 				}
6760 				entry = entry->vme_next;
6761 			}
6762 			if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6763 				goto done;
6764 			}
6765 
6766 			/* no need to relookup again */
6767 			s = entry->vme_start;
6768 			continue;
6769 		}
6770 
6771 		/*
6772 		 * If this entry is already wired then increment
6773 		 * the appropriate wire reference count.
6774 		 */
6775 		if (entry->wired_count) {
6776 			if ((entry->protection & access_type) != access_type) {
6777 				/* found a protection problem */
6778 
6779 				/*
6780 				 * XXX FBDP
6781 				 * We should always return an error
6782 				 * in this case but since we didn't
6783 				 * enforce it before, let's do
6784 				 * it only for the new "wire_and_extract"
6785 				 * code path for now...
6786 				 */
6787 				if (wire_and_extract) {
6788 					rc = KERN_PROTECTION_FAILURE;
6789 					goto done;
6790 				}
6791 			}
6792 
6793 			/*
6794 			 * entry is already wired down, get our reference
6795 			 * after clipping to our range.
6796 			 */
6797 			vm_map_clip_start(map, entry, s);
6798 			vm_map_clip_end(map, entry, end);
6799 
6800 			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6801 				goto done;
6802 			}
6803 
6804 			if (wire_and_extract) {
6805 				vm_object_t             object;
6806 				vm_object_offset_t      offset;
6807 				vm_page_t               m;
6808 
6809 				/*
6810 				 * We don't have to "wire" the page again
6811 				 * bit we still have to "extract" its
6812 				 * physical page number, after some sanity
6813 				 * checks.
6814 				 */
6815 				assert((entry->vme_end - entry->vme_start)
6816 				    == PAGE_SIZE);
6817 				assert(!entry->needs_copy);
6818 				assert(!entry->is_sub_map);
6819 				assert(VME_OBJECT(entry));
6820 				if (((entry->vme_end - entry->vme_start)
6821 				    != PAGE_SIZE) ||
6822 				    entry->needs_copy ||
6823 				    entry->is_sub_map ||
6824 				    VME_OBJECT(entry) == VM_OBJECT_NULL) {
6825 					rc = KERN_INVALID_ARGUMENT;
6826 					goto done;
6827 				}
6828 
6829 				object = VME_OBJECT(entry);
6830 				offset = VME_OFFSET(entry);
6831 				/* need exclusive lock to update m->dirty */
6832 				if (entry->protection & VM_PROT_WRITE) {
6833 					vm_object_lock(object);
6834 				} else {
6835 					vm_object_lock_shared(object);
6836 				}
6837 				m = vm_page_lookup(object, offset);
6838 				assert(m != VM_PAGE_NULL);
6839 				assert(VM_PAGE_WIRED(m));
6840 				if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6841 					*physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6842 					if (entry->protection & VM_PROT_WRITE) {
6843 						vm_object_lock_assert_exclusive(
6844 							object);
6845 						m->vmp_dirty = TRUE;
6846 					}
6847 				} else {
6848 					/* not already wired !? */
6849 					*physpage_p = 0;
6850 				}
6851 				vm_object_unlock(object);
6852 			}
6853 
6854 			/* map was not unlocked: no need to relookup */
6855 			entry = entry->vme_next;
6856 			s = entry->vme_start;
6857 			continue;
6858 		}
6859 
6860 		/*
6861 		 * Unwired entry or wire request transmitted via submap
6862 		 */
6863 
6864 		/*
6865 		 * Wiring would copy the pages to the shadow object.
6866 		 * The shadow object would not be code-signed so
6867 		 * attempting to execute code from these copied pages
6868 		 * would trigger a code-signing violation.
6869 		 */
6870 
6871 		if ((entry->protection & VM_PROT_EXECUTE)
6872 #if XNU_TARGET_OS_OSX
6873 		    &&
6874 		    map->pmap != kernel_pmap &&
6875 		    (vm_map_cs_enforcement(map)
6876 #if __arm64__
6877 		    || !VM_MAP_IS_EXOTIC(map)
6878 #endif /* __arm64__ */
6879 		    )
6880 #endif /* XNU_TARGET_OS_OSX */
6881 		    ) {
6882 #if MACH_ASSERT
6883 			printf("pid %d[%s] wiring executable range from "
6884 			    "0x%llx to 0x%llx: rejected to preserve "
6885 			    "code-signing\n",
6886 			    proc_selfpid(),
6887 			    (current_task()->bsd_info
6888 			    ? proc_name_address(current_task()->bsd_info)
6889 			    : "?"),
6890 			    (uint64_t) entry->vme_start,
6891 			    (uint64_t) entry->vme_end);
6892 #endif /* MACH_ASSERT */
6893 			DTRACE_VM2(cs_executable_wire,
6894 			    uint64_t, (uint64_t)entry->vme_start,
6895 			    uint64_t, (uint64_t)entry->vme_end);
6896 			cs_executable_wire++;
6897 			rc = KERN_PROTECTION_FAILURE;
6898 			goto done;
6899 		}
6900 
6901 		/*
6902 		 * Perform actions of vm_map_lookup that need the write
6903 		 * lock on the map: create a shadow object for a
6904 		 * copy-on-write region, or an object for a zero-fill
6905 		 * region.
6906 		 */
6907 		size = entry->vme_end - entry->vme_start;
6908 		/*
6909 		 * If wiring a copy-on-write page, we need to copy it now
6910 		 * even if we're only (currently) requesting read access.
6911 		 * This is aggressive, but once it's wired we can't move it.
6912 		 */
6913 		if (entry->needs_copy) {
6914 			if (wire_and_extract) {
6915 				/*
6916 				 * We're supposed to share with the original
6917 				 * provider so should not be "needs_copy"
6918 				 */
6919 				rc = KERN_INVALID_ARGUMENT;
6920 				goto done;
6921 			}
6922 
6923 			VME_OBJECT_SHADOW(entry, size);
6924 			entry->needs_copy = FALSE;
6925 		} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6926 			if (wire_and_extract) {
6927 				/*
6928 				 * We're supposed to share with the original
6929 				 * provider so should already have an object.
6930 				 */
6931 				rc = KERN_INVALID_ARGUMENT;
6932 				goto done;
6933 			}
6934 			VME_OBJECT_SET(entry, vm_object_allocate(size));
6935 			VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6936 			assert(entry->use_pmap);
6937 		}
6938 
6939 		vm_map_clip_start(map, entry, s);
6940 		vm_map_clip_end(map, entry, end);
6941 
6942 		/* re-compute "e" */
6943 		e = entry->vme_end;
6944 		if (e > end) {
6945 			e = end;
6946 		}
6947 
6948 		/*
6949 		 * Check for holes and protection mismatch.
6950 		 * Holes: Next entry should be contiguous unless this
6951 		 *	  is the end of the region.
6952 		 * Protection: Access requested must be allowed, unless
6953 		 *	wiring is by protection class
6954 		 */
6955 		if ((entry->vme_end < end) &&
6956 		    ((entry->vme_next == vm_map_to_entry(map)) ||
6957 		    (entry->vme_next->vme_start > entry->vme_end))) {
6958 			/* found a hole */
6959 			rc = KERN_INVALID_ADDRESS;
6960 			goto done;
6961 		}
6962 		if ((entry->protection & access_type) != access_type) {
6963 			/* found a protection problem */
6964 			rc = KERN_PROTECTION_FAILURE;
6965 			goto done;
6966 		}
6967 
6968 		assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6969 
6970 		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6971 			goto done;
6972 		}
6973 
6974 		entry->in_transition = TRUE;
6975 
6976 		/*
6977 		 * This entry might get split once we unlock the map.
6978 		 * In vm_fault_wire(), we need the current range as
6979 		 * defined by this entry.  In order for this to work
6980 		 * along with a simultaneous clip operation, we make a
6981 		 * temporary copy of this entry and use that for the
6982 		 * wiring.  Note that the underlying objects do not
6983 		 * change during a clip.
6984 		 */
6985 		tmp_entry = *entry;
6986 
6987 		/*
6988 		 * The in_transition state guarentees that the entry
6989 		 * (or entries for this range, if split occured) will be
6990 		 * there when the map lock is acquired for the second time.
6991 		 */
6992 		vm_map_unlock(map);
6993 
6994 		if (!user_wire && cur_thread != THREAD_NULL) {
6995 			interruptible_state = thread_interrupt_level(THREAD_UNINT);
6996 		} else {
6997 			interruptible_state = THREAD_UNINT;
6998 		}
6999 
7000 		if (map_pmap) {
7001 			rc = vm_fault_wire(map,
7002 			    &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
7003 			    physpage_p);
7004 		} else {
7005 			rc = vm_fault_wire(map,
7006 			    &tmp_entry, caller_prot, tag, map->pmap,
7007 			    tmp_entry.vme_start,
7008 			    physpage_p);
7009 		}
7010 
7011 		if (!user_wire && cur_thread != THREAD_NULL) {
7012 			thread_interrupt_level(interruptible_state);
7013 		}
7014 
7015 		vm_map_lock(map);
7016 
7017 		if (last_timestamp + 1 != map->timestamp) {
7018 			/*
7019 			 * Find the entry again.  It could have been clipped
7020 			 * after we unlocked the map.
7021 			 */
7022 			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7023 			    &first_entry)) {
7024 				panic("vm_map_wire: re-lookup failed");
7025 			}
7026 
7027 			entry = first_entry;
7028 		}
7029 
7030 		last_timestamp = map->timestamp;
7031 
7032 		while ((entry != vm_map_to_entry(map)) &&
7033 		    (entry->vme_start < tmp_entry.vme_end)) {
7034 			assert(entry->in_transition);
7035 			entry->in_transition = FALSE;
7036 			if (entry->needs_wakeup) {
7037 				entry->needs_wakeup = FALSE;
7038 				need_wakeup = TRUE;
7039 			}
7040 			if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
7041 				subtract_wire_counts(map, entry, user_wire);
7042 			}
7043 			entry = entry->vme_next;
7044 		}
7045 
7046 		if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
7047 			goto done;
7048 		}
7049 
7050 		if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
7051 		    (tmp_entry.vme_end != end) &&    /* AND, we are not at the end of the requested range */
7052 		    (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
7053 			/* found a "new" hole */
7054 			s = tmp_entry.vme_end;
7055 			rc = KERN_INVALID_ADDRESS;
7056 			goto done;
7057 		}
7058 
7059 		s = entry->vme_start;
7060 	} /* end while loop through map entries */
7061 
7062 done:
7063 	if (rc == KERN_SUCCESS) {
7064 		/* repair any damage we may have made to the VM map */
7065 		vm_map_simplify_range(map, start, end);
7066 	}
7067 
7068 	vm_map_unlock(map);
7069 
7070 	/*
7071 	 * wake up anybody waiting on entries we wired.
7072 	 */
7073 	if (need_wakeup) {
7074 		vm_map_entry_wakeup(map);
7075 	}
7076 
7077 	if (rc != KERN_SUCCESS) {
7078 		/* undo what has been wired so far */
7079 		vm_map_unwire_nested(map, start, s, user_wire,
7080 		    map_pmap, pmap_addr);
7081 		if (physpage_p) {
7082 			*physpage_p = 0;
7083 		}
7084 	}
7085 
7086 	return rc;
7087 }
7088 
7089 kern_return_t
vm_map_wire_external(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,boolean_t user_wire)7090 vm_map_wire_external(
7091 	vm_map_t                map,
7092 	vm_map_offset_t         start,
7093 	vm_map_offset_t         end,
7094 	vm_prot_t               caller_prot,
7095 	boolean_t               user_wire)
7096 {
7097 	kern_return_t   kret;
7098 
7099 	kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7100 	    user_wire, (pmap_t)NULL, 0, NULL);
7101 	return kret;
7102 }
7103 
7104 kern_return_t
vm_map_wire_kernel(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire)7105 vm_map_wire_kernel(
7106 	vm_map_t                map,
7107 	vm_map_offset_t         start,
7108 	vm_map_offset_t         end,
7109 	vm_prot_t               caller_prot,
7110 	vm_tag_t                tag,
7111 	boolean_t               user_wire)
7112 {
7113 	kern_return_t   kret;
7114 
7115 	kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7116 	    user_wire, (pmap_t)NULL, 0, NULL);
7117 	return kret;
7118 }
7119 
7120 kern_return_t
vm_map_wire_and_extract_external(vm_map_t map,vm_map_offset_t start,vm_prot_t caller_prot,boolean_t user_wire,ppnum_t * physpage_p)7121 vm_map_wire_and_extract_external(
7122 	vm_map_t        map,
7123 	vm_map_offset_t start,
7124 	vm_prot_t       caller_prot,
7125 	boolean_t       user_wire,
7126 	ppnum_t         *physpage_p)
7127 {
7128 	kern_return_t   kret;
7129 
7130 	kret = vm_map_wire_nested(map,
7131 	    start,
7132 	    start + VM_MAP_PAGE_SIZE(map),
7133 	    caller_prot,
7134 	    vm_tag_bt(),
7135 	    user_wire,
7136 	    (pmap_t)NULL,
7137 	    0,
7138 	    physpage_p);
7139 	if (kret != KERN_SUCCESS &&
7140 	    physpage_p != NULL) {
7141 		*physpage_p = 0;
7142 	}
7143 	return kret;
7144 }
7145 
7146 kern_return_t
vm_map_wire_and_extract_kernel(vm_map_t map,vm_map_offset_t start,vm_prot_t caller_prot,vm_tag_t tag,boolean_t user_wire,ppnum_t * physpage_p)7147 vm_map_wire_and_extract_kernel(
7148 	vm_map_t        map,
7149 	vm_map_offset_t start,
7150 	vm_prot_t       caller_prot,
7151 	vm_tag_t        tag,
7152 	boolean_t       user_wire,
7153 	ppnum_t         *physpage_p)
7154 {
7155 	kern_return_t   kret;
7156 
7157 	kret = vm_map_wire_nested(map,
7158 	    start,
7159 	    start + VM_MAP_PAGE_SIZE(map),
7160 	    caller_prot,
7161 	    tag,
7162 	    user_wire,
7163 	    (pmap_t)NULL,
7164 	    0,
7165 	    physpage_p);
7166 	if (kret != KERN_SUCCESS &&
7167 	    physpage_p != NULL) {
7168 		*physpage_p = 0;
7169 	}
7170 	return kret;
7171 }
7172 
7173 /*
7174  *	vm_map_unwire:
7175  *
7176  *	Sets the pageability of the specified address range in the target
7177  *	as pageable.  Regions specified must have been wired previously.
7178  *
7179  *	The map must not be locked, but a reference must remain to the map
7180  *	throughout the call.
7181  *
7182  *	Kernel will panic on failures.  User unwire ignores holes and
7183  *	unwired and intransition entries to avoid losing memory by leaving
7184  *	it unwired.
7185  */
/*
 *	vm_map_unwire_nested:
 *
 *	Core of vm_map_unwire().  When "map_pmap" is non-NULL the request
 *	was relayed through a parent map and the physical unwiring is
 *	performed against that pmap at "pmap_addr"; otherwise each entry
 *	is unwired against this map's own pmap.  "user_wire" selects the
 *	forgiving user semantics (skip holes/unwired/in-transition
 *	entries) versus the kernel semantics (panic on any anomaly).
 */
static kern_return_t
vm_map_unwire_nested(
	vm_map_t                map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	boolean_t               user_wire,
	pmap_t                  map_pmap,
	vm_map_offset_t         pmap_addr)
{
	vm_map_entry_t          entry;
	struct vm_map_entry     *first_entry, tmp_entry;
	boolean_t               need_wakeup;
	boolean_t               main_map = FALSE;
	unsigned int            last_timestamp;

	vm_map_lock(map);
	if (map_pmap == NULL) {
		/*
		 * Top-level request (not relayed through a submap).
		 * NOTE(review): "main_map" is set here but not consulted
		 * anywhere else in this function.
		 */
		main_map = TRUE;
	}
	/* snapshot used to detect map mutations across unlock/relock */
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We unwired what the caller asked for: zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	if (vm_map_lookup_entry(map, start, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested sub maps here !
		 */
	} else {
		if (!user_wire) {
			panic("vm_map_unwire: start not found");
		}
		/*	Start address is not in map. */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	if (entry->superpage_size) {
		/* superpages are always wired */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	need_wakeup = FALSE;
	/* walk every entry overlapping [start, end) */
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if (entry->in_transition) {
			/*
			 * 1)
			 * Another thread is wiring down this entry. Note
			 * that if it is not for the other thread we would
			 * be unwiring an unwired entry.  This is not
			 * permitted.  If we wait, we will be unwiring memory
			 * we did not wire.
			 *
			 * 2)
			 * Another thread is unwiring this entry.  We did not
			 * have a reference to it, because if we did, this
			 * entry will not be getting unwired now.
			 */
			if (!user_wire) {
				/*
				 * XXX FBDP
				 * This could happen:  there could be some
				 * overlapping vslock/vsunlock operations
				 * going on.
				 * We should probably just wait and retry,
				 * but then we have to be careful that this
				 * entry could get "simplified" after
				 * "in_transition" gets unset and before
				 * we re-lookup the entry, so we would
				 * have to re-clip the entry to avoid
				 * re-unwiring what we have already unwired...
				 * See vm_map_wire_nested().
				 *
				 * Or we could just ignore "in_transition"
				 * here and proceed to decement the wired
				 * count(s) on this entry.  That should be fine
				 * as long as "wired_count" doesn't drop all
				 * the way to 0 (and we should panic if THAT
				 * happens).
				 */
				panic("vm_map_unwire: in_transition entry");
			}

			/* user semantics: skip and keep going */
			entry = entry->vme_next;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;
			pmap_t          pmap;

			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);

			/*
			 * Translate this entry's span into the submap's
			 * address space: [sub_start, sub_end).
			 */
			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end - entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			if (map_pmap == NULL) {
				if (entry->use_pmap) {
					/* nested submap: unwire in its own pmap */
					pmap = VME_SUBMAP(entry)->pmap;
					pmap_addr = sub_start;
				} else {
					pmap = map->pmap;
					pmap_addr = start;
				}
				if (entry->wired_count == 0 ||
				    (user_wire && entry->user_wired_count == 0)) {
					if (!user_wire) {
						panic("vm_map_unwire: entry is unwired");
					}
					entry = entry->vme_next;
					continue;
				}

				/*
				 * Check for holes
				 * Holes: Next entry should be contiguous unless
				 * this is the end of the region.
				 */
				if (((entry->vme_end < end) &&
				    ((entry->vme_next == vm_map_to_entry(map)) ||
				    (entry->vme_next->vme_start
				    > entry->vme_end)))) {
					if (!user_wire) {
						panic("vm_map_unwire: non-contiguous region");
					}
/*
 *                                       entry = entry->vme_next;
 *                                       continue;
 */
				}

				subtract_wire_counts(map, entry, user_wire);

				if (entry->wired_count != 0) {
					/* still wired by someone else: don't unwire pages */
					entry = entry->vme_next;
					continue;
				}

				entry->in_transition = TRUE;
				tmp_entry = *entry;/* see comment in vm_map_wire() */

				/*
				 * We can unlock the map now. The in_transition state
				 * guarantees existance of the entry.
				 */
				vm_map_unlock(map);
				vm_map_unwire_nested(VME_SUBMAP(entry),
				    sub_start, sub_end, user_wire, pmap, pmap_addr);
				vm_map_lock(map);

				if (last_timestamp + 1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 */
					if (!vm_map_lookup_entry(map,
					    tmp_entry.vme_start,
					    &first_entry)) {
						if (!user_wire) {
							panic("vm_map_unwire: re-lookup failed");
						}
						entry = first_entry->vme_next;
					} else {
						entry = first_entry;
					}
				}
				last_timestamp = map->timestamp;

				/*
				 * clear transition bit for all constituent entries
				 * that were in the original entry (saved in
				 * tmp_entry).  Also check for waiters.
				 */
				while ((entry != vm_map_to_entry(map)) &&
				    (entry->vme_start < tmp_entry.vme_end)) {
					assert(entry->in_transition);
					entry->in_transition = FALSE;
					if (entry->needs_wakeup) {
						entry->needs_wakeup = FALSE;
						need_wakeup = TRUE;
					}
					entry = entry->vme_next;
				}
				continue;
			} else {
				/*
				 * Relayed request: recurse with the pmap handed
				 * down by our caller.
				 */
				vm_map_unlock(map);
				vm_map_unwire_nested(VME_SUBMAP(entry),
				    sub_start, sub_end, user_wire, map_pmap,
				    pmap_addr);
				vm_map_lock(map);

				if (last_timestamp + 1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 *
					 * NOTE(review): "tmp_entry" has not been
					 * assigned on this path; this relies on a
					 * value saved by an earlier iteration —
					 * confirm against upstream history.
					 */
					if (!vm_map_lookup_entry(map,
					    tmp_entry.vme_start,
					    &first_entry)) {
						if (!user_wire) {
							panic("vm_map_unwire: re-lookup failed");
						}
						entry = first_entry->vme_next;
					} else {
						entry = first_entry;
					}
				}
				last_timestamp = map->timestamp;
			}
		}


		if ((entry->wired_count == 0) ||
		    (user_wire && entry->user_wired_count == 0)) {
			if (!user_wire) {
				panic("vm_map_unwire: entry is unwired");
			}

			entry = entry->vme_next;
			continue;
		}

		assert(entry->wired_count > 0 &&
		    (!user_wire || entry->user_wired_count > 0));

		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);

		/*
		 * Check for holes
		 * Holes: Next entry should be contiguous unless
		 *	  this is the end of the region.
		 */
		if (((entry->vme_end < end) &&
		    ((entry->vme_next == vm_map_to_entry(map)) ||
		    (entry->vme_next->vme_start > entry->vme_end)))) {
			if (!user_wire) {
				panic("vm_map_unwire: non-contiguous region");
			}
			entry = entry->vme_next;
			continue;
		}

		subtract_wire_counts(map, entry, user_wire);

		if (entry->wired_count != 0) {
			/* entry is still wired (other references): keep pages */
			entry = entry->vme_next;
			continue;
		}

		if (entry->zero_wired_pages) {
			entry->zero_wired_pages = FALSE;
		}

		entry->in_transition = TRUE;
		tmp_entry = *entry;     /* see comment in vm_map_wire() */

		/*
		 * We can unlock the map now. The in_transition state
		 * guarantees existance of the entry.
		 */
		vm_map_unlock(map);
		if (map_pmap) {
			vm_fault_unwire(map,
			    &tmp_entry, FALSE, map_pmap, pmap_addr);
		} else {
			vm_fault_unwire(map,
			    &tmp_entry, FALSE, map->pmap,
			    tmp_entry.vme_start);
		}
		vm_map_lock(map);

		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * or deleted after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
			    &first_entry)) {
				if (!user_wire) {
					panic("vm_map_unwire: re-lookup failed");
				}
				entry = first_entry->vme_next;
			} else {
				entry = first_entry;
			}
		}
		last_timestamp = map->timestamp;

		/*
		 * clear transition bit for all constituent entries that
		 * were in the original entry (saved in tmp_entry).  Also
		 * check for waiters.
		 */
		while ((entry != vm_map_to_entry(map)) &&
		    (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			entry = entry->vme_next;
		}
	}

	/*
	 * We might have fragmented the address space when we wired this
	 * range of addresses.  Attempt to re-coalesce these VM map entries
	 * with their neighbors now that they're no longer wired.
	 * Under some circumstances, address space fragmentation can
	 * prevent VM object shadow chain collapsing, which can cause
	 * swap space leaks.
	 */
	vm_map_simplify_range(map, start, end);

	vm_map_unlock(map);
	/*
	 * wake up anybody waiting on entries that we have unwired.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}
	return KERN_SUCCESS;
}
7525 
7526 kern_return_t
vm_map_unwire(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t user_wire)7527 vm_map_unwire(
7528 	vm_map_t                map,
7529 	vm_map_offset_t         start,
7530 	vm_map_offset_t         end,
7531 	boolean_t               user_wire)
7532 {
7533 	return vm_map_unwire_nested(map, start, end,
7534 	           user_wire, (pmap_t)NULL, 0);
7535 }
7536 
7537 
7538 /*
7539  *	vm_map_entry_delete:	[ internal use only ]
7540  *
7541  *	Deallocate the given entry from the target map.
7542  */
7543 static void
vm_map_entry_delete(vm_map_t map,vm_map_entry_t entry)7544 vm_map_entry_delete(
7545 	vm_map_t        map,
7546 	vm_map_entry_t  entry)
7547 {
7548 	vm_map_offset_t s, e;
7549 	vm_object_t     object;
7550 	vm_map_t        submap;
7551 
7552 	s = entry->vme_start;
7553 	e = entry->vme_end;
7554 	assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7555 	assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7556 	if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7557 		assert(page_aligned(s));
7558 		assert(page_aligned(e));
7559 	}
7560 	if (entry->map_aligned == TRUE) {
7561 		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7562 		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7563 	}
7564 	assert(entry->wired_count == 0);
7565 	assert(entry->user_wired_count == 0);
7566 	assert(!entry->permanent);
7567 
7568 	if (entry->is_sub_map) {
7569 		object = NULL;
7570 		submap = VME_SUBMAP(entry);
7571 	} else {
7572 		submap = NULL;
7573 		object = VME_OBJECT(entry);
7574 	}
7575 
7576 	vm_map_store_entry_unlink(map, entry);
7577 	map->size -= e - s;
7578 
7579 	vm_map_entry_dispose(map, entry);
7580 
7581 	vm_map_unlock(map);
7582 	/*
7583 	 *	Deallocate the object only after removing all
7584 	 *	pmap entries pointing to its pages.
7585 	 */
7586 	if (submap) {
7587 		vm_map_deallocate(submap);
7588 	} else {
7589 		vm_object_deallocate(object);
7590 	}
7591 }
7592 
/*
 *	vm_map_submap_pmap_clean:
 *
 *	Remove physical (pmap) mappings for the portion of "sub_map" that
 *	backs the range [start, end) of "map", beginning at "offset"
 *	within the submap.  Recurses through nested submaps.  Only pmap /
 *	physical state is torn down; the submap's entries are untouched.
 *	The submap is read-locked for the duration of the scan.
 */
void
vm_map_submap_pmap_clean(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t        sub_map,
	vm_map_offset_t offset)
{
	vm_map_offset_t submap_start;
	vm_map_offset_t submap_end;
	vm_map_size_t   remove_size;
	vm_map_entry_t  entry;

	/* [submap_start, submap_end) is the target range in submap space */
	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if (vm_map_lookup_entry(sub_map, offset, &entry)) {
		/* first entry may only partially overlap: trim both ends */
		remove_size = (entry->vme_end - entry->vme_start);
		if (offset > entry->vme_start) {
			remove_size -= offset - entry->vme_start;
		}


		if (submap_end < entry->vme_end) {
			remove_size -=
			    entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			/* nested submap: recurse one level down */
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				/*
				 * Parent map may be nested in other pmaps:
				 * scrub mappings object-wide (all pmaps)
				 * rather than just in map->pmap.
				 */
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					(VME_OFFSET(entry) +
					offset -
					entry->vme_start),
					remove_size,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				/* remove translations from the parent map's pmap */
				pmap_remove(map->pmap,
				    (addr64_t)start,
				    (addr64_t)(start + remove_size));
			}
		}
	}

	/*
	 * Advance past the first entry.  NOTE(review): if the lookup above
	 * failed, "entry" points at the entry preceding "offset" (or the
	 * map header), so vme_next lands on the first entry at/after
	 * "offset" — i.e. a leading hole is skipped.  Confirm this matches
	 * vm_map_lookup_entry()'s miss semantics.
	 */
	entry = entry->vme_next;

	/* remaining entries fully precede submap_end except possibly the last */
	while ((entry != vm_map_to_entry(sub_map))
	    && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (submap_end < entry->vme_end) {
			remove_size -= entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					VME_OFFSET(entry),
					remove_size,
					PMAP_NULL,
					PAGE_SIZE,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)((start + entry->vme_start)
				    - offset),
				    (addr64_t)(((start + entry->vme_start)
				    - offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
	return;
}
7692 
7693 /*
7694  *     virt_memory_guard_ast:
7695  *
7696  *     Handle the AST callout for a virtual memory guard.
7697  *	   raise an EXC_GUARD exception and terminate the task
7698  *     if configured to do so.
7699  */
void
virt_memory_guard_ast(
	thread_t thread,
	mach_exception_data_type_t code,
	mach_exception_data_type_t subcode)
{
	task_t task = get_threadtask(thread);
	assert(task != kernel_task);
	assert(task == current_task());
	kern_return_t sync_exception_result;
	uint32_t behavior;

	/* per-task EXC_GUARD VM policy bits (TASK_EXC_GUARD_VM_*) */
	behavior = task->task_exc_guard;

	/* Is delivery enabled */
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return;
	}

	/* If only once, make sure we're that once */
	while (behavior & TASK_EXC_GUARD_VM_ONCE) {
		/* the CAS winner delivers this exception and disables further delivery */
		uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;

		if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
			break;
		}
		/* lost the race: re-read and re-check that delivery is still enabled */
		behavior = task->task_exc_guard;
		if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
			return;
		}
	}

	/* Raise exception synchronously and see if handler claimed it */
	sync_exception_result = task_exception_notify(EXC_GUARD, code, subcode);

	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		/*
		 * If Synchronous EXC_GUARD delivery was successful then
		 * kill the process and return, else kill the process
		 * and deliver the exception via EXC_CORPSE_NOTIFY.
		 */
		if (sync_exception_result == KERN_SUCCESS) {
			task_bsdtask_kill(current_task());
		} else {
			exit_with_guard_exception(current_proc(), code, subcode);
		}
	} else if (task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) {
		/*
		 * If the synchronous EXC_GUARD delivery was not successful,
		 * raise a simulated crash.
		 */
		if (sync_exception_result != KERN_SUCCESS) {
			task_violated_guard(code, subcode, NULL);
		}
	}
}
7756 
7757 /*
7758  *     vm_map_guard_exception:
7759  *
7760  *     Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7761  *
7762  *     Right now, we do this when we find nothing mapped, or a
7763  *     gap in the mapping when a user address space deallocate
7764  *     was requested. We report the address of the first gap found.
7765  */
7766 static void
vm_map_guard_exception(vm_map_offset_t gap_start,unsigned reason)7767 vm_map_guard_exception(
7768 	vm_map_offset_t gap_start,
7769 	unsigned reason)
7770 {
7771 	mach_exception_code_t code = 0;
7772 	unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7773 	unsigned int target = 0; /* should we pass in pid associated with map? */
7774 	mach_exception_data_type_t subcode = (uint64_t)gap_start;
7775 	boolean_t fatal = FALSE;
7776 
7777 	task_t task = current_task();
7778 
7779 	/* Can't deliver exceptions to kernel task */
7780 	if (task == kernel_task) {
7781 		return;
7782 	}
7783 
7784 	EXC_GUARD_ENCODE_TYPE(code, guard_type);
7785 	EXC_GUARD_ENCODE_FLAVOR(code, reason);
7786 	EXC_GUARD_ENCODE_TARGET(code, target);
7787 
7788 	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7789 		fatal = TRUE;
7790 	}
7791 	thread_guard_violation(current_thread(), code, subcode, fatal);
7792 }
7793 
7794 /*
7795  *	vm_map_delete:	[ internal use only ]
7796  *
7797  *	Deallocates the given address range from the target map.
7798  *	Removes all user wirings. Unwires one kernel wiring if
7799  *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
7800  *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
7801  *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7802  *
7803  *	This routine is called with map locked and leaves map locked.
7804  */
7805 static kern_return_t
vm_map_delete(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,int flags,vm_map_t zap_map)7806 vm_map_delete(
7807 	vm_map_t                map,
7808 	vm_map_offset_t         start,
7809 	vm_map_offset_t         end,
7810 	int                     flags,
7811 	vm_map_t                zap_map)
7812 {
7813 	vm_map_entry_t          entry, next;
7814 	struct   vm_map_entry   *first_entry, tmp_entry;
7815 	vm_map_offset_t         s;
7816 	vm_object_t             object;
7817 	boolean_t               need_wakeup;
7818 	unsigned int            last_timestamp = ~0; /* unlikely value */
7819 	int                     interruptible;
7820 	vm_map_offset_t         gap_start;
7821 	__unused vm_map_offset_t save_start = start;
7822 	__unused vm_map_offset_t save_end = end;
7823 	const vm_map_offset_t   FIND_GAP = 1;   /* a not page aligned value */
7824 	const vm_map_offset_t   GAPS_OK = 2;    /* a different not page aligned value */
7825 
	/*
	 * "gap_start" is a tri-state: FIND_GAP means "look for and report
	 * gaps", GAPS_OK means "don't care", any other value is the address
	 * of the first gap found.  Gaps only matter for live user maps that
	 * didn't explicitly allow them.
	 */
7826 	if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7827 		gap_start = FIND_GAP;
7828 	} else {
7829 		gap_start = GAPS_OK;
7830 	}
7831 
7832 	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7833 	    THREAD_ABORTSAFE : THREAD_UNINT;
7834 
7835 	/*
7836 	 * All our DMA I/O operations in IOKit are currently done by
7837 	 * wiring through the map entries of the task requesting the I/O.
7838 	 * Because of this, we must always wait for kernel wirings
7839 	 * to go away on the entries before deleting them.
7840 	 *
7841 	 * Any caller who wants to actually remove a kernel wiring
7842 	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7843 	 * properly remove one wiring instead of blasting through
7844 	 * them all.
7845 	 */
7846 	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7847 
7848 	while (1) {
7849 		/*
7850 		 *	Find the start of the region, and clip it
7851 		 */
7852 		if (vm_map_lookup_entry(map, start, &first_entry)) {
7853 			entry = first_entry;
7854 			if (kalloc_owned_map(map) &&
7855 			    (entry->vme_start != start ||
7856 			    entry->vme_end != end)) {
7857 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
7858 				    "mismatched entry %p [0x%llx:0x%llx]\n",
7859 				    map,
7860 				    (uint64_t)start,
7861 				    (uint64_t)end,
7862 				    entry,
7863 				    (uint64_t)entry->vme_start,
7864 				    (uint64_t)entry->vme_end);
7865 			}
7866 
7867 			/*
7868 			 * If in a superpage, extend the range to include the start of the mapping.
7869 			 */
7870 			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7871 				start = SUPERPAGE_ROUND_DOWN(start);
7872 				continue;
7873 			}
7874 
7875 			if (start == entry->vme_start) {
7876 				/*
7877 				 * No need to clip.  We don't want to cause
7878 				 * any unnecessary unnesting in this case...
7879 				 */
7880 			} else {
7881 				if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7882 				    entry->map_aligned &&
7883 				    !VM_MAP_PAGE_ALIGNED(
7884 					    start,
7885 					    VM_MAP_PAGE_MASK(map))) {
7886 					/*
7887 					 * The entry will no longer be
7888 					 * map-aligned after clipping
7889 					 * and the caller said it's OK.
7890 					 */
7891 					entry->map_aligned = FALSE;
7892 				}
7893 				if (kalloc_owned_map(map)) {
7894 					panic("vm_map_delete(%p,0x%llx,0x%llx):"
7895 					    " clipping %p at 0x%llx\n",
7896 					    map,
7897 					    (uint64_t)start,
7898 					    (uint64_t)end,
7899 					    entry,
7900 					    (uint64_t)start);
7901 				}
7902 				vm_map_clip_start(map, entry, start);
7903 			}
7904 
7905 			/*
7906 			 *	Fix the lookup hint now, rather than each
7907 			 *	time through the loop.
7908 			 */
7909 			SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7910 		} else {
7911 			if (map->pmap == kernel_pmap &&
7912 			    os_ref_get_count(&map->map_refcnt) != 0) {
7913 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
7914 				    "no map entry at 0x%llx\n",
7915 				    map,
7916 				    (uint64_t)start,
7917 				    (uint64_t)end,
7918 				    (uint64_t)start);
7919 			}
7920 			entry = first_entry->vme_next;
7921 			if (gap_start == FIND_GAP) {
7922 				gap_start = start;
7923 			}
7924 		}
7925 		break;
7926 	}
7927 	if (entry->superpage_size) {
7928 		end = SUPERPAGE_ROUND_UP(end);
7929 	}
7930 
7931 	need_wakeup = FALSE;
7932 	/*
7933 	 *	Step through all entries in this region
7934 	 */
7935 	s = entry->vme_start;
7936 	while ((entry != vm_map_to_entry(map)) && (s < end)) {
7937 		/*
7938 		 * At this point, we have deleted all the memory entries
7939 		 * between "start" and "s".  We still need to delete
7940 		 * all memory entries between "s" and "end".
7941 		 * While we were blocked and the map was unlocked, some
7942 		 * new memory entries could have been re-allocated between
7943 		 * "start" and "s" and we don't want to mess with those.
7944 		 * Some of those entries could even have been re-assembled
7945 		 * with an entry after "s" (in vm_map_simplify_entry()), so
7946 		 * we may have to vm_map_clip_start() again.
7947 		 */
7948 
7949 		if (entry->vme_start >= s) {
7950 			/*
7951 			 * This entry starts on or after "s"
7952 			 * so no need to clip its start.
7953 			 */
7954 		} else {
7955 			/*
7956 			 * This entry has been re-assembled by a
7957 			 * vm_map_simplify_entry().  We need to
7958 			 * re-clip its start.
7959 			 */
7960 			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7961 			    entry->map_aligned &&
7962 			    !VM_MAP_PAGE_ALIGNED(s,
7963 			    VM_MAP_PAGE_MASK(map))) {
7964 				/*
7965 				 * The entry will no longer be map-aligned
7966 				 * after clipping and the caller said it's OK.
7967 				 */
7968 				entry->map_aligned = FALSE;
7969 			}
7970 			if (kalloc_owned_map(map)) {
7971 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
7972 				    "clipping %p at 0x%llx\n",
7973 				    map,
7974 				    (uint64_t)start,
7975 				    (uint64_t)end,
7976 				    entry,
7977 				    (uint64_t)s);
7978 			}
7979 			vm_map_clip_start(map, entry, s);
7980 		}
7981 		if (entry->vme_end <= end) {
7982 			/*
7983 			 * This entry is going away completely, so no need
7984 			 * to clip and possibly cause an unnecessary unnesting.
7985 			 */
7986 		} else {
7987 			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7988 			    entry->map_aligned &&
7989 			    !VM_MAP_PAGE_ALIGNED(end,
7990 			    VM_MAP_PAGE_MASK(map))) {
7991 				/*
7992 				 * The entry will no longer be map-aligned
7993 				 * after clipping and the caller said it's OK.
7994 				 */
7995 				entry->map_aligned = FALSE;
7996 			}
7997 			if (kalloc_owned_map(map)) {
7998 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
7999 				    "clipping %p at 0x%llx\n",
8000 				    map,
8001 				    (uint64_t)start,
8002 				    (uint64_t)end,
8003 				    entry,
8004 				    (uint64_t)end);
8005 			}
8006 			vm_map_clip_end(map, entry, end);
8007 		}
8008 
8009 		if (entry->permanent) {
8010 			if (map->pmap == kernel_pmap) {
8011 				panic("%s(%p,0x%llx,0x%llx): "
8012 				    "attempt to remove permanent "
8013 				    "VM map entry "
8014 				    "%p [0x%llx:0x%llx]\n",
8015 				    __FUNCTION__,
8016 				    map,
8017 				    (uint64_t) start,
8018 				    (uint64_t) end,
8019 				    entry,
8020 				    (uint64_t) entry->vme_start,
8021 				    (uint64_t) entry->vme_end);
8022 			} else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8023 //				printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8024 				entry->permanent = FALSE;
8025 			} else {
8026 				if (vm_map_executable_immutable_verbose) {
8027 					printf("%d[%s] %s(0x%llx,0x%llx): "
8028 					    "permanent entry [0x%llx:0x%llx] "
8029 					    "prot 0x%x/0x%x\n",
8030 					    proc_selfpid(),
8031 					    (current_task()->bsd_info
8032 					    ? proc_name_address(current_task()->bsd_info)
8033 					    : "?"),
8034 					    __FUNCTION__,
8035 					    (uint64_t) start,
8036 					    (uint64_t) end,
8037 					    (uint64_t)entry->vme_start,
8038 					    (uint64_t)entry->vme_end,
8039 					    entry->protection,
8040 					    entry->max_protection);
8041 				}
8042 				/*
8043 				 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8044 				 */
8045 				DTRACE_VM5(vm_map_delete_permanent,
8046 				    vm_map_offset_t, entry->vme_start,
8047 				    vm_map_offset_t, entry->vme_end,
8048 				    vm_prot_t, entry->protection,
8049 				    vm_prot_t, entry->max_protection,
8050 				    int, VME_ALIAS(entry));
8051 			}
8052 		}
8053 
8054 
8055 		if (entry->in_transition) {
8056 			wait_result_t wait_result;
8057 
8058 			/*
8059 			 * Another thread is wiring/unwiring this entry.
8060 			 * Let the other thread know we are waiting.
8061 			 */
8062 			assert(s == entry->vme_start);
8063 			entry->needs_wakeup = TRUE;
8064 
8065 			/*
8066 			 * wake up anybody waiting on entries that we have
8067 			 * already unwired/deleted.
8068 			 */
8069 			if (need_wakeup) {
8070 				vm_map_entry_wakeup(map);
8071 				need_wakeup = FALSE;
8072 			}
8073 
8074 			wait_result = vm_map_entry_wait(map, interruptible);
8075 
8076 			if (interruptible &&
8077 			    wait_result == THREAD_INTERRUPTED) {
8078 				/*
8079 				 * We do not clear the needs_wakeup flag,
8080 				 * since we cannot tell if we were the only one.
8081 				 */
8082 				return KERN_ABORTED;
8083 			}
8084 
8085 			/*
8086 			 * The entry could have been clipped or it
8087 			 * may not exist anymore.  Look it up again.
8088 			 */
8089 			if (!vm_map_lookup_entry(map, s, &first_entry)) {
8090 				/*
8091 				 * User: use the next entry
8092 				 */
8093 				if (gap_start == FIND_GAP) {
8094 					gap_start = s;
8095 				}
8096 				entry = first_entry->vme_next;
8097 				s = entry->vme_start;
8098 			} else {
8099 				entry = first_entry;
8100 				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8101 			}
8102 			last_timestamp = map->timestamp;
8103 			continue;
8104 		} /* end in_transition */
8105 
		/*
		 * Wired entries must be completely unwired (which requires
		 * dropping the map lock) before they can be deleted below.
		 */
8106 		if (entry->wired_count) {
8107 			boolean_t       user_wire;
8108 
8109 			user_wire = entry->user_wired_count > 0;
8110 
8111 			/*
8112 			 *      Remove a kernel wiring if requested
8113 			 */
8114 			if (flags & VM_MAP_REMOVE_KUNWIRE) {
8115 				entry->wired_count--;
8116 			}
8117 
8118 			/*
8119 			 *	Remove all user wirings for proper accounting
8120 			 */
8121 			if (entry->user_wired_count > 0) {
8122 				while (entry->user_wired_count) {
8123 					subtract_wire_counts(map, entry, user_wire);
8124 				}
8125 			}
8126 
8127 			if (entry->wired_count != 0) {
8128 				assert(map != kernel_map);
8129 				/*
8130 				 * Cannot continue.  Typical case is when
8131 				 * a user thread has physical io pending on
8132 				 * on this page.  Either wait for the
8133 				 * kernel wiring to go away or return an
8134 				 * error.
8135 				 */
8136 				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
8137 					wait_result_t wait_result;
8138 
8139 					assert(s == entry->vme_start);
8140 					entry->needs_wakeup = TRUE;
8141 					wait_result = vm_map_entry_wait(map,
8142 					    interruptible);
8143 
8144 					if (interruptible &&
8145 					    wait_result == THREAD_INTERRUPTED) {
8146 						/*
8147 						 * We do not clear the
8148 						 * needs_wakeup flag, since we
8149 						 * cannot tell if we were the
8150 						 * only one.
8151 						 */
8152 						return KERN_ABORTED;
8153 					}
8154 
8155 					/*
8156 					 * The entry could have been clipped or
8157 					 * it may not exist anymore.  Look it
8158 					 * up again.
8159 					 */
8160 					if (!vm_map_lookup_entry(map, s,
8161 					    &first_entry)) {
8162 						assert(map != kernel_map);
8163 						/*
8164 						 * User: use the next entry
8165 						 */
8166 						if (gap_start == FIND_GAP) {
8167 							gap_start = s;
8168 						}
8169 						entry = first_entry->vme_next;
8170 						s = entry->vme_start;
8171 					} else {
8172 						entry = first_entry;
8173 						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8174 					}
8175 					last_timestamp = map->timestamp;
8176 					continue;
8177 				} else {
8178 					return KERN_FAILURE;
8179 				}
8180 			}
8181 
8182 			entry->in_transition = TRUE;
8183 			/*
8184 			 * copy current entry.  see comment in vm_map_wire()
8185 			 */
8186 			tmp_entry = *entry;
8187 			assert(s == entry->vme_start);
8188 
8189 			/*
8190 			 * We can unlock the map now. The in_transition
8191 			 * state guarentees existance of the entry.
8192 			 */
8193 			vm_map_unlock(map);
8194 
8195 			if (tmp_entry.is_sub_map) {
8196 				vm_map_t sub_map;
8197 				vm_map_offset_t sub_start, sub_end;
8198 				pmap_t pmap;
8199 				vm_map_offset_t pmap_addr;
8200 
8201 
8202 				sub_map = VME_SUBMAP(&tmp_entry);
8203 				sub_start = VME_OFFSET(&tmp_entry);
8204 				sub_end = sub_start + (tmp_entry.vme_end -
8205 				    tmp_entry.vme_start);
8206 				if (tmp_entry.use_pmap) {
8207 					pmap = sub_map->pmap;
8208 					pmap_addr = tmp_entry.vme_start;
8209 				} else {
8210 					pmap = map->pmap;
8211 					pmap_addr = tmp_entry.vme_start;
8212 				}
8213 				(void) vm_map_unwire_nested(sub_map,
8214 				    sub_start, sub_end,
8215 				    user_wire,
8216 				    pmap, pmap_addr);
8217 			} else {
8218 				if (VME_OBJECT(&tmp_entry) == kernel_object) {
8219 					pmap_protect_options(
8220 						map->pmap,
8221 						tmp_entry.vme_start,
8222 						tmp_entry.vme_end,
8223 						VM_PROT_NONE,
8224 						PMAP_OPTIONS_REMOVE,
8225 						NULL);
8226 				}
8227 				vm_fault_unwire(map, &tmp_entry,
8228 				    VME_OBJECT(&tmp_entry) == kernel_object,
8229 				    map->pmap, tmp_entry.vme_start);
8230 			}
8231 
8232 			vm_map_lock(map);
8233 
8234 			if (last_timestamp + 1 != map->timestamp) {
8235 				/*
8236 				 * Find the entry again.  It could have
8237 				 * been clipped after we unlocked the map.
8238 				 */
8239 				if (!vm_map_lookup_entry(map, s, &first_entry)) {
8240 					assert((map != kernel_map) &&
8241 					    (!entry->is_sub_map));
8242 					if (gap_start == FIND_GAP) {
8243 						gap_start = s;
8244 					}
8245 					first_entry = first_entry->vme_next;
8246 					s = first_entry->vme_start;
8247 				} else {
8248 					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8249 				}
8250 			} else {
8251 				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8252 				first_entry = entry;
8253 			}
8254 
8255 			last_timestamp = map->timestamp;
8256 
8257 			entry = first_entry;
8258 			while ((entry != vm_map_to_entry(map)) &&
8259 			    (entry->vme_start < tmp_entry.vme_end)) {
8260 				assert(entry->in_transition);
8261 				entry->in_transition = FALSE;
8262 				if (entry->needs_wakeup) {
8263 					entry->needs_wakeup = FALSE;
8264 					need_wakeup = TRUE;
8265 				}
8266 				entry = entry->vme_next;
8267 			}
8268 			/*
8269 			 * We have unwired the entry(s).  Go back and
8270 			 * delete them.
8271 			 */
8272 			entry = first_entry;
8273 			continue;
8274 		}
8275 
8276 		/* entry is unwired */
8277 		assert(entry->wired_count == 0);
8278 		assert(entry->user_wired_count == 0);
8279 
8280 		assert(s == entry->vme_start);
8281 
8282 		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8283 			/*
8284 			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8285 			 * vm_map_delete(), some map entries might have been
8286 			 * transferred to a "zap_map", which doesn't have a
8287 			 * pmap.  The original pmap has already been flushed
8288 			 * in the vm_map_delete() call targeting the original
8289 			 * map, but when we get to destroying the "zap_map",
8290 			 * we don't have any pmap to flush, so let's just skip
8291 			 * all this.
8292 			 */
8293 		} else if (entry->is_sub_map) {
8294 			assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8295 			    "map %p (%d) entry %p submap %p (%d)\n",
8296 			    map, VM_MAP_PAGE_SHIFT(map), entry,
8297 			    VME_SUBMAP(entry),
8298 			    VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8299 			if (entry->use_pmap) {
8300 				assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
8301 				    "map %p (%d) entry %p submap %p (%d)\n",
8302 				    map, VM_MAP_PAGE_SHIFT(map), entry,
8303 				    VME_SUBMAP(entry),
8304 				    VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8305 #ifndef NO_NESTED_PMAP
8306 				int pmap_flags;
8307 
8308 				if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8309 					/*
8310 					 * This is the final cleanup of the
8311 					 * address space being terminated.
8312 					 * No new mappings are expected and
8313 					 * we don't really need to unnest the
8314 					 * shared region (and lose the "global"
8315 					 * pmap mappings, if applicable).
8316 					 *
8317 					 * Tell the pmap layer that we're
8318 					 * "clean" wrt nesting.
8319 					 */
8320 					pmap_flags = PMAP_UNNEST_CLEAN;
8321 				} else {
8322 					/*
8323 					 * We're unmapping part of the nested
8324 					 * shared region, so we can't keep the
8325 					 * nested pmap.
8326 					 */
8327 					pmap_flags = 0;
8328 				}
8329 				pmap_unnest_options(
8330 					map->pmap,
8331 					(addr64_t)entry->vme_start,
8332 					entry->vme_end - entry->vme_start,
8333 					pmap_flags);
8334 #endif  /* NO_NESTED_PMAP */
8335 				if (map->mapped_in_other_pmaps &&
8336 				    os_ref_get_count(&map->map_refcnt) != 0) {
8337 					/* clean up parent map/maps */
8338 					vm_map_submap_pmap_clean(
8339 						map, entry->vme_start,
8340 						entry->vme_end,
8341 						VME_SUBMAP(entry),
8342 						VME_OFFSET(entry));
8343 				}
8344 			} else {
8345 				vm_map_submap_pmap_clean(
8346 					map, entry->vme_start, entry->vme_end,
8347 					VME_SUBMAP(entry),
8348 					VME_OFFSET(entry));
8349 			}
8350 		} else if (VME_OBJECT(entry) != kernel_object &&
8351 		    VME_OBJECT(entry) != compressor_object) {
8352 			object = VME_OBJECT(entry);
8353 			if (map->mapped_in_other_pmaps &&
8354 			    os_ref_get_count(&map->map_refcnt) != 0) {
8355 				vm_object_pmap_protect_options(
8356 					object, VME_OFFSET(entry),
8357 					entry->vme_end - entry->vme_start,
8358 					PMAP_NULL,
8359 					PAGE_SIZE,
8360 					entry->vme_start,
8361 					VM_PROT_NONE,
8362 					PMAP_OPTIONS_REMOVE);
8363 			} else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8364 			    (map->pmap == kernel_pmap)) {
8365 				/* Remove translations associated
8366 				 * with this range unless the entry
8367 				 * does not have an object, or
8368 				 * it's the kernel map or a descendant
8369 				 * since the platform could potentially
8370 				 * create "backdoor" mappings invisible
8371 				 * to the VM. It is expected that
8372 				 * objectless, non-kernel ranges
8373 				 * do not have such VM invisible
8374 				 * translations.
8375 				 */
8376 				pmap_remove_options(map->pmap,
8377 				    (addr64_t)entry->vme_start,
8378 				    (addr64_t)entry->vme_end,
8379 				    PMAP_OPTIONS_REMOVE);
8380 			}
8381 		}
8382 
8383 		if (entry->iokit_acct) {
8384 			/* alternate accounting */
8385 			DTRACE_VM4(vm_map_iokit_unmapped_region,
8386 			    vm_map_t, map,
8387 			    vm_map_offset_t, entry->vme_start,
8388 			    vm_map_offset_t, entry->vme_end,
8389 			    int, VME_ALIAS(entry));
8390 			vm_map_iokit_unmapped_region(map,
8391 			    (entry->vme_end -
8392 			    entry->vme_start));
8393 			entry->iokit_acct = FALSE;
8394 			entry->use_pmap = FALSE;
8395 		}
8396 
8397 		/*
8398 		 * All pmap mappings for this map entry must have been
8399 		 * cleared by now.
8400 		 */
8401 #if DEBUG
8402 		assert(pmap_is_empty(map->pmap,
8403 		    entry->vme_start,
8404 		    entry->vme_end));
8405 #endif /* DEBUG */
8406 
8407 		next = entry->vme_next;
8408 
8409 		if (map->pmap == kernel_pmap &&
8410 		    os_ref_get_count(&map->map_refcnt) != 0) {
8411 			if (entry->vme_end < end && (next == vm_map_to_entry(map) || next->vme_start != entry->vme_end)) {
8412 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
8413 				    "hole after %p at 0x%llx\n",
8414 				    map,
8415 				    (uint64_t)start,
8416 				    (uint64_t)end,
8417 				    entry,
8418 				    (uint64_t)entry->vme_end);
8419 			}
8420 
8421 			if (entry->vme_atomic && (entry->vme_start != start || entry->vme_end != end)) {
8422 				/*
8423 				 * In the kernel map and its submaps, the removal of
8424 				 * an atomic entry is strict. An atomic entry is
8425 				 * processed only if it was specifically targeted. We
8426 				 * might have deleted non-atomic entries before it but
8427 				 * we won't remove this atomic entry OR anything after it.
8428 				 */
8429 #if DEVELOPMENT || DEBUG
8430 				panic("vm_map_delete(%p,0x%llx,0x%llx): "
8431 				    "request loosely encompasses atomic entry %p at (0x%llx,0x%llx)\n",
8432 				    map,
8433 				    (uint64_t)start,
8434 				    (uint64_t)end,
8435 				    entry,
8436 				    (uint64_t)entry->vme_start,
8437 				    (uint64_t)entry->vme_end);
8438 #endif /* DEVELOPMENT || DEBUG */
8439 
8440 				break;
8441 			}
8442 		}
8443 
8444 		/*
8445 		 * If the desired range didn't end with "entry", then there is a gap if
8446 		 * we wrapped around to the start of the map or if "entry" and "next"
8447 		 * aren't contiguous.
8448 		 *
8449 		 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8450 		 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8451 		 */
8452 		if (gap_start == FIND_GAP &&
8453 		    vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8454 		    (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8455 			gap_start = entry->vme_end;
8456 		}
8457 		s = next->vme_start;
8458 		last_timestamp = map->timestamp;
8459 
8460 		if (entry->permanent) {
8461 			/*
8462 			 * A permanent entry can not be removed, so leave it
8463 			 * in place but remove all access permissions.
8464 			 */
8465 			entry->protection = VM_PROT_NONE;
8466 			entry->max_protection = VM_PROT_NONE;
8467 		} else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8468 		    zap_map != VM_MAP_NULL) {
8469 			vm_map_size_t entry_size;
8470 			/*
8471 			 * The caller wants to save the affected VM map entries
8472 			 * into the "zap_map".  The caller will take care of
8473 			 * these entries.
8474 			 */
8475 			/* unlink the entry from "map" ... */
8476 			vm_map_store_entry_unlink(map, entry);
8477 			/* ... and add it to the end of the "zap_map" */
8478 			vm_map_store_entry_link(zap_map,
8479 			    vm_map_last_entry(zap_map),
8480 			    entry,
8481 			    VM_MAP_KERNEL_FLAGS_NONE);
8482 			entry_size = entry->vme_end - entry->vme_start;
8483 			map->size -= entry_size;
8484 			zap_map->size += entry_size;
8485 			/* we didn't unlock the map, so no timestamp increase */
8486 			last_timestamp--;
8487 		} else {
8488 			vm_map_entry_delete(map, entry);
8489 			/* vm_map_entry_delete unlocks the map */
8490 			vm_map_lock(map);
8491 		}
8492 
8493 		entry = next;
8494 
8495 		if (entry == vm_map_to_entry(map)) {
8496 			break;
8497 		}
8498 		if (last_timestamp + 1 != map->timestamp) {
8499 			/*
8500 			 * We are responsible for deleting everything
8501 			 * from the given space. If someone has interfered,
8502 			 * we pick up where we left off. Back fills should
8503 			 * be all right for anyone, except map_delete, and
8504 			 * we have to assume that the task has been fully
8505 			 * disabled before we get here
8506 			 */
8507 			if (!vm_map_lookup_entry(map, s, &entry)) {
8508 				entry = entry->vme_next;
8509 
8510 				/*
8511 				 * Nothing found for s. If we weren't already done, then there is a gap.
8512 				 */
8513 				if (gap_start == FIND_GAP && s < end) {
8514 					gap_start = s;
8515 				}
8516 				s = entry->vme_start;
8517 			} else {
8518 				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8519 			}
8520 			/*
8521 			 * others can not only allocate behind us, we can
8522 			 * also see coalesce while we don't have the map lock
8523 			 */
8524 			if (entry == vm_map_to_entry(map)) {
8525 				break;
8526 			}
8527 		}
8528 		last_timestamp = map->timestamp;
8529 	}
8530 
8531 	if (map->wait_for_space) {
8532 		thread_wakeup((event_t) map);
8533 	}
8534 	/*
8535 	 * wake up anybody waiting on entries that we have already deleted.
8536 	 */
8537 	if (need_wakeup) {
8538 		vm_map_entry_wakeup(map);
8539 	}
8540 
8541 	if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8542 		DTRACE_VM3(kern_vm_deallocate_gap,
8543 		    vm_map_offset_t, gap_start,
8544 		    vm_map_offset_t, save_start,
8545 		    vm_map_offset_t, save_end);
8546 		if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8547 			vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8548 		}
8549 	}
8550 
8551 	return KERN_SUCCESS;
8552 }
8553 
8554 
8555 /*
8556  *	vm_map_terminate:
8557  *
8558  *	Clean out a task's map.
8559  */
8560 kern_return_t
vm_map_terminate(vm_map_t map)8561 vm_map_terminate(
8562 	vm_map_t        map)
8563 {
8564 	vm_map_lock(map);
8565 	map->terminated = TRUE;
8566 	vm_map_unlock(map);
8567 
8568 	return vm_map_remove(map,
8569 	           map->min_offset,
8570 	           map->max_offset,
8571 	           /*
8572 	            * Final cleanup:
8573 	            * + no unnesting
8574 	            * + remove immutable mappings
8575 	            * + allow gaps in range
8576 	            */
8577 	           (VM_MAP_REMOVE_NO_UNNESTING |
8578 	           VM_MAP_REMOVE_IMMUTABLE |
8579 	           VM_MAP_REMOVE_GAPS_OK));
8580 }
8581 
8582 /*
8583  *	vm_map_remove:
8584  *
8585  *	Remove the given address range from the target map.
8586  *	This is the exported form of vm_map_delete.
8587  */
8588 kern_return_t
vm_map_remove(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t flags)8589 vm_map_remove(
8590 	vm_map_t        map,
8591 	vm_map_offset_t start,
8592 	vm_map_offset_t end,
8593 	boolean_t      flags)
8594 {
8595 	kern_return_t   result;
8596 
8597 	vm_map_lock(map);
8598 	VM_MAP_RANGE_CHECK(map, start, end);
8599 	/*
8600 	 * For the zone maps, the kernel controls the allocation/freeing of memory.
8601 	 * Any free to the zone maps should be within the bounds of the map and
8602 	 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8603 	 * free to the zone maps into a no-op, there is a problem and we should
8604 	 * panic.
8605 	 */
8606 	if ((start == end) && zone_maps_owned(start, 1)) {
8607 		panic("Nothing being freed to a zone map. start = end = %p", (void *)start);
8608 	}
8609 	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8610 	vm_map_unlock(map);
8611 
8612 	return result;
8613 }
8614 
8615 /*
8616  *	vm_map_remove_locked:
8617  *
8618  *	Remove the given address range from the target locked map.
8619  *	This is the exported form of vm_map_delete.
8620  */
8621 kern_return_t
vm_map_remove_locked(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,boolean_t flags)8622 vm_map_remove_locked(
8623 	vm_map_t        map,
8624 	vm_map_offset_t start,
8625 	vm_map_offset_t end,
8626 	boolean_t       flags)
8627 {
8628 	kern_return_t   result;
8629 
8630 	VM_MAP_RANGE_CHECK(map, start, end);
8631 	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8632 	return result;
8633 }
8634 
8635 
8636 /*
8637  *	Routine:	vm_map_copy_allocate
8638  *
8639  *	Description:
8640  *		Allocates and initializes a map copy object.
8641  */
8642 static vm_map_copy_t
vm_map_copy_allocate(void)8643 vm_map_copy_allocate(void)
8644 {
8645 	vm_map_copy_t new_copy;
8646 
8647 	new_copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
8648 	new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8649 	vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8650 	vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8651 	return new_copy;
8652 }
8653 
8654 /*
8655  *	Routine:	vm_map_copy_discard
8656  *
8657  *	Description:
8658  *		Dispose of a map copy object (returned by
8659  *		vm_map_copyin).
8660  */
8661 void
vm_map_copy_discard(vm_map_copy_t copy)8662 vm_map_copy_discard(
8663 	vm_map_copy_t   copy)
8664 {
8665 	if (copy == VM_MAP_COPY_NULL) {
8666 		return;
8667 	}
8668 
8669 	switch (copy->type) {
8670 	case VM_MAP_COPY_ENTRY_LIST:
8671 		while (vm_map_copy_first_entry(copy) !=
8672 		    vm_map_copy_to_entry(copy)) {
8673 			vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
8674 
8675 			vm_map_copy_entry_unlink(copy, entry);
8676 			if (entry->is_sub_map) {
8677 				vm_map_deallocate(VME_SUBMAP(entry));
8678 			} else {
8679 				vm_object_deallocate(VME_OBJECT(entry));
8680 			}
8681 			vm_map_copy_entry_dispose(copy, entry);
8682 		}
8683 		break;
8684 	case VM_MAP_COPY_OBJECT:
8685 		vm_object_deallocate(copy->cpy_object);
8686 		break;
8687 	case VM_MAP_COPY_KERNEL_BUFFER:
8688 
8689 		/*
8690 		 * The vm_map_copy_t and possibly the data buffer were
8691 		 * allocated by a single call to kalloc_data(), i.e. the
8692 		 * vm_map_copy_t was not allocated out of the zone.
8693 		 */
8694 		if (copy->size > msg_ool_size_small || copy->offset) {
8695 			panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8696 			    (long long)copy->size, (long long)copy->offset);
8697 		}
8698 		kfree_data(copy->cpy_kdata, copy->size);
8699 	}
8700 	zfree(vm_map_copy_zone, copy);
8701 }
8702 
8703 /*
8704  *	Routine:	vm_map_copy_copy
8705  *
8706  *	Description:
8707  *			Move the information in a map copy object to
8708  *			a new map copy object, leaving the old one
8709  *			empty.
8710  *
8711  *			This is used by kernel routines that need
8712  *			to look at out-of-line data (in copyin form)
8713  *			before deciding whether to return SUCCESS.
8714  *			If the routine returns FAILURE, the original
8715  *			copy object will be deallocated; therefore,
8716  *			these routines must make a copy of the copy
8717  *			object and leave the original empty so that
8718  *			deallocation will not fail.
8719  */
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t   copy)
{
	vm_map_copy_t   new_copy;

	/* Nothing to move for a NULL copy object. */
	if (copy == VM_MAP_COPY_NULL) {
		return VM_MAP_COPY_NULL;
	}

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */

	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
#if __has_feature(ptrauth_calls)
	/*
	 * cpy_kdata is a signed pointer; re-assign it through the field
	 * so it is re-signed for the new copy's address rather than
	 * carrying over the raw bytes from memcpy().
	 */
	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		new_copy->cpy_kdata = copy->cpy_kdata;
	}
#endif

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 */
		vm_map_copy_first_entry(copy)->vme_prev
		        = vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
		        = vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
8766 
8767 static boolean_t
vm_map_entry_is_overwritable(vm_map_t dst_map __unused,vm_map_entry_t entry)8768 vm_map_entry_is_overwritable(
8769 	vm_map_t        dst_map __unused,
8770 	vm_map_entry_t  entry)
8771 {
8772 	if (!(entry->protection & VM_PROT_WRITE)) {
8773 		/* can't overwrite if not writable */
8774 		return FALSE;
8775 	}
8776 #if !__x86_64__
8777 	if (entry->used_for_jit &&
8778 	    vm_map_cs_enforcement(dst_map) &&
8779 	    !dst_map->cs_debugged) {
8780 		/*
8781 		 * Can't overwrite a JIT region while cs_enforced
8782 		 * and not cs_debugged.
8783 		 */
8784 		return FALSE;
8785 	}
8786 #endif /* !__x86_64__ */
8787 	return TRUE;
8788 }
8789 
/*
 * Recursively verify that [dst_addr, dst_addr + dst_size) in "dst_map"
 * is entirely overwritable: contiguous, writable, not blocked by
 * permanent objects reached through submaps.  Descends into each
 * non-transitioning submap entry.  Returns KERN_SUCCESS when the whole
 * range checks out; the map is left unlocked on every return path.
 */
static kern_return_t
vm_map_overwrite_submap_recurse(
	vm_map_t        dst_map,
	vm_map_offset_t dst_addr,
	vm_map_size_t   dst_size)
{
	vm_map_offset_t dst_end;
	vm_map_entry_t  tmp_entry;
	vm_map_entry_t  entry;
	kern_return_t   result;
	boolean_t       encountered_sub_map = FALSE;



	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */

	dst_end = vm_map_round_page(dst_addr + dst_size,
	    VM_MAP_PAGE_MASK(dst_map));
	vm_map_lock(dst_map);

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}

	vm_map_clip_start(dst_map,
	    tmp_entry,
	    vm_map_trunc_page(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)));
	if (tmp_entry->is_sub_map) {
		/* clipping did unnest if needed */
		assert(!tmp_entry->use_pmap);
	}

	for (entry = tmp_entry;;) {
		vm_map_entry_t  next;

		next = entry->vme_next;
		/* Descend through any run of submap entries. */
		while (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			encountered_sub_map = TRUE;
			sub_start = VME_OFFSET(entry);

			/* Translate the overlap of [dst_addr, dst_end) with */
			/* this entry into the submap's offset space. */
			if (entry->vme_end < dst_end) {
				sub_end = entry->vme_end;
			} else {
				sub_end = dst_end;
			}
			sub_end -= entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			vm_map_unlock(dst_map);

			result = vm_map_overwrite_submap_recurse(
				VME_SUBMAP(entry),
				sub_start,
				sub_end - sub_start);

			/* Map is unlocked here; entry may be stale below. */
			if (result != KERN_SUCCESS) {
				return result;
			}
			if (dst_end <= entry->vme_end) {
				return KERN_SUCCESS;
			}
			/* Re-lock and re-lookup to continue past the submap. */
			vm_map_lock(dst_map);
			if (!vm_map_lookup_entry(dst_map, local_end,
			    &tmp_entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			entry = tmp_entry;
			next = entry->vme_next;
		}

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

/*
 *		our range is contained completely within this map entry
 */
		if (dst_end <= entry->vme_end) {
			vm_map_unlock(dst_map);
			return KERN_SUCCESS;
		}
/*
 *		check that range specified is contiguous region
 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/*
		 *	Check for permanent objects in the destination.
		 *	Only fatal here when we reached this range through
		 *	a submap, since the caller cannot replace it then.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		    (VME_OBJECT(entry)->true_share))) {
			if (encountered_sub_map) {
				vm_map_unlock(dst_map);
				return KERN_FAILURE;
			}
		}


		entry = next;
	}/* for */
	/* NOTREACHED: the loop above exits only via return. */
	vm_map_unlock(dst_map);
	return KERN_SUCCESS;
}
8942 
8943 /*
8944  *	Routine:	vm_map_copy_overwrite
8945  *
8946  *	Description:
8947  *		Copy the memory described by the map copy
8948  *		object (copy; returned by vm_map_copyin) onto
8949  *		the specified destination region (dst_map, dst_addr).
8950  *		The destination must be writeable.
8951  *
8952  *		Unlike vm_map_copyout, this routine actually
8953  *		writes over previously-mapped memory.  If the
8954  *		previous mapping was to a permanent (user-supplied)
8955  *		memory object, it is preserved.
8956  *
8957  *		The attributes (protection and inheritance) of the
8958  *		destination region are preserved.
8959  *
8960  *		If successful, consumes the copy object.
8961  *		Otherwise, the caller is responsible for it.
8962  *
8963  *	Implementation notes:
8964  *		To overwrite aligned temporary virtual memory, it is
8965  *		sufficient to remove the previous mapping and insert
8966  *		the new copy.  This replacement is done either on
8967  *		the whole region (if no permanent virtual memory
8968  *		objects are embedded in the destination region) or
8969  *		in individual map entries.
8970  *
 *		To overwrite permanent virtual memory, it is necessary
8972  *		to copy each page, as the external memory management
8973  *		interface currently does not provide any optimizations.
8974  *
8975  *		Unaligned memory also has to be copied.  It is possible
8976  *		to use 'vm_trickery' to copy the aligned data.  This is
8977  *		not done but not hard to implement.
8978  *
8979  *		Once a page of permanent memory has been overwritten,
8980  *		it is impossible to interrupt this function; otherwise,
8981  *		the call would be neither atomic nor location-independent.
8982  *		The kernel-state portion of a user thread must be
8983  *		interruptible.
8984  *
8985  *		It may be expensive to forward all requests that might
8986  *		overwrite permanent memory (vm_write, vm_copy) to
8987  *		uninterruptible kernel threads.  This routine may be
8988  *		called by interruptible threads; however, success is
8989  *		not guaranteed -- if the request cannot be performed
8990  *		atomically and interruptibly, an error indication is
8991  *		returned.
8992  */
8993 
/*
 * vm_map_copy_overwrite_nested:
 *
 *	Workhorse for vm_map_copy_overwrite(): overwrite the range
 *	[dst_addr, dst_addr + copy->size) of "dst_map" with the contents
 *	of "copy", recursing into submaps (with "pmap" propagated, or the
 *	submap's/destination map's pmap when NULL).  Pass 1 validates the
 *	destination (writable, contiguous, overwritable); pass 2 performs
 *	the overwrite, splitting "copy" into pieces at submap boundaries.
 *	On success the copy object is consumed iff "discard_on_success";
 *	on failure the (possibly re-assembled) copy remains the caller's.
 *	Called and returns with dst_map unlocked.
 */
static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t                dst_map,
	vm_map_address_t        dst_addr,
	vm_map_copy_t           copy,
	boolean_t               interruptible,
	pmap_t                  pmap,
	boolean_t               discard_on_success)
{
	vm_map_offset_t         dst_end;
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	kern_return_t           kr;
	boolean_t               aligned = TRUE;
	boolean_t               contains_permanent_objects = FALSE;
	boolean_t               encountered_sub_map = FALSE;
	vm_map_offset_t         base_addr;
	vm_map_size_t           copy_size;
	vm_map_size_t           total_size;
	int                     copy_page_shift;


	/*
	 *	Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		return KERN_SUCCESS;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(
			dst_map, &dst_addr,
			copy, copy->size, TRUE, discard_on_success);
	}

	/*
	 *      Only works for entry lists at the moment.  Will
	 *	support page lists later.
	 */

	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);

	if (copy->size == 0) {
		/* Empty copy: nothing to write, just consume if asked. */
		if (discard_on_success) {
			vm_map_copy_discard(copy);
		}
		return KERN_SUCCESS;
	}

	copy_page_shift = copy->cpy_hdr.page_shift;

	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */

	if (!VM_MAP_PAGE_ALIGNED(copy->size,
	    VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(copy->offset,
	    VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)) ||
	    copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
		aligned = FALSE;
		dst_end = vm_map_round_page(dst_addr + copy->size,
		    VM_MAP_PAGE_MASK(dst_map));
	} else {
		dst_end = dst_addr + copy->size;
	}

	vm_map_lock(dst_map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (dst_addr >= dst_map->max_offset) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}
	vm_map_clip_start(dst_map,
	    tmp_entry,
	    vm_map_trunc_page(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)));
	/* Pass 1: validate the whole destination range. */
	for (entry = tmp_entry;;) {
		vm_map_entry_t  next = entry->vme_next;

		while (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			local_end = entry->vme_end;
			if (!(entry->needs_copy)) {
				/* if needs_copy we are a COW submap */
				/* in such a case we just replace so */
				/* there is no need for the follow-  */
				/* ing check.                        */
				encountered_sub_map = TRUE;
				sub_start = VME_OFFSET(entry);

				if (entry->vme_end < dst_end) {
					sub_end = entry->vme_end;
				} else {
					sub_end = dst_end;
				}
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				vm_map_unlock(dst_map);

				/* Validate the submap portion recursively. */
				kr = vm_map_overwrite_submap_recurse(
					VME_SUBMAP(entry),
					sub_start,
					sub_end - sub_start);
				if (kr != KERN_SUCCESS) {
					return kr;
				}
				vm_map_lock(dst_map);
			}

			if (dst_end <= entry->vme_end) {
				goto start_overwrite;
			}
			if (!vm_map_lookup_entry(dst_map, local_end,
			    &entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			next = entry->vme_next;
		}

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

/*
 *		our range is contained completely within this map entry
 */
		if (dst_end <= entry->vme_end) {
			break;
		}
/*
 *		check that range specified is contiguous region
 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}


		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		    (VME_OBJECT(entry)->true_share))) {
			contains_permanent_objects = TRUE;
		}

		entry = next;
	}/* for */

start_overwrite:
	/*
	 *	If there are permanent objects in the destination, then
	 *	the copy cannot be interrupted.
	 */

	if (interruptible && contains_permanent_objects) {
		vm_map_unlock(dst_map);
		return KERN_FAILURE;   /* XXX */
	}

	/*
	 *
	 *	Make a second pass, overwriting the data
	 *	At the beginning of each loop iteration,
	 *	the next entry to be overwritten is "tmp_entry"
	 *	(initially, the value returned from the lookup above),
	 *	and the starting address expected in that entry
	 *	is "start".
	 */

	total_size = copy->size;
	if (encountered_sub_map) {
		copy_size = 0;
		/* re-calculate tmp_entry since we've had the map */
		/* unlocked */
		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
	} else {
		copy_size = copy->size;
	}

	base_addr = dst_addr;
	while (TRUE) {
		/* deconstruct the copy object and do in parts */
		/* only in sub_map, interruptible case */
		vm_map_entry_t  copy_entry;
		vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
		vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
		int             nentries;
		int             remaining_entries = 0;
		vm_map_offset_t new_offset = 0;

		for (entry = tmp_entry; copy_size == 0;) {
			vm_map_entry_t  next;

			next = entry->vme_next;

			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpace tmp_entry, and the copy_size */
			/* may reflect the distance between them */
			/* if the current entry is found to be in transition */
			/* we will start over at the beginning or the last */
			/* encounter of a submap as dictated by base_addr */
			/* we will zero copy_size accordingly. */
			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				if (!vm_map_lookup_entry(dst_map, base_addr,
				    &tmp_entry)) {
					vm_map_unlock(dst_map);
					return KERN_INVALID_ADDRESS;
				}
				copy_size = 0;
				entry = tmp_entry;
				continue;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;
				vm_map_offset_t local_end;

				if (entry->needs_copy) {
					/* if this is a COW submap */
					/* just back the range with a */
					/* anonymous entry */
					if (entry->vme_end < dst_end) {
						sub_end = entry->vme_end;
					} else {
						sub_end = dst_end;
					}
					if (entry->vme_start < base_addr) {
						sub_start = base_addr;
					} else {
						sub_start = entry->vme_start;
					}
					/* Convert the clipped submap entry in */
					/* place into a plain anonymous entry. */
					vm_map_clip_end(
						dst_map, entry, sub_end);
					vm_map_clip_start(
						dst_map, entry, sub_start);
					assert(!entry->use_pmap);
					assert(!entry->iokit_acct);
					entry->use_pmap = TRUE;
					entry->is_sub_map = FALSE;
					vm_map_deallocate(
						VME_SUBMAP(entry));
					VME_OBJECT_SET(entry, VM_OBJECT_NULL);
					VME_OFFSET_SET(entry, 0);
					entry->is_shared = FALSE;
					entry->needs_copy = FALSE;
					entry->protection = VM_PROT_DEFAULT;
					entry->max_protection = VM_PROT_ALL;
					entry->wired_count = 0;
					entry->user_wired_count = 0;
					if (entry->inheritance
					    == VM_INHERIT_SHARE) {
						entry->inheritance = VM_INHERIT_COPY;
					}
					continue;
				}
				/* first take care of any non-sub_map */
				/* entries to send */
				if (base_addr < entry->vme_start) {
					/* stuff to send */
					copy_size =
					    entry->vme_start - base_addr;
					break;
				}
				sub_start = VME_OFFSET(entry);

				if (entry->vme_end < dst_end) {
					sub_end = entry->vme_end;
				} else {
					sub_end = dst_end;
				}
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				local_end = entry->vme_end;
				vm_map_unlock(dst_map);
				copy_size = sub_end - sub_start;

				/* adjust the copy object */
				/* Truncate "copy" to copy_size bytes, saving */
				/* the rest of the entry chain in next_copy/  */
				/* previous_prev/remaining_entries to restore */
				/* on failure or for later iterations.        */
				if (total_size > copy_size) {
					vm_map_size_t   local_size = 0;
					vm_map_size_t   entry_size;

					nentries = 1;
					new_offset = copy->offset;
					copy_entry = vm_map_copy_first_entry(copy);
					while (copy_entry !=
					    vm_map_copy_to_entry(copy)) {
						entry_size = copy_entry->vme_end -
						    copy_entry->vme_start;
						if ((local_size < copy_size) &&
						    ((local_size + entry_size)
						    >= copy_size)) {
							vm_map_copy_clip_end(copy,
							    copy_entry,
							    copy_entry->vme_start +
							    (copy_size - local_size));
							entry_size = copy_entry->vme_end -
							    copy_entry->vme_start;
							local_size += entry_size;
							new_offset += entry_size;
						}
						if (local_size >= copy_size) {
							next_copy = copy_entry->vme_next;
							copy_entry->vme_next =
							    vm_map_copy_to_entry(copy);
							previous_prev =
							    copy->cpy_hdr.links.prev;
							copy->cpy_hdr.links.prev = copy_entry;
							copy->size = copy_size;
							remaining_entries =
							    copy->cpy_hdr.nentries;
							remaining_entries -= nentries;
							copy->cpy_hdr.nentries = nentries;
							break;
						} else {
							local_size += entry_size;
							new_offset += entry_size;
							nentries++;
						}
						copy_entry = copy_entry->vme_next;
					}
				}

				/* Recurse into the submap with the right pmap. */
				if ((entry->use_pmap) && (pmap == NULL)) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						VME_SUBMAP(entry)->pmap,
						TRUE);
				} else if (pmap != NULL) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible, pmap,
						TRUE);
				} else {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						dst_map->pmap,
						TRUE);
				}
				if (kr != KERN_SUCCESS) {
					/* Re-attach the saved tail so the */
					/* caller gets the full copy back. */
					if (next_copy != NULL) {
						copy->cpy_hdr.nentries +=
						    remaining_entries;
						copy->cpy_hdr.links.prev->vme_next =
						    next_copy;
						copy->cpy_hdr.links.prev
						        = previous_prev;
						copy->size = total_size;
					}
					return kr;
				}
				if (dst_end <= local_end) {
					return KERN_SUCCESS;
				}
				/* otherwise copy no longer exists, it was */
				/* destroyed after successful copy_overwrite */
				copy = vm_map_copy_allocate();
				copy->type = VM_MAP_COPY_ENTRY_LIST;
				copy->offset = new_offset;
				copy->cpy_hdr.page_shift = copy_page_shift;

				/*
				 * XXX FBDP
				 * this does not seem to deal with
				 * the VM map store (R&B tree)
				 */

				total_size -= copy_size;
				copy_size = 0;
				/* put back remainder of copy in container */
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries = remaining_entries;
					copy->cpy_hdr.links.next = next_copy;
					copy->cpy_hdr.links.prev = previous_prev;
					copy->size = total_size;
					next_copy->vme_prev =
					    vm_map_copy_to_entry(copy);
					next_copy = NULL;
				}
				base_addr = local_end;
				vm_map_lock(dst_map);
				if (!vm_map_lookup_entry(dst_map,
				    local_end, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return KERN_INVALID_ADDRESS;
				}
				entry = tmp_entry;
				continue;
			}
			if (dst_end <= entry->vme_end) {
				copy_size = dst_end - base_addr;
				break;
			}

			if ((next == vm_map_to_entry(dst_map)) ||
			    (next->vme_start != entry->vme_end)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}

			entry = next;
		}/* for */

		next_copy = NULL;
		nentries = 1;

		/* adjust the copy object */
		/* Same truncation as above: clip "copy" to copy_size and */
		/* stash the remainder for the next trip around the loop. */
		if (total_size > copy_size) {
			vm_map_size_t   local_size = 0;
			vm_map_size_t   entry_size;

			new_offset = copy->offset;
			copy_entry = vm_map_copy_first_entry(copy);
			while (copy_entry != vm_map_copy_to_entry(copy)) {
				entry_size = copy_entry->vme_end -
				    copy_entry->vme_start;
				if ((local_size < copy_size) &&
				    ((local_size + entry_size)
				    >= copy_size)) {
					vm_map_copy_clip_end(copy, copy_entry,
					    copy_entry->vme_start +
					    (copy_size - local_size));
					entry_size = copy_entry->vme_end -
					    copy_entry->vme_start;
					local_size += entry_size;
					new_offset += entry_size;
				}
				if (local_size >= copy_size) {
					next_copy = copy_entry->vme_next;
					copy_entry->vme_next =
					    vm_map_copy_to_entry(copy);
					previous_prev =
					    copy->cpy_hdr.links.prev;
					copy->cpy_hdr.links.prev = copy_entry;
					copy->size = copy_size;
					remaining_entries =
					    copy->cpy_hdr.nentries;
					remaining_entries -= nentries;
					copy->cpy_hdr.nentries = nentries;
					break;
				} else {
					local_size += entry_size;
					new_offset += entry_size;
					nentries++;
				}
				copy_entry = copy_entry->vme_next;
			}
		}

		if (aligned) {
			pmap_t  local_pmap;

			if (pmap) {
				local_pmap = pmap;
			} else {
				local_pmap = dst_map->pmap;
			}

			if ((kr =  vm_map_copy_overwrite_aligned(
				    dst_map, tmp_entry, copy,
				    base_addr, local_pmap)) != KERN_SUCCESS) {
				/* Restore the saved tail before failing. */
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
					    remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
					    next_copy;
					copy->cpy_hdr.links.prev =
					    previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
			vm_map_unlock(dst_map);
		} else {
			/*
			 * Performance gain:
			 *
			 * if the copy and dst address are misaligned but the same
			 * offset within the page we can copy_not_aligned the
			 * misaligned parts and copy aligned the rest.  If they are
			 * aligned but len is unaligned we simply need to copy
			 * the end bit unaligned.  We'll need to split the misaligned
			 * bits of the region in this case !
			 */
			/* ALWAYS UNLOCKS THE dst_map MAP */
			kr = vm_map_copy_overwrite_unaligned(
				dst_map,
				tmp_entry,
				copy,
				base_addr,
				discard_on_success);
			if (kr != KERN_SUCCESS) {
				/* Restore the saved tail before failing. */
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
					    remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
					    next_copy;
					copy->cpy_hdr.links.prev =
					    previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
		}
		total_size -= copy_size;
		if (total_size == 0) {
			break;
		}
		/* Advance past the chunk just written and re-attach the */
		/* remainder of the copy object for the next iteration.  */
		base_addr += copy_size;
		copy_size = 0;
		copy->offset = new_offset;
		if (next_copy != NULL) {
			copy->cpy_hdr.nentries = remaining_entries;
			copy->cpy_hdr.links.next = next_copy;
			copy->cpy_hdr.links.prev = previous_prev;
			next_copy->vme_prev = vm_map_copy_to_entry(copy);
			copy->size = total_size;
		}
		vm_map_lock(dst_map);
		while (TRUE) {
			if (!vm_map_lookup_entry(dst_map,
			    base_addr, &tmp_entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			if (tmp_entry->in_transition) {
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);
			} else {
				break;
			}
		}
		vm_map_clip_start(dst_map,
		    tmp_entry,
		    vm_map_trunc_page(base_addr,
		    VM_MAP_PAGE_MASK(dst_map)));

		entry = tmp_entry;
	} /* while */

	/*
	 *	Throw away the vm_map_copy object
	 */
	if (discard_on_success) {
		vm_map_copy_discard(copy);
	}

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite */
9626 
9627 kern_return_t
vm_map_copy_overwrite(vm_map_t dst_map,vm_map_offset_t dst_addr,vm_map_copy_t copy,vm_map_size_t copy_size,boolean_t interruptible)9628 vm_map_copy_overwrite(
9629 	vm_map_t        dst_map,
9630 	vm_map_offset_t dst_addr,
9631 	vm_map_copy_t   copy,
9632 	vm_map_size_t   copy_size,
9633 	boolean_t       interruptible)
9634 {
9635 	vm_map_size_t   head_size, tail_size;
9636 	vm_map_copy_t   head_copy, tail_copy;
9637 	vm_map_offset_t head_addr, tail_addr;
9638 	vm_map_entry_t  entry;
9639 	kern_return_t   kr;
9640 	vm_map_offset_t effective_page_mask, effective_page_size;
9641 	int             copy_page_shift;
9642 
9643 	head_size = 0;
9644 	tail_size = 0;
9645 	head_copy = NULL;
9646 	tail_copy = NULL;
9647 	head_addr = 0;
9648 	tail_addr = 0;
9649 
9650 	if (interruptible ||
9651 	    copy == VM_MAP_COPY_NULL ||
9652 	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
9653 		/*
9654 		 * We can't split the "copy" map if we're interruptible
9655 		 * or if we don't have a "copy" map...
9656 		 */
9657 blunt_copy:
9658 		return vm_map_copy_overwrite_nested(dst_map,
9659 		           dst_addr,
9660 		           copy,
9661 		           interruptible,
9662 		           (pmap_t) NULL,
9663 		           TRUE);
9664 	}
9665 
9666 	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9667 	if (copy_page_shift < PAGE_SHIFT ||
9668 	    VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9669 		goto blunt_copy;
9670 	}
9671 
9672 	if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9673 		effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9674 	} else {
9675 		effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9676 		effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9677 		    effective_page_mask);
9678 	}
9679 	effective_page_size = effective_page_mask + 1;
9680 
9681 	if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9682 		/*
9683 		 * Too small to bother with optimizing...
9684 		 */
9685 		goto blunt_copy;
9686 	}
9687 
9688 	if ((dst_addr & effective_page_mask) !=
9689 	    (copy->offset & effective_page_mask)) {
9690 		/*
9691 		 * Incompatible mis-alignment of source and destination...
9692 		 */
9693 		goto blunt_copy;
9694 	}
9695 
9696 	/*
9697 	 * Proper alignment or identical mis-alignment at the beginning.
9698 	 * Let's try and do a small unaligned copy first (if needed)
9699 	 * and then an aligned copy for the rest.
9700 	 */
9701 	if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9702 		head_addr = dst_addr;
9703 		head_size = (effective_page_size -
9704 		    (copy->offset & effective_page_mask));
9705 		head_size = MIN(head_size, copy_size);
9706 	}
9707 	if (!vm_map_page_aligned(copy->offset + copy_size,
9708 	    effective_page_mask)) {
9709 		/*
9710 		 * Mis-alignment at the end.
9711 		 * Do an aligned copy up to the last page and
9712 		 * then an unaligned copy for the remaining bytes.
9713 		 */
9714 		tail_size = ((copy->offset + copy_size) &
9715 		    effective_page_mask);
9716 		tail_size = MIN(tail_size, copy_size);
9717 		tail_addr = dst_addr + copy_size - tail_size;
9718 		assert(tail_addr >= head_addr + head_size);
9719 	}
9720 	assert(head_size + tail_size <= copy_size);
9721 
9722 	if (head_size + tail_size == copy_size) {
9723 		/*
9724 		 * It's all unaligned, no optimization possible...
9725 		 */
9726 		goto blunt_copy;
9727 	}
9728 
9729 	/*
9730 	 * Can't optimize if there are any submaps in the
9731 	 * destination due to the way we free the "copy" map
9732 	 * progressively in vm_map_copy_overwrite_nested()
9733 	 * in that case.
9734 	 */
9735 	vm_map_lock_read(dst_map);
9736 	if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9737 		vm_map_unlock_read(dst_map);
9738 		goto blunt_copy;
9739 	}
9740 	for (;
9741 	    (entry != vm_map_copy_to_entry(copy) &&
9742 	    entry->vme_start < dst_addr + copy_size);
9743 	    entry = entry->vme_next) {
9744 		if (entry->is_sub_map) {
9745 			vm_map_unlock_read(dst_map);
9746 			goto blunt_copy;
9747 		}
9748 	}
9749 	vm_map_unlock_read(dst_map);
9750 
9751 	if (head_size) {
9752 		/*
9753 		 * Unaligned copy of the first "head_size" bytes, to reach
9754 		 * a page boundary.
9755 		 */
9756 
9757 		/*
9758 		 * Extract "head_copy" out of "copy".
9759 		 */
9760 		head_copy = vm_map_copy_allocate();
9761 		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9762 		head_copy->cpy_hdr.entries_pageable =
9763 		    copy->cpy_hdr.entries_pageable;
9764 		vm_map_store_init(&head_copy->cpy_hdr);
9765 		head_copy->cpy_hdr.page_shift = copy_page_shift;
9766 
9767 		entry = vm_map_copy_first_entry(copy);
9768 		if (entry->vme_end < copy->offset + head_size) {
9769 			head_size = entry->vme_end - copy->offset;
9770 		}
9771 
9772 		head_copy->offset = copy->offset;
9773 		head_copy->size = head_size;
9774 		copy->offset += head_size;
9775 		copy->size -= head_size;
9776 		copy_size -= head_size;
9777 		assert(copy_size > 0);
9778 
9779 		vm_map_copy_clip_end(copy, entry, copy->offset);
9780 		vm_map_copy_entry_unlink(copy, entry);
9781 		vm_map_copy_entry_link(head_copy,
9782 		    vm_map_copy_to_entry(head_copy),
9783 		    entry);
9784 
9785 		/*
9786 		 * Do the unaligned copy.
9787 		 */
9788 		kr = vm_map_copy_overwrite_nested(dst_map,
9789 		    head_addr,
9790 		    head_copy,
9791 		    interruptible,
9792 		    (pmap_t) NULL,
9793 		    FALSE);
9794 		if (kr != KERN_SUCCESS) {
9795 			goto done;
9796 		}
9797 	}
9798 
9799 	if (tail_size) {
9800 		/*
9801 		 * Extract "tail_copy" out of "copy".
9802 		 */
9803 		tail_copy = vm_map_copy_allocate();
9804 		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9805 		tail_copy->cpy_hdr.entries_pageable =
9806 		    copy->cpy_hdr.entries_pageable;
9807 		vm_map_store_init(&tail_copy->cpy_hdr);
9808 		tail_copy->cpy_hdr.page_shift = copy_page_shift;
9809 
9810 		tail_copy->offset = copy->offset + copy_size - tail_size;
9811 		tail_copy->size = tail_size;
9812 
9813 		copy->size -= tail_size;
9814 		copy_size -= tail_size;
9815 		assert(copy_size > 0);
9816 
9817 		entry = vm_map_copy_last_entry(copy);
9818 		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9819 		entry = vm_map_copy_last_entry(copy);
9820 		vm_map_copy_entry_unlink(copy, entry);
9821 		vm_map_copy_entry_link(tail_copy,
9822 		    vm_map_copy_last_entry(tail_copy),
9823 		    entry);
9824 	}
9825 
9826 	/*
9827 	 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9828 	 * we want to avoid TOCTOU issues w.r.t copy->size but
9829 	 * we don't need to change vm_map_copy_overwrite_nested()
9830 	 * and all other vm_map_copy_overwrite variants.
9831 	 *
9832 	 * So we assign the original copy_size that was passed into
9833 	 * this routine back to copy.
9834 	 *
9835 	 * This use of local 'copy_size' passed into this routine is
9836 	 * to try and protect against TOCTOU attacks where the kernel
9837 	 * has been exploited. We don't expect this to be an issue
9838 	 * during normal system operation.
9839 	 */
9840 	assertf(copy->size == copy_size,
9841 	    "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9842 	copy->size = copy_size;
9843 
9844 	/*
9845 	 * Copy most (or possibly all) of the data.
9846 	 */
9847 	kr = vm_map_copy_overwrite_nested(dst_map,
9848 	    dst_addr + head_size,
9849 	    copy,
9850 	    interruptible,
9851 	    (pmap_t) NULL,
9852 	    FALSE);
9853 	if (kr != KERN_SUCCESS) {
9854 		goto done;
9855 	}
9856 
9857 	if (tail_size) {
9858 		kr = vm_map_copy_overwrite_nested(dst_map,
9859 		    tail_addr,
9860 		    tail_copy,
9861 		    interruptible,
9862 		    (pmap_t) NULL,
9863 		    FALSE);
9864 	}
9865 
9866 done:
9867 	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9868 	if (kr == KERN_SUCCESS) {
9869 		/*
9870 		 * Discard all the copy maps.
9871 		 */
9872 		if (head_copy) {
9873 			vm_map_copy_discard(head_copy);
9874 			head_copy = NULL;
9875 		}
9876 		vm_map_copy_discard(copy);
9877 		if (tail_copy) {
9878 			vm_map_copy_discard(tail_copy);
9879 			tail_copy = NULL;
9880 		}
9881 	} else {
9882 		/*
9883 		 * Re-assemble the original copy map.
9884 		 */
9885 		if (head_copy) {
9886 			entry = vm_map_copy_first_entry(head_copy);
9887 			vm_map_copy_entry_unlink(head_copy, entry);
9888 			vm_map_copy_entry_link(copy,
9889 			    vm_map_copy_to_entry(copy),
9890 			    entry);
9891 			copy->offset -= head_size;
9892 			copy->size += head_size;
9893 			vm_map_copy_discard(head_copy);
9894 			head_copy = NULL;
9895 		}
9896 		if (tail_copy) {
9897 			entry = vm_map_copy_last_entry(tail_copy);
9898 			vm_map_copy_entry_unlink(tail_copy, entry);
9899 			vm_map_copy_entry_link(copy,
9900 			    vm_map_copy_last_entry(copy),
9901 			    entry);
9902 			copy->size += tail_size;
9903 			vm_map_copy_discard(tail_copy);
9904 			tail_copy = NULL;
9905 		}
9906 	}
9907 	return kr;
9908 }
9909 
9910 
9911 /*
9912  *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
9913  *
 *	Description:
9915  *	Physically copy unaligned data
9916  *
9917  *	Implementation:
9918  *	Unaligned parts of pages have to be physically copied.  We use
 *	a modified form of vm_fault_copy (which understands non-aligned
 *	page offsets and sizes) to do the copy.  We attempt to copy as
 *	much memory in one go as possible, however vm_fault_copy copies
9922  *	within 1 memory object so we have to find the smaller of "amount left"
9923  *	"source object data size" and "target object data size".  With
9924  *	unaligned data we don't need to split regions, therefore the source
9925  *	(copy) object should be one map entry, the target range may be split
9926  *	over multiple map entries however.  In any event we are pessimistic
9927  *	about these assumptions.
9928  *
9929  *	Assumptions:
9930  *	dst_map is locked on entry and is return locked on success,
9931  *	unlocked on error.
9932  */
9933 
static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t        dst_map,
	vm_map_entry_t  entry,
	vm_map_copy_t   copy,
	vm_map_offset_t start,
	boolean_t       discard_on_success)
{
	vm_map_entry_t          copy_entry;
	vm_map_entry_t          copy_entry_next;
	vm_map_version_t        version;
	vm_object_t             dst_object;
	vm_object_offset_t      dst_offset;
	vm_object_offset_t      src_offset;
	vm_object_offset_t      entry_offset;
	vm_map_offset_t         entry_end;
	vm_map_size_t           src_size,
	    dst_size,
	    copy_size,
	    amount_left;
	kern_return_t           kr = KERN_SUCCESS;

	/*
	 * Physically copy the data described by "copy" into dst_map at
	 * "start".  The caller enters with dst_map write-locked; we
	 * immediately downgrade to a read lock and juggle the lock around
	 * each vm_fault_copy() pass, using "version" to detect map changes.
	 */

	copy_entry = vm_map_copy_first_entry(copy);

	vm_map_lock_write_to_read(dst_map);

	/* byte offset of copy->offset within its (copy-map-sized) page */
	src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
	amount_left = copy->size;
/*
 *	unaligned so we never clipped this entry, we need the offset into
 *	the vm_object not just the data.
 */
	while (amount_left > 0) {
		/* ran past the end of the destination map: no entry for "start" */
		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock_read(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/* "start" must be within the current map entry */
		assert((start >= entry->vme_start) && (start < entry->vme_end));

		dst_offset = start - entry->vme_start;

		dst_size = entry->vme_end - start;

		src_size = copy_entry->vme_end -
		    (copy_entry->vme_start + src_offset);

		if (dst_size < src_size) {
/*
 *			we can only copy dst_size bytes before
 *			we have to get the next destination entry
 */
			copy_size = dst_size;
		} else {
/*
 *			we can only copy src_size bytes before
 *			we have to get the next source copy entry
 */
			copy_size = src_size;
		}

		if (copy_size > amount_left) {
			copy_size = amount_left;
		}
/*
 *		Entry needs copy, create a shadow object for the
 *		copy-on-write region.  Requires upgrading to the
 *		write lock; if the upgrade fails the map may have
 *		changed, so redo the lookup.
 */
		if (entry->needs_copy &&
		    ((entry->protection & VM_PROT_WRITE) != 0)) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			VME_OBJECT_SHADOW(entry,
			    (vm_map_size_t)(entry->vme_end
			    - entry->vme_start));
			entry->needs_copy = FALSE;
			vm_map_lock_write_to_read(dst_map);
		}
		dst_object = VME_OBJECT(entry);
/*
 *		unlike with the virtual (aligned) copy we're going
 *		to fault on it therefore we need a target object.
 */
		if (dst_object == VM_OBJECT_NULL) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			dst_object = vm_object_allocate((vm_map_size_t)
			    entry->vme_end - entry->vme_start);
			VME_OBJECT_SET(entry, dst_object);
			VME_OFFSET_SET(entry, 0);
			assert(entry->use_pmap);
			vm_map_lock_write_to_read(dst_map);
		}
/*
 *		Take an object reference and unlock map. The "entry" may
 *		disappear or change when the map is unlocked.
 */
		vm_object_reference(dst_object);
		version.main_timestamp = dst_map->timestamp;
		/* snapshot the fields we still need after dropping the lock */
		entry_offset = VME_OFFSET(entry);
		entry_end = entry->vme_end;
		vm_map_unlock_read(dst_map);
/*
 *		Copy as much as possible in one pass
 */
		kr = vm_fault_copy(
			VME_OBJECT(copy_entry),
			VME_OFFSET(copy_entry) + src_offset,
			&copy_size,
			dst_object,
			entry_offset + dst_offset,
			dst_map,
			&version,
			THREAD_UNINT );

		/* vm_fault_copy() updated copy_size to what was actually copied */
		start += copy_size;
		src_offset += copy_size;
		amount_left -= copy_size;
/*
 *		Release the object reference
 */
		vm_object_deallocate(dst_object);
/*
 *		If a hard error occurred, return it now
 */
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
		    || amount_left == 0) {
/*
 *			all done with this copy entry, dispose.
 */
			copy_entry_next = copy_entry->vme_next;

			if (discard_on_success) {
				vm_map_copy_entry_unlink(copy, copy_entry);
				assert(!copy_entry->is_sub_map);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			if (copy_entry_next == vm_map_copy_to_entry(copy) &&
			    amount_left) {
/*
 *				not finished copying but run out of source
 */
				return KERN_INVALID_ADDRESS;
			}

			copy_entry = copy_entry_next;

			src_offset = 0;
		}

		if (amount_left == 0) {
			return KERN_SUCCESS;
		}

		vm_map_lock_read(dst_map);
		if (version.main_timestamp == dst_map->timestamp) {
			if (start == entry_end) {
/*
 *				destination region is split.  Use the version
 *				information to avoid a lookup in the normal
 *				case.
 */
				entry = entry->vme_next;
/*
 *				should be contiguous. Fail if we encounter
 *				a hole in the destination.
 */
				if (start != entry->vme_start) {
					vm_map_unlock_read(dst_map);
					return KERN_INVALID_ADDRESS;
				}
			}
		} else {
/*
 *			Map version check failed.
 *			we must lookup the entry because somebody
 *			might have changed the map behind our backs.
 */
RetryLookup:
			if (!vm_map_lookup_entry(dst_map, start, &entry)) {
				vm_map_unlock_read(dst_map);
				return KERN_INVALID_ADDRESS;
			}
		}
	}/* while */

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_unaligned */
10134 
10135 /*
10136  *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
10137  *
10138  *	Description:
10139  *	Does all the vm_trickery possible for whole pages.
10140  *
10141  *	Implementation:
10142  *
10143  *	If there are no permanent objects in the destination,
10144  *	and the source and destination map entry zones match,
10145  *	and the destination map entry is not shared,
10146  *	then the map entries can be deleted and replaced
10147  *	with those from the copy.  The following code is the
10148  *	basic idea of what to do, but there are lots of annoying
10149  *	little details about getting protection and inheritance
10150  *	right.  Should add protection, inheritance, and sharing checks
10151  *	to the above pass and make sure that no wiring is involved.
10152  */
10153 
/*
 * Telemetry counters: number of times vm_map_copy_overwrite_aligned()
 * bailed out of the optimized (entry-substitution) path to the slow
 * physical-copy path, by reason.
 */
int vm_map_copy_overwrite_aligned_src_not_internal = 0;
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
int vm_map_copy_overwrite_aligned_src_large = 0;
10157 
static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t        dst_map,
	vm_map_entry_t  tmp_entry,
	vm_map_copy_t   copy,
	vm_map_offset_t start,
	__unused pmap_t pmap)
{
	vm_object_t     object;
	vm_map_entry_t  copy_entry;
	vm_map_size_t   copy_size;
	vm_map_size_t   size;
	vm_map_entry_t  entry;

	/*
	 * Consume the copy's entries front-to-back, overwriting dst_map
	 * in place starting at "start".  For each entry either substitute
	 * the copy entry's object into the destination entry (fast,
	 * virtual copy) or fall through to "slow_copy" and physically
	 * copy the pages with vm_fault_copy().
	 *
	 * NOTE(review): dst_map appears to be entered locked — every error
	 * path unlocks it and the slow path re-locks it after faulting;
	 * confirm against callers.
	 */
	while ((copy_entry = vm_map_copy_first_entry(copy))
	    != vm_map_copy_to_entry(copy)) {
		copy_size = (copy_entry->vme_end - copy_entry->vme_start);

		entry = tmp_entry;
		if (entry->is_sub_map) {
			/* unnested when clipped earlier */
			assert(!entry->use_pmap);
		}
		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
		size = (entry->vme_end - entry->vme_start);
		/*
		 *	Make sure that no holes popped up in the
		 *	address map, and that the protection is
		 *	still valid, in case the map was unlocked
		 *	earlier.
		 */

		if ((entry->vme_start != start) || ((entry->is_sub_map)
		    && !entry->needs_copy)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
		assert(entry != vm_map_to_entry(dst_map));

		/*
		 *	Check protection again
		 */

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		if (!vm_map_entry_is_overwritable(dst_map, entry)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 *	Adjust to source size first
		 */

		if (copy_size < size) {
			if (entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
			    VM_MAP_PAGE_MASK(dst_map))) {
				/* no longer map-aligned */
				entry->map_aligned = FALSE;
			}
			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
			size = copy_size;
		}

		/*
		 *	Adjust to destination size
		 */

		if (size < copy_size) {
			vm_map_copy_clip_end(copy, copy_entry,
			    copy_entry->vme_start + size);
			copy_size = size;
		}

		/* after clipping, source and destination entries match in size */
		assert((entry->vme_end - entry->vme_start) == size);
		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
		assert((copy_entry->vme_end - copy_entry->vme_start) == size);

		/*
		 *	If the destination contains temporary unshared memory,
		 *	we can perform the copy by throwing it away and
		 *	installing the source data.
		 */

		object = VME_OBJECT(entry);
		if ((!entry->is_shared &&
		    ((object == VM_OBJECT_NULL) ||
		    (object->internal && !object->true_share))) ||
		    entry->needs_copy) {
			vm_object_t     old_object = VME_OBJECT(entry);
			vm_object_offset_t      old_offset = VME_OFFSET(entry);
			vm_object_offset_t      offset;

			/*
			 * Ensure that the source and destination aren't
			 * identical
			 */
			if (old_object == VME_OBJECT(copy_entry) &&
			    old_offset == VME_OFFSET(copy_entry)) {
				/* same object+offset: nothing to copy, just consume the entry */
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_map_copy_entry_dispose(copy, copy_entry);

				if (old_object != VM_OBJECT_NULL) {
					vm_object_deallocate(old_object);
				}

				start = tmp_entry->vme_end;
				tmp_entry = tmp_entry->vme_next;
				continue;
			}

#if XNU_TARGET_OS_OSX
#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
#define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
			if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
			    VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
			    copy_size <= __TRADEOFF1_COPY_SIZE) {
				/*
				 * Virtual vs. Physical copy tradeoff #1.
				 *
				 * Copying only a few pages out of a large
				 * object:  do a physical copy instead of
				 * a virtual copy, to avoid possibly keeping
				 * the entire large object alive because of
				 * those few copy-on-write pages.
				 */
				vm_map_copy_overwrite_aligned_src_large++;
				goto slow_copy;
			}
#endif /* XNU_TARGET_OS_OSX */

			if ((dst_map->pmap != kernel_pmap) &&
			    (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
			    (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
				vm_object_t new_object, new_shadow;

				/*
				 * We're about to map something over a mapping
				 * established by malloc()...
				 */
				new_object = VME_OBJECT(copy_entry);
				if (new_object != VM_OBJECT_NULL) {
					vm_object_lock_shared(new_object);
				}
				/*
				 * Walk down the shadow chain (hand-over-hand
				 * shared locking) to find the bottom object.
				 */
				while (new_object != VM_OBJECT_NULL &&
#if XNU_TARGET_OS_OSX
				    !new_object->true_share &&
				    new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
#endif /* XNU_TARGET_OS_OSX */
				    new_object->internal) {
					new_shadow = new_object->shadow;
					if (new_shadow == VM_OBJECT_NULL) {
						break;
					}
					vm_object_lock_shared(new_shadow);
					vm_object_unlock(new_object);
					new_object = new_shadow;
				}
				if (new_object != VM_OBJECT_NULL) {
					if (!new_object->internal) {
						/*
						 * The new mapping is backed
						 * by an external object.  We
						 * don't want malloc'ed memory
						 * to be replaced with such a
						 * non-anonymous mapping, so
						 * let's go off the optimized
						 * path...
						 */
						vm_map_copy_overwrite_aligned_src_not_internal++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
#if XNU_TARGET_OS_OSX
					if (new_object->true_share ||
					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
						/*
						 * Same if there's a "true_share"
						 * object in the shadow chain, or
						 * an object with a non-default
						 * (SYMMETRIC) copy strategy.
						 */
						vm_map_copy_overwrite_aligned_src_not_symmetric++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
#endif /* XNU_TARGET_OS_OSX */
					vm_object_unlock(new_object);
				}
				/*
				 * The new mapping is still backed by
				 * anonymous (internal) memory, so it's
				 * OK to substitute it for the original
				 * malloc() mapping.
				 */
			}

			if (old_object != VM_OBJECT_NULL) {
				/* tear down the old backing (submap or object) and its pmap state */
				if (entry->is_sub_map) {
					if (entry->use_pmap) {
#ifndef NO_NESTED_PMAP
						pmap_unnest(dst_map->pmap,
						    (addr64_t)entry->vme_start,
						    entry->vme_end - entry->vme_start);
#endif  /* NO_NESTED_PMAP */
						if (dst_map->mapped_in_other_pmaps) {
							/* clean up parent */
							/* map/maps */
							vm_map_submap_pmap_clean(
								dst_map, entry->vme_start,
								entry->vme_end,
								VME_SUBMAP(entry),
								VME_OFFSET(entry));
						}
					} else {
						vm_map_submap_pmap_clean(
							dst_map, entry->vme_start,
							entry->vme_end,
							VME_SUBMAP(entry),
							VME_OFFSET(entry));
					}
					vm_map_deallocate(VME_SUBMAP(entry));
				} else {
					if (dst_map->mapped_in_other_pmaps) {
						vm_object_pmap_protect_options(
							VME_OBJECT(entry),
							VME_OFFSET(entry),
							entry->vme_end
							- entry->vme_start,
							PMAP_NULL,
							PAGE_SIZE,
							entry->vme_start,
							VM_PROT_NONE,
							PMAP_OPTIONS_REMOVE);
					} else {
						pmap_remove_options(
							dst_map->pmap,
							(addr64_t)(entry->vme_start),
							(addr64_t)(entry->vme_end),
							PMAP_OPTIONS_REMOVE);
					}
					vm_object_deallocate(old_object);
				}
			}

			if (entry->iokit_acct) {
				/* keep using iokit accounting */
				entry->use_pmap = FALSE;
			} else {
				/* use pmap accounting */
				entry->use_pmap = TRUE;
			}
			/* install the copy entry's object in place of the old backing */
			entry->is_sub_map = FALSE;
			VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
			object = VME_OBJECT(entry);
			entry->needs_copy = copy_entry->needs_copy;
			entry->wired_count = 0;
			entry->user_wired_count = 0;
			offset = VME_OFFSET(copy_entry);
			VME_OFFSET_SET(entry, offset);

			vm_map_copy_entry_unlink(copy, copy_entry);
			vm_map_copy_entry_dispose(copy, copy_entry);

			/*
			 * we could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped
			 * and ALL the pages in the dest were touched, if there were fewer
			 * than 2/3 of the pages touched, this optimization actually cost more cycles
			 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
			 */

			/*
			 *	Set up for the next iteration.  The map
			 *	has not been unlocked, so the next
			 *	address should be at the end of this
			 *	entry, and the next map entry should be
			 *	the one following it.
			 */

			start = tmp_entry->vme_end;
			tmp_entry = tmp_entry->vme_next;
		} else {
			vm_map_version_t        version;
			vm_object_t             dst_object;
			vm_object_offset_t      dst_offset;
			kern_return_t           r;

slow_copy:
			if (entry->needs_copy) {
				VME_OBJECT_SHADOW(entry,
				    (entry->vme_end -
				    entry->vme_start));
				entry->needs_copy = FALSE;
			}

			dst_object = VME_OBJECT(entry);
			dst_offset = VME_OFFSET(entry);

			/*
			 *	Take an object reference, and record
			 *	the map version information so that the
			 *	map can be safely unlocked.
			 */

			if (dst_object == VM_OBJECT_NULL) {
				/*
				 * We would usually have just taken the
				 * optimized path above if the destination
				 * object has not been allocated yet.  But we
				 * now disable that optimization if the copy
				 * entry's object is not backed by anonymous
				 * memory to avoid replacing malloc'ed
				 * (i.e. re-usable) anonymous memory with a
				 * not-so-anonymous mapping.
				 * So we have to handle this case here and
				 * allocate a new VM object for this map entry.
				 */
				dst_object = vm_object_allocate(
					entry->vme_end - entry->vme_start);
				dst_offset = 0;
				VME_OBJECT_SET(entry, dst_object);
				VME_OFFSET_SET(entry, dst_offset);
				assert(entry->use_pmap);
			}

			vm_object_reference(dst_object);

			/* account for unlock bumping up timestamp */
			version.main_timestamp = dst_map->timestamp + 1;

			vm_map_unlock(dst_map);

			/*
			 *	Copy as much as possible in one pass
			 */

			copy_size = size;
			r = vm_fault_copy(
				VME_OBJECT(copy_entry),
				VME_OFFSET(copy_entry),
				&copy_size,
				dst_object,
				dst_offset,
				dst_map,
				&version,
				THREAD_UNINT );

			/*
			 *	Release the object reference
			 */

			vm_object_deallocate(dst_object);

			/*
			 *	If a hard error occurred, return it now
			 */

			if (r != KERN_SUCCESS) {
				return r;
			}

			if (copy_size != 0) {
				/*
				 *	Dispose of the copied region
				 */

				vm_map_copy_clip_end(copy, copy_entry,
				    copy_entry->vme_start + copy_size);
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			/*
			 *	Pick up in the destination map where we left off.
			 *
			 *	Use the version information to avoid a lookup
			 *	in the normal case.
			 */

			start += copy_size;
			vm_map_lock(dst_map);
			if (version.main_timestamp == dst_map->timestamp &&
			    copy_size != 0) {
				/* We can safely use saved tmp_entry value */

				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_end(dst_map, tmp_entry, start);
				tmp_entry = tmp_entry->vme_next;
			} else {
				/* Must do lookup of tmp_entry */

				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return KERN_INVALID_ADDRESS;
				}
				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_start(dst_map, tmp_entry, start);
			}
		}
	}/* while */

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_aligned */
10583 
10584 /*
10585  *	Routine: vm_map_copyin_kernel_buffer [internal use only]
10586  *
10587  *	Description:
10588  *		Copy in data to a kernel buffer from space in the
10589  *		source map. The original space may be optionally
10590  *		deallocated.
10591  *
10592  *		If successful, returns a new copy object.
10593  */
10594 static kern_return_t
vm_map_copyin_kernel_buffer(vm_map_t src_map,vm_map_offset_t src_addr,vm_map_size_t len,boolean_t src_destroy,vm_map_copy_t * copy_result)10595 vm_map_copyin_kernel_buffer(
10596 	vm_map_t        src_map,
10597 	vm_map_offset_t src_addr,
10598 	vm_map_size_t   len,
10599 	boolean_t       src_destroy,
10600 	vm_map_copy_t   *copy_result)
10601 {
10602 	kern_return_t kr;
10603 	vm_map_copy_t copy;
10604 
10605 	if (len > msg_ool_size_small) {
10606 		return KERN_INVALID_ARGUMENT;
10607 	}
10608 
10609 	copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
10610 	copy->cpy_kdata = kalloc_data(len, Z_WAITOK);
10611 	if (copy->cpy_kdata == NULL) {
10612 		zfree(vm_map_copy_zone, copy);
10613 		return KERN_RESOURCE_SHORTAGE;
10614 	}
10615 
10616 	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10617 	copy->size = len;
10618 	copy->offset = 0;
10619 
10620 	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10621 	if (kr != KERN_SUCCESS) {
10622 		kfree_data(copy->cpy_kdata, len);
10623 		zfree(vm_map_copy_zone, copy);
10624 		return kr;
10625 	}
10626 	if (src_destroy) {
10627 		(void) vm_map_remove(
10628 			src_map,
10629 			vm_map_trunc_page(src_addr,
10630 			VM_MAP_PAGE_MASK(src_map)),
10631 			vm_map_round_page(src_addr + len,
10632 			VM_MAP_PAGE_MASK(src_map)),
10633 			(VM_MAP_REMOVE_INTERRUPTIBLE |
10634 			VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10635 			((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10636 	}
10637 	*copy_result = copy;
10638 	return KERN_SUCCESS;
10639 }
10640 
10641 /*
10642  *	Routine: vm_map_copyout_kernel_buffer	[internal use only]
10643  *
10644  *	Description:
10645  *		Copy out data from a kernel buffer into space in the
 *		destination map. The space may be optionally dynamically
10647  *		allocated.
10648  *
10649  *		If successful, consumes the copy object.
10650  *		Otherwise, the caller is responsible for it.
10651  */
/* Count of copyout() failures when writing into a foreign map (diagnostics). */
static int vm_map_copyout_kernel_buffer_failures = 0;
static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t                map,
	vm_map_address_t        *addr,  /* IN/OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size,
	boolean_t               overwrite,
	boolean_t               consume_on_success)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	/* caller must pass the copy object's own size */
	assert(copy->size == copy_size);

	/*
	 * check for corrupted vm_map_copy structure
	 * (kernel-buffer copies are bounded by msg_ool_size_small
	 * and always carry a zero offset)
	 */
	if (copy_size > msg_ool_size_small || copy->offset) {
		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
		    (long long)copy->size, (long long)copy->offset);
	}

	if (!overwrite) {
		/*
		 * Allocate space in the target map for the data
		 * (overwrite == FALSE means "*addr" is an OUT parameter:
		 * pick any free range in "map")
		 */
		*addr = 0;
		kr = vm_map_enter(map,
		    addr,
		    vm_map_round_page(copy_size,
		    VM_MAP_PAGE_MASK(map)),
		    (vm_map_offset_t) 0,
		    VM_FLAGS_ANYWHERE,
		    VM_MAP_KERNEL_FLAGS_NONE,
		    VM_KERN_MEMORY_NONE,
		    VM_OBJECT_NULL,
		    (vm_object_offset_t) 0,
		    FALSE,
		    VM_PROT_DEFAULT,
		    VM_PROT_ALL,
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
#if KASAN
		if (map->pmap == kernel_pmap) {
			kasan_notify_address(*addr, copy->size);
		}
#endif
	}

	/*
	 * Copyout the data from the kernel buffer to the target map.
	 */
	if (thread->map == map) {
		/*
		 * If the target map is the current map, just do
		 * the copy.
		 */
		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t oldmap;

		/*
		 * If the target map is another map, assume the
		 * target's address space identity for the duration
		 * of the copy.
		 */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);

		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			vm_map_copyout_kernel_buffer_failures++;
			kr = KERN_INVALID_ADDRESS;
		}

		/* restore our own address space before dropping the ref */
		(void) vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}

	if (kr != KERN_SUCCESS) {
		/* the copy failed, clean up */
		if (!overwrite) {
			/*
			 * Deallocate the space we allocated in the target map.
			 */
			(void) vm_map_remove(
				map,
				vm_map_trunc_page(*addr,
				VM_MAP_PAGE_MASK(map)),
				vm_map_round_page((*addr +
				vm_map_round_page(copy_size,
				VM_MAP_PAGE_MASK(map))),
				VM_MAP_PAGE_MASK(map)),
				VM_MAP_REMOVE_NO_FLAGS);
			*addr = 0;
		}
	} else {
		/* copy was successful, discard the copy structure */
		if (consume_on_success) {
			kfree_data(copy->cpy_kdata, copy_size);
			zfree(vm_map_copy_zone, copy);
		}
	}

	return kr;
}
10764 
10765 /*
10766  *	Routine:	vm_map_copy_insert      [internal use only]
10767  *
10768  *	Description:
10769  *		Link a copy chain ("copy") into a map at the
10770  *		specified location (after "where").
10771  *	Side effects:
10772  *		The copy chain is destroyed.
10773  */
10774 static void
vm_map_copy_insert(vm_map_t map,vm_map_entry_t after_where,vm_map_copy_t copy)10775 vm_map_copy_insert(
10776 	vm_map_t        map,
10777 	vm_map_entry_t  after_where,
10778 	vm_map_copy_t   copy)
10779 {
10780 	vm_map_entry_t  entry;
10781 
10782 	while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10783 		entry = vm_map_copy_first_entry(copy);
10784 		vm_map_copy_entry_unlink(copy, entry);
10785 		vm_map_store_entry_link(map, after_where, entry,
10786 		    VM_MAP_KERNEL_FLAGS_NONE);
10787 		after_where = entry;
10788 	}
10789 	zfree(vm_map_copy_zone, copy);
10790 }
10791 
10792 void
vm_map_copy_remap(vm_map_t map,vm_map_entry_t where,vm_map_copy_t copy,vm_map_offset_t adjustment,vm_prot_t cur_prot,vm_prot_t max_prot,vm_inherit_t inheritance)10793 vm_map_copy_remap(
10794 	vm_map_t        map,
10795 	vm_map_entry_t  where,
10796 	vm_map_copy_t   copy,
10797 	vm_map_offset_t adjustment,
10798 	vm_prot_t       cur_prot,
10799 	vm_prot_t       max_prot,
10800 	vm_inherit_t    inheritance)
10801 {
10802 	vm_map_entry_t  copy_entry, new_entry;
10803 
10804 	for (copy_entry = vm_map_copy_first_entry(copy);
10805 	    copy_entry != vm_map_copy_to_entry(copy);
10806 	    copy_entry = copy_entry->vme_next) {
10807 		/* get a new VM map entry for the map */
10808 		new_entry = vm_map_entry_create(map,
10809 		    !map->hdr.entries_pageable);
10810 		/* copy the "copy entry" to the new entry */
10811 		vm_map_entry_copy(map, new_entry, copy_entry);
10812 		/* adjust "start" and "end" */
10813 		new_entry->vme_start += adjustment;
10814 		new_entry->vme_end += adjustment;
10815 		/* clear some attributes */
10816 		new_entry->inheritance = inheritance;
10817 		new_entry->protection = cur_prot;
10818 		new_entry->max_protection = max_prot;
10819 		new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10820 		/* take an extra reference on the entry's "object" */
10821 		if (new_entry->is_sub_map) {
10822 			assert(!new_entry->use_pmap); /* not nested */
10823 			vm_map_lock(VME_SUBMAP(new_entry));
10824 			vm_map_reference(VME_SUBMAP(new_entry));
10825 			vm_map_unlock(VME_SUBMAP(new_entry));
10826 		} else {
10827 			vm_object_reference(VME_OBJECT(new_entry));
10828 		}
10829 		/* insert the new entry in the map */
10830 		vm_map_store_entry_link(map, where, new_entry,
10831 		    VM_MAP_KERNEL_FLAGS_NONE);
10832 		/* continue inserting the "copy entries" after the new entry */
10833 		where = new_entry;
10834 	}
10835 }
10836 
10837 
10838 /*
10839  * Returns true if *size matches (or is in the range of) copy->size.
10840  * Upon returning true, the *size field is updated with the actual size of the
10841  * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10842  */
10843 boolean_t
vm_map_copy_validate_size(vm_map_t dst_map,vm_map_copy_t copy,vm_map_size_t * size)10844 vm_map_copy_validate_size(
10845 	vm_map_t                dst_map,
10846 	vm_map_copy_t           copy,
10847 	vm_map_size_t           *size)
10848 {
10849 	if (copy == VM_MAP_COPY_NULL) {
10850 		return FALSE;
10851 	}
10852 	vm_map_size_t copy_sz = copy->size;
10853 	vm_map_size_t sz = *size;
10854 	switch (copy->type) {
10855 	case VM_MAP_COPY_OBJECT:
10856 	case VM_MAP_COPY_KERNEL_BUFFER:
10857 		if (sz == copy_sz) {
10858 			return TRUE;
10859 		}
10860 		break;
10861 	case VM_MAP_COPY_ENTRY_LIST:
10862 		/*
10863 		 * potential page-size rounding prevents us from exactly
10864 		 * validating this flavor of vm_map_copy, but we can at least
10865 		 * assert that it's within a range.
10866 		 */
10867 		if (copy_sz >= sz &&
10868 		    copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10869 			*size = copy_sz;
10870 			return TRUE;
10871 		}
10872 		break;
10873 	default:
10874 		break;
10875 	}
10876 	return FALSE;
10877 }
10878 
10879 /*
10880  *	Routine:	vm_map_copyout_size
10881  *
10882  *	Description:
10883  *		Copy out a copy chain ("copy") into newly-allocated
10884  *		space in the destination map. Uses a prevalidated
10885  *		size for the copy object (vm_map_copy_validate_size).
10886  *
10887  *		If successful, consumes the copy object.
10888  *		Otherwise, the caller is responsible for it.
10889  */
10890 kern_return_t
vm_map_copyout_size(vm_map_t dst_map,vm_map_address_t * dst_addr,vm_map_copy_t copy,vm_map_size_t copy_size)10891 vm_map_copyout_size(
10892 	vm_map_t                dst_map,
10893 	vm_map_address_t        *dst_addr,      /* OUT */
10894 	vm_map_copy_t           copy,
10895 	vm_map_size_t           copy_size)
10896 {
10897 	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10898 	           TRUE,                     /* consume_on_success */
10899 	           VM_PROT_DEFAULT,
10900 	           VM_PROT_ALL,
10901 	           VM_INHERIT_DEFAULT);
10902 }
10903 
10904 /*
10905  *	Routine:	vm_map_copyout
10906  *
10907  *	Description:
10908  *		Copy out a copy chain ("copy") into newly-allocated
10909  *		space in the destination map.
10910  *
10911  *		If successful, consumes the copy object.
10912  *		Otherwise, the caller is responsible for it.
10913  */
10914 kern_return_t
vm_map_copyout(vm_map_t dst_map,vm_map_address_t * dst_addr,vm_map_copy_t copy)10915 vm_map_copyout(
10916 	vm_map_t                dst_map,
10917 	vm_map_address_t        *dst_addr,      /* OUT */
10918 	vm_map_copy_t           copy)
10919 {
10920 	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10921 	           TRUE,                     /* consume_on_success */
10922 	           VM_PROT_DEFAULT,
10923 	           VM_PROT_ALL,
10924 	           VM_INHERIT_DEFAULT);
10925 }
10926 
/*
 *	Routine:	vm_map_copyout_internal
 *
 *	Description:
 *		Common implementation behind vm_map_copyout() and
 *		vm_map_copyout_size(): place the pages described by
 *		"copy" into newly-allocated space in "dst_map".
 *		When "consume_on_success" is TRUE the copy object is
 *		consumed on success; otherwise its entries are cloned
 *		(via vm_map_copy_remap) and the caller keeps "copy".
 */
kern_return_t
vm_map_copyout_internal(
	vm_map_t                dst_map,
	vm_map_address_t        *dst_addr,      /* OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size,
	boolean_t               consume_on_success,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_size_t           size;
	vm_map_size_t           adjustment;
	vm_map_offset_t         start;
	vm_object_offset_t      vm_copy_start;
	vm_map_entry_t          last;
	vm_map_entry_t          entry;
	vm_map_entry_t          hole_entry;
	vm_map_copy_t           original_copy;

	/*
	 *	Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		*dst_addr = 0;
		return KERN_SUCCESS;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	/* the caller-supplied size must match the copy object exactly */
	if (copy->size != copy_size) {
		*dst_addr = 0;
		return KERN_FAILURE;
	}

	/*
	 *	Check for special copy object, created
	 *	by vm_map_copyin_object.
	 */

	if (copy->type == VM_MAP_COPY_OBJECT) {
		vm_object_t             object = copy->cpy_object;
		kern_return_t           kr;
		vm_object_offset_t      offset;

		/* map the object directly; no entry chain to transfer */
		offset = vm_object_trunc_page(copy->offset);
		size = vm_map_round_page((copy_size +
		    (vm_map_size_t)(copy->offset -
		    offset)),
		    VM_MAP_PAGE_MASK(dst_map));
		*dst_addr = 0;
		kr = vm_map_enter(dst_map, dst_addr, size,
		    (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
		    VM_MAP_KERNEL_FLAGS_NONE,
		    VM_KERN_MEMORY_NONE,
		    object, offset, FALSE,
		    VM_PROT_DEFAULT, VM_PROT_ALL,
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		/* Account for non-pagealigned copy object */
		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
		if (consume_on_success) {
			zfree(vm_map_copy_zone, copy);
		}
		return KERN_SUCCESS;
	}

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
		           copy, copy_size, FALSE,
		           consume_on_success);
	}

	/*
	 * If the copy was built with a different page size than the
	 * destination map uses, re-shape it first.  "original_copy"
	 * remembers the caller's object so the right one can be
	 * discarded (or preserved) at the end.
	 */
	original_copy = copy;
	if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
		kern_return_t kr;
		vm_map_copy_t target_copy;
		vm_map_offset_t overmap_start, overmap_end, trimmed_start;

		target_copy = VM_MAP_COPY_NULL;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy,
			0, /* offset */
			copy->size, /* size */
			dst_map,
			TRUE, /* copy */
			&target_copy,
			&overmap_start,
			&overmap_end,
			&trimmed_start);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("adjust failed 0x%x\n", kr);
			return kr;
		}
		DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
		if (target_copy != copy) {
			copy = target_copy;
		}
		copy_size = copy->size;
	}

	/*
	 *	Find space for the data
	 */

	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
	    VM_MAP_COPY_PAGE_MASK(copy));
	size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
	    VM_MAP_COPY_PAGE_MASK(copy))
	    - vm_copy_start;


StartAgain:;

	vm_map_lock(dst_map);
	if (dst_map->disable_vmentry_reuse == TRUE) {
		/* always allocate above the highest existing entry */
		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
		last = entry;
	} else {
		if (dst_map->holelistenabled) {
			hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);

			if (hole_entry == NULL) {
				/*
				 * No more space in the map?
				 */
				vm_map_unlock(dst_map);
				return KERN_NO_SPACE;
			}

			last = hole_entry;
			start = last->vme_start;
		} else {
			assert(first_free_is_valid(dst_map));
			start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
			    vm_map_min(dst_map) : last->vme_end;
		}
		start = vm_map_round_page(start,
		    VM_MAP_PAGE_MASK(dst_map));
	}

	/* walk the hole list / entry list until a big-enough gap is found */
	while (TRUE) {
		vm_map_entry_t  next = last->vme_next;
		vm_map_offset_t end = start + size;

		if ((end > dst_map->max_offset) || (end < start)) {
			/* past the end of the map, or arithmetic overflow */
			if (dst_map->wait_for_space) {
				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
					/* block until space is freed, then retry */
					assert_wait((event_t) dst_map,
					    THREAD_INTERRUPTIBLE);
					vm_map_unlock(dst_map);
					thread_block(THREAD_CONTINUE_NULL);
					goto StartAgain;
				}
			}
			vm_map_unlock(dst_map);
			return KERN_NO_SPACE;
		}

		if (dst_map->holelistenabled) {
			/* "last" is a hole: does the request fit inside it? */
			if (last->vme_end >= end) {
				break;
			}
		} else {
			/*
			 *	If there are no more entries, we must win.
			 *
			 *	OR
			 *
			 *	If there is another entry, it must be
			 *	after the end of the potential new region.
			 */

			if (next == vm_map_to_entry(dst_map)) {
				break;
			}

			if (next->vme_start >= end) {
				break;
			}
		}

		last = next;

		if (dst_map->holelistenabled) {
			if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
				/*
				 * Wrapped around
				 */
				vm_map_unlock(dst_map);
				return KERN_NO_SPACE;
			}
			start = last->vme_start;
		} else {
			start = last->vme_end;
		}
		start = vm_map_round_page(start,
		    VM_MAP_PAGE_MASK(dst_map));
	}

	if (dst_map->holelistenabled) {
		/* sanity check: the chosen hole must still be a hole */
		if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", last, (unsigned long long)last->vme_start);
		}
	}


	adjustment = start - vm_copy_start;
	if (!consume_on_success) {
		/*
		 * We're not allowed to consume "copy", so we'll have to
		 * copy its map entries into the destination map below.
		 * No need to re-allocate map entries from the correct
		 * (pageable or not) zone, since we'll get new map entries
		 * during the transfer.
		 * We'll also adjust the map entries's "start" and "end"
		 * during the transfer, to keep "copy"'s entries consistent
		 * with its "offset".
		 */
		goto after_adjustments;
	}

	/*
	 *	Since we're going to just drop the map
	 *	entries from the copy into the destination
	 *	map, they must come from the same pool.
	 */

	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
		/*
		 * Mismatches occur when dealing with the default
		 * pager.
		 */
		vm_map_entry_t  next, new;

		/*
		 * Find the zone that the copies were allocated from
		 */

		entry = vm_map_copy_first_entry(copy);

		/*
		 * Reinitialize the copy so that vm_map_copy_entry_link
		 * will work.
		 */
		vm_map_store_copy_reset(copy, entry);
		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;

		/*
		 * Copy each entry.
		 */
		while (entry != vm_map_copy_to_entry(copy)) {
			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
			vm_map_entry_copy_full(new, entry);
			new->vme_no_copy_on_read = FALSE;
			assert(!new->iokit_acct);
			if (new->is_sub_map) {
				/* clr address space specifics */
				new->use_pmap = FALSE;
			}
			vm_map_copy_entry_link(copy,
			    vm_map_copy_last_entry(copy),
			    new);
			next = entry->vme_next;
			/* dispose of the old entry allocated from the other pool */
			_vm_map_entry_dispose(NULL, entry);
			entry = next;
		}
	}

	/*
	 *	Adjust the addresses in the copy chain, and
	 *	reset the region attributes.
	 */

	for (entry = vm_map_copy_first_entry(copy);
	    entry != vm_map_copy_to_entry(copy);
	    entry = entry->vme_next) {
		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
			/*
			 * We're injecting this copy entry into a map that
			 * has the standard page alignment, so clear
			 * "map_aligned" (which might have been inherited
			 * from the original map entry).
			 */
			entry->map_aligned = FALSE;
		}

		entry->vme_start += adjustment;
		entry->vme_end += adjustment;

		if (entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
			    VM_MAP_PAGE_MASK(dst_map)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
			    VM_MAP_PAGE_MASK(dst_map)));
		}

		entry->inheritance = VM_INHERIT_DEFAULT;
		entry->protection = VM_PROT_DEFAULT;
		entry->max_protection = VM_PROT_ALL;
		entry->behavior = VM_BEHAVIOR_DEFAULT;

		/*
		 * If the entry is now wired,
		 * map the pages into the destination map.
		 */
		if (entry->wired_count != 0) {
			vm_map_offset_t va;
			vm_object_offset_t       offset;
			vm_object_t object;
			vm_prot_t prot;
			int     type_of_fault;

			/* TODO4K would need to use actual page size */
			assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);

			object = VME_OBJECT(entry);
			offset = VME_OFFSET(entry);
			va = entry->vme_start;

			pmap_pageable(dst_map->pmap,
			    entry->vme_start,
			    entry->vme_end,
			    TRUE);

			/* enter each wired page into the destination pmap */
			while (va < entry->vme_end) {
				vm_page_t       m;
				struct vm_object_fault_info fault_info = {};

				/*
				 * Look up the page in the object.
				 * Assert that the page will be found in the
				 * top object:
				 * either
				 *	the object was newly created by
				 *	vm_object_copy_slowly, and has
				 *	copies of all of the pages from
				 *	the source object
				 * or
				 *	the object was moved from the old
				 *	map entry; because the old map
				 *	entry was wired, all of the pages
				 *	were in the top-level object.
				 *	(XXX not true if we wire pages for
				 *	 reading)
				 */
				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
				    m->vmp_absent) {
					panic("vm_map_copyout: wiring %p", m);
				}

				prot = entry->protection;

				if (override_nx(dst_map, VME_ALIAS(entry)) &&
				    prot) {
					prot |= VM_PROT_EXECUTE;
				}

				type_of_fault = DBG_CACHE_HIT_FAULT;

				fault_info.user_tag = VME_ALIAS(entry);
				fault_info.pmap_options = 0;
				if (entry->iokit_acct ||
				    (!entry->is_sub_map && !entry->use_pmap)) {
					fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
				}

				vm_fault_enter(m,
				    dst_map->pmap,
				    va,
				    PAGE_SIZE, 0,
				    prot,
				    prot,
				    VM_PAGE_WIRED(m),
				    FALSE,            /* change_wiring */
				    VM_KERN_MEMORY_NONE,            /* tag - not wiring */
				    &fault_info,
				    NULL,             /* need_retry */
				    &type_of_fault);

				vm_object_unlock(object);

				offset += PAGE_SIZE_64;
				va += PAGE_SIZE;
			}
		}
	}

after_adjustments:

	/*
	 *	Correct the page alignment for the result
	 */

	*dst_addr = start + (copy->offset - vm_copy_start);

#if KASAN
	kasan_notify_address(*dst_addr, size);
#endif

	/*
	 *	Update the hints and the map size
	 */

	if (consume_on_success) {
		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
	} else {
		SAVE_HINT_MAP_WRITE(dst_map, last);
	}

	dst_map->size += size;

	/*
	 *	Link in the copy
	 */

	if (consume_on_success) {
		vm_map_copy_insert(dst_map, last, copy);
		if (copy != original_copy) {
			/* consumed the page-size-adjusted copy; drop the caller's */
			vm_map_copy_discard(original_copy);
			original_copy = VM_MAP_COPY_NULL;
		}
	} else {
		vm_map_copy_remap(dst_map, last, copy, adjustment,
		    cur_protection, max_protection,
		    inheritance);
		if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
			/* drop the adjusted copy; the caller keeps the original */
			vm_map_copy_discard(copy);
			copy = original_copy;
		}
	}


	vm_map_unlock(dst_map);

	/*
	 * XXX	If wiring_required, call vm_map_pageable
	 */

	return KERN_SUCCESS;
}
11384 
11385 /*
11386  *	Routine:	vm_map_copyin
11387  *
11388  *	Description:
11389  *		see vm_map_copyin_common.  Exported via Unsupported.exports.
11390  *
11391  */
11392 
11393 #undef vm_map_copyin
11394 
11395 kern_return_t
vm_map_copyin(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t src_destroy,vm_map_copy_t * copy_result)11396 vm_map_copyin(
11397 	vm_map_t                        src_map,
11398 	vm_map_address_t        src_addr,
11399 	vm_map_size_t           len,
11400 	boolean_t                       src_destroy,
11401 	vm_map_copy_t           *copy_result)   /* OUT */
11402 {
11403 	return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11404 	           FALSE, copy_result, FALSE);
11405 }
11406 
11407 /*
11408  *	Routine:	vm_map_copyin_common
11409  *
11410  *	Description:
11411  *		Copy the specified region (src_addr, len) from the
11412  *		source address space (src_map), possibly removing
11413  *		the region from the source address space (src_destroy).
11414  *
11415  *	Returns:
11416  *		A vm_map_copy_t object (copy_result), suitable for
11417  *		insertion into another address space (using vm_map_copyout),
11418  *		copying over another address space region (using
11419  *		vm_map_copy_overwrite).  If the copy is unused, it
11420  *		should be destroyed (using vm_map_copy_discard).
11421  *
11422  *	In/out conditions:
11423  *		The source map should not be locked on entry.
11424  */
11425 
/*
 * One level of the submap traversal stack used by vm_map_copyin_internal():
 * records where we were in the parent map before descending into a submap,
 * so the walk can resume there afterwards.
 */
typedef struct submap_map {
	vm_map_t        parent_map;     /* map we descended from */
	vm_map_offset_t base_start;     /* start of the range in parent_map */
	vm_map_offset_t base_end;       /* end of the range in parent_map */
	vm_map_size_t   base_len;       /* length covered at this level */
	struct submap_map *next;        /* next (outer) level of the stack */
} submap_map_t;
11433 
11434 kern_return_t
vm_map_copyin_common(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t src_destroy,__unused boolean_t src_volatile,vm_map_copy_t * copy_result,boolean_t use_maxprot)11435 vm_map_copyin_common(
11436 	vm_map_t        src_map,
11437 	vm_map_address_t src_addr,
11438 	vm_map_size_t   len,
11439 	boolean_t       src_destroy,
11440 	__unused boolean_t      src_volatile,
11441 	vm_map_copy_t   *copy_result,   /* OUT */
11442 	boolean_t       use_maxprot)
11443 {
11444 	int flags;
11445 
11446 	flags = 0;
11447 	if (src_destroy) {
11448 		flags |= VM_MAP_COPYIN_SRC_DESTROY;
11449 	}
11450 	if (use_maxprot) {
11451 		flags |= VM_MAP_COPYIN_USE_MAXPROT;
11452 	}
11453 	return vm_map_copyin_internal(src_map,
11454 	           src_addr,
11455 	           len,
11456 	           flags,
11457 	           copy_result);
11458 }
11459 kern_return_t
vm_map_copyin_internal(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,int flags,vm_map_copy_t * copy_result)11460 vm_map_copyin_internal(
11461 	vm_map_t        src_map,
11462 	vm_map_address_t src_addr,
11463 	vm_map_size_t   len,
11464 	int             flags,
11465 	vm_map_copy_t   *copy_result)   /* OUT */
11466 {
11467 	vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
11468 	                                 * in multi-level lookup, this
11469 	                                 * entry contains the actual
11470 	                                 * vm_object/offset.
11471 	                                 */
11472 	vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
11473 
11474 	vm_map_offset_t src_start;      /* Start of current entry --
11475 	                                 * where copy is taking place now
11476 	                                 */
11477 	vm_map_offset_t src_end;        /* End of entire region to be
11478 	                                 * copied */
11479 	vm_map_offset_t src_base;
11480 	vm_map_t        base_map = src_map;
11481 	boolean_t       map_share = FALSE;
11482 	submap_map_t    *parent_maps = NULL;
11483 
11484 	vm_map_copy_t   copy;           /* Resulting copy */
11485 	vm_map_address_t copy_addr;
11486 	vm_map_size_t   copy_size;
11487 	boolean_t       src_destroy;
11488 	boolean_t       use_maxprot;
11489 	boolean_t       preserve_purgeable;
11490 	boolean_t       entry_was_shared;
11491 	vm_map_entry_t  saved_src_entry;
11492 
11493 	if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11494 		return KERN_INVALID_ARGUMENT;
11495 	}
11496 
11497 	src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11498 	use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11499 	preserve_purgeable =
11500 	    (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11501 
11502 	/*
11503 	 *	Check for copies of zero bytes.
11504 	 */
11505 
11506 	if (len == 0) {
11507 		*copy_result = VM_MAP_COPY_NULL;
11508 		return KERN_SUCCESS;
11509 	}
11510 
11511 	/*
11512 	 *	Check that the end address doesn't overflow
11513 	 */
11514 	src_end = src_addr + len;
11515 	if (src_end < src_addr) {
11516 		return KERN_INVALID_ADDRESS;
11517 	}
11518 
11519 	/*
11520 	 *	Compute (page aligned) start and end of region
11521 	 */
11522 	src_start = vm_map_trunc_page(src_addr,
11523 	    VM_MAP_PAGE_MASK(src_map));
11524 	src_end = vm_map_round_page(src_end,
11525 	    VM_MAP_PAGE_MASK(src_map));
11526 
11527 	/*
11528 	 * If the copy is sufficiently small, use a kernel buffer instead
11529 	 * of making a virtual copy.  The theory being that the cost of
11530 	 * setting up VM (and taking C-O-W faults) dominates the copy costs
11531 	 * for small regions.
11532 	 */
11533 	if ((len < msg_ool_size_small) &&
11534 	    !use_maxprot &&
11535 	    !preserve_purgeable &&
11536 	    !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11537 	    /*
11538 	     * Since the "msg_ool_size_small" threshold was increased and
11539 	     * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11540 	     * address space limits, we revert to doing a virtual copy if the
11541 	     * copied range goes beyond those limits.  Otherwise, mach_vm_read()
11542 	     * of the commpage would now fail when it used to work.
11543 	     */
11544 	    (src_start >= vm_map_min(src_map) &&
11545 	    src_start < vm_map_max(src_map) &&
11546 	    src_end >= vm_map_min(src_map) &&
11547 	    src_end < vm_map_max(src_map))) {
11548 		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11549 		           src_destroy, copy_result);
11550 	}
11551 
11552 	/*
11553 	 *	Allocate a header element for the list.
11554 	 *
11555 	 *	Use the start and end in the header to
11556 	 *	remember the endpoints prior to rounding.
11557 	 */
11558 
11559 	copy = vm_map_copy_allocate();
11560 	copy->type = VM_MAP_COPY_ENTRY_LIST;
11561 	copy->cpy_hdr.entries_pageable = TRUE;
11562 	copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);
11563 
11564 	vm_map_store_init( &(copy->cpy_hdr));
11565 
11566 	copy->offset = src_addr;
11567 	copy->size = len;
11568 
11569 	new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11570 
11571 #define RETURN(x)                                               \
11572 	MACRO_BEGIN                                             \
11573 	vm_map_unlock(src_map);                                 \
11574 	if(src_map != base_map)                                 \
11575 	        vm_map_deallocate(src_map);                     \
11576 	if (new_entry != VM_MAP_ENTRY_NULL)                     \
11577 	        vm_map_copy_entry_dispose(copy,new_entry);      \
11578 	vm_map_copy_discard(copy);                              \
11579 	{                                                       \
11580 	        submap_map_t	*_ptr;                          \
11581                                                                 \
11582 	        for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11583 	                parent_maps=parent_maps->next;          \
11584 	                if (_ptr->parent_map != base_map)       \
11585 	                        vm_map_deallocate(_ptr->parent_map);    \
11586 	                kfree_type(submap_map_t, _ptr);         \
11587 	        }                                               \
11588 	}                                                       \
11589 	MACRO_RETURN(x);                                        \
11590 	MACRO_END
11591 
11592 	/*
11593 	 *	Find the beginning of the region.
11594 	 */
11595 
11596 	vm_map_lock(src_map);
11597 
11598 	/*
11599 	 * Lookup the original "src_addr" rather than the truncated
11600 	 * "src_start", in case "src_start" falls in a non-map-aligned
11601 	 * map entry *before* the map entry that contains "src_addr"...
11602 	 */
11603 	if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11604 		RETURN(KERN_INVALID_ADDRESS);
11605 	}
11606 	if (!tmp_entry->is_sub_map) {
11607 		/*
11608 		 * ... but clip to the map-rounded "src_start" rather than
11609 		 * "src_addr" to preserve map-alignment.  We'll adjust the
11610 		 * first copy entry at the end, if needed.
11611 		 */
11612 		vm_map_clip_start(src_map, tmp_entry, src_start);
11613 	}
11614 	if (src_start < tmp_entry->vme_start) {
11615 		/*
11616 		 * Move "src_start" up to the start of the
11617 		 * first map entry to copy.
11618 		 */
11619 		src_start = tmp_entry->vme_start;
11620 	}
11621 	/* set for later submap fix-up */
11622 	copy_addr = src_start;
11623 
11624 	/*
11625 	 *	Go through entries until we get to the end.
11626 	 */
11627 
11628 	while (TRUE) {
11629 		vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
11630 		vm_map_size_t   src_size;               /* Size of source
11631 		                                         * map entry (in both
11632 		                                         * maps)
11633 		                                         */
11634 
11635 		vm_object_t             src_object;     /* Object to copy */
11636 		vm_object_offset_t      src_offset;
11637 
11638 		boolean_t       src_needs_copy;         /* Should source map
11639 		                                         * be made read-only
11640 		                                         * for copy-on-write?
11641 		                                         */
11642 
11643 		boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
11644 
11645 		boolean_t       was_wired;              /* Was source wired? */
11646 		vm_map_version_t version;               /* Version before locks
11647 		                                         * dropped to make copy
11648 		                                         */
11649 		kern_return_t   result;                 /* Return value from
11650 		                                         * copy_strategically.
11651 		                                         */
11652 		while (tmp_entry->is_sub_map) {
11653 			vm_map_size_t submap_len;
11654 			submap_map_t *ptr;
11655 
11656 			ptr = kalloc_type(submap_map_t, Z_WAITOK);
11657 			ptr->next = parent_maps;
11658 			parent_maps = ptr;
11659 			ptr->parent_map = src_map;
11660 			ptr->base_start = src_start;
11661 			ptr->base_end = src_end;
11662 			submap_len = tmp_entry->vme_end - src_start;
11663 			if (submap_len > (src_end - src_start)) {
11664 				submap_len = src_end - src_start;
11665 			}
11666 			ptr->base_len = submap_len;
11667 
11668 			src_start -= tmp_entry->vme_start;
11669 			src_start += VME_OFFSET(tmp_entry);
11670 			src_end = src_start + submap_len;
11671 			src_map = VME_SUBMAP(tmp_entry);
11672 			vm_map_lock(src_map);
11673 			/* keep an outstanding reference for all maps in */
11674 			/* the parents tree except the base map */
11675 			vm_map_reference(src_map);
11676 			vm_map_unlock(ptr->parent_map);
11677 			if (!vm_map_lookup_entry(
11678 				    src_map, src_start, &tmp_entry)) {
11679 				RETURN(KERN_INVALID_ADDRESS);
11680 			}
11681 			map_share = TRUE;
11682 			if (!tmp_entry->is_sub_map) {
11683 				vm_map_clip_start(src_map, tmp_entry, src_start);
11684 			}
11685 			src_entry = tmp_entry;
11686 		}
11687 		/* we are now in the lowest level submap... */
11688 
11689 		if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11690 		    (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11691 		/* This is not supported for now. In future */
11692 			/* we will need to detect the phys_contig   */
11693 			/* condition and then upgrade copy_slowly   */
11694 			/* to do physical copy from the device mem  */
11695 			/* based object. We can piggy-back off of   */
11696 			/* the was wired boolean to set-up the      */
11697 			/* proper handling */
11698 			RETURN(KERN_PROTECTION_FAILURE);
11699 		}
11700 		/*
11701 		 *	Create a new address map entry to hold the result.
11702 		 *	Fill in the fields from the appropriate source entries.
11703 		 *	We must unlock the source map to do this if we need
11704 		 *	to allocate a map entry.
11705 		 */
11706 		if (new_entry == VM_MAP_ENTRY_NULL) {
11707 			version.main_timestamp = src_map->timestamp;
11708 			vm_map_unlock(src_map);
11709 
11710 			new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11711 
11712 			vm_map_lock(src_map);
11713 			if ((version.main_timestamp + 1) != src_map->timestamp) {
11714 				if (!vm_map_lookup_entry(src_map, src_start,
11715 				    &tmp_entry)) {
11716 					RETURN(KERN_INVALID_ADDRESS);
11717 				}
11718 				if (!tmp_entry->is_sub_map) {
11719 					vm_map_clip_start(src_map, tmp_entry, src_start);
11720 				}
11721 				continue; /* restart w/ new tmp_entry */
11722 			}
11723 		}
11724 
11725 		/*
11726 		 *	Verify that the region can be read.
11727 		 */
11728 		if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11729 		    !use_maxprot) ||
11730 		    (src_entry->max_protection & VM_PROT_READ) == 0) {
11731 			RETURN(KERN_PROTECTION_FAILURE);
11732 		}
11733 
11734 		/*
11735 		 *	Clip against the endpoints of the entire region.
11736 		 */
11737 
11738 		vm_map_clip_end(src_map, src_entry, src_end);
11739 
11740 		src_size = src_entry->vme_end - src_start;
11741 		src_object = VME_OBJECT(src_entry);
11742 		src_offset = VME_OFFSET(src_entry);
11743 		was_wired = (src_entry->wired_count != 0);
11744 
11745 		vm_map_entry_copy(src_map, new_entry, src_entry);
11746 		if (new_entry->is_sub_map) {
11747 			/* clr address space specifics */
11748 			new_entry->use_pmap = FALSE;
11749 		} else {
11750 			/*
11751 			 * We're dealing with a copy-on-write operation,
11752 			 * so the resulting mapping should not inherit the
11753 			 * original mapping's accounting settings.
11754 			 * "iokit_acct" should have been cleared in
11755 			 * vm_map_entry_copy().
11756 			 * "use_pmap" should be reset to its default (TRUE)
11757 			 * so that the new mapping gets accounted for in
11758 			 * the task's memory footprint.
11759 			 */
11760 			assert(!new_entry->iokit_acct);
11761 			new_entry->use_pmap = TRUE;
11762 		}
11763 
11764 		/*
11765 		 *	Attempt non-blocking copy-on-write optimizations.
11766 		 */
11767 
11768 		/*
11769 		 * If we are destroying the source, and the object
11770 		 * is internal, we could move the object reference
11771 		 * from the source to the copy.  The copy is
11772 		 * copy-on-write only if the source is.
11773 		 * We make another reference to the object, because
11774 		 * destroying the source entry will deallocate it.
11775 		 *
11776 		 * This memory transfer has to be atomic, (to prevent
11777 		 * the VM object from being shared or copied while
11778 		 * it's being moved here), so we could only do this
11779 		 * if we won't have to unlock the VM map until the
11780 		 * original mapping has been fully removed.
11781 		 */
11782 
11783 RestartCopy:
11784 		if ((src_object == VM_OBJECT_NULL ||
11785 		    (!was_wired && !map_share && !tmp_entry->is_shared
11786 		    && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11787 		    vm_object_copy_quickly(
11788 			    VME_OBJECT_PTR(new_entry),
11789 			    src_offset,
11790 			    src_size,
11791 			    &src_needs_copy,
11792 			    &new_entry_needs_copy)) {
11793 			new_entry->needs_copy = new_entry_needs_copy;
11794 
11795 			/*
11796 			 *	Handle copy-on-write obligations
11797 			 */
11798 
11799 			if (src_needs_copy && !tmp_entry->needs_copy) {
11800 				vm_prot_t prot;
11801 
11802 				prot = src_entry->protection & ~VM_PROT_WRITE;
11803 
11804 				if (override_nx(src_map, VME_ALIAS(src_entry))
11805 				    && prot) {
11806 					prot |= VM_PROT_EXECUTE;
11807 				}
11808 
11809 				vm_object_pmap_protect(
11810 					src_object,
11811 					src_offset,
11812 					src_size,
11813 					(src_entry->is_shared ?
11814 					PMAP_NULL
11815 					: src_map->pmap),
11816 					VM_MAP_PAGE_SIZE(src_map),
11817 					src_entry->vme_start,
11818 					prot);
11819 
11820 				assert(tmp_entry->wired_count == 0);
11821 				tmp_entry->needs_copy = TRUE;
11822 			}
11823 
11824 			/*
11825 			 *	The map has never been unlocked, so it's safe
11826 			 *	to move to the next entry rather than doing
11827 			 *	another lookup.
11828 			 */
11829 
11830 			goto CopySuccessful;
11831 		}
11832 
11833 		entry_was_shared = tmp_entry->is_shared;
11834 
11835 		/*
11836 		 *	Take an object reference, so that we may
11837 		 *	release the map lock(s).
11838 		 */
11839 
11840 		assert(src_object != VM_OBJECT_NULL);
11841 		vm_object_reference(src_object);
11842 
11843 		/*
11844 		 *	Record the timestamp for later verification.
11845 		 *	Unlock the map.
11846 		 */
11847 
11848 		version.main_timestamp = src_map->timestamp;
11849 		vm_map_unlock(src_map); /* Increments timestamp once! */
11850 		saved_src_entry = src_entry;
11851 		tmp_entry = VM_MAP_ENTRY_NULL;
11852 		src_entry = VM_MAP_ENTRY_NULL;
11853 
11854 		/*
11855 		 *	Perform the copy
11856 		 */
11857 
11858 		if (was_wired ||
11859 		    (debug4k_no_cow_copyin &&
11860 		    VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11861 CopySlowly:
11862 			vm_object_lock(src_object);
11863 			result = vm_object_copy_slowly(
11864 				src_object,
11865 				src_offset,
11866 				src_size,
11867 				THREAD_UNINT,
11868 				VME_OBJECT_PTR(new_entry));
11869 			VME_OFFSET_SET(new_entry,
11870 			    src_offset - vm_object_trunc_page(src_offset));
11871 			new_entry->needs_copy = FALSE;
11872 		} else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11873 		    (entry_was_shared || map_share)) {
11874 			vm_object_t new_object;
11875 
11876 			vm_object_lock_shared(src_object);
11877 			new_object = vm_object_copy_delayed(
11878 				src_object,
11879 				src_offset,
11880 				src_size,
11881 				TRUE);
11882 			if (new_object == VM_OBJECT_NULL) {
11883 				goto CopySlowly;
11884 			}
11885 
11886 			VME_OBJECT_SET(new_entry, new_object);
11887 			assert(new_entry->wired_count == 0);
11888 			new_entry->needs_copy = TRUE;
11889 			assert(!new_entry->iokit_acct);
11890 			assert(new_object->purgable == VM_PURGABLE_DENY);
11891 			assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11892 			result = KERN_SUCCESS;
11893 		} else {
11894 			vm_object_offset_t new_offset;
11895 			new_offset = VME_OFFSET(new_entry);
11896 			result = vm_object_copy_strategically(src_object,
11897 			    src_offset,
11898 			    src_size,
11899 			    VME_OBJECT_PTR(new_entry),
11900 			    &new_offset,
11901 			    &new_entry_needs_copy);
11902 			if (new_offset != VME_OFFSET(new_entry)) {
11903 				VME_OFFSET_SET(new_entry, new_offset);
11904 			}
11905 
11906 			new_entry->needs_copy = new_entry_needs_copy;
11907 		}
11908 
11909 		if (result == KERN_SUCCESS &&
11910 		    ((preserve_purgeable &&
11911 		    src_object->purgable != VM_PURGABLE_DENY) ||
11912 		    new_entry->used_for_jit)) {
11913 			/*
11914 			 * Purgeable objects should be COPY_NONE, true share;
11915 		 * this should be propagated to the copy.
11916 			 *
11917 			 * Also force mappings the pmap specially protects to
11918 			 * be COPY_NONE; trying to COW these mappings would
11919 			 * change the effective protections, which could have
11920 			 * side effects if the pmap layer relies on the
11921 			 * specified protections.
11922 			 */
11923 
11924 			vm_object_t     new_object;
11925 
11926 			new_object = VME_OBJECT(new_entry);
11927 			assert(new_object != src_object);
11928 			vm_object_lock(new_object);
11929 			assert(new_object->ref_count == 1);
11930 			assert(new_object->shadow == VM_OBJECT_NULL);
11931 			assert(new_object->copy == VM_OBJECT_NULL);
11932 			assert(new_object->vo_owner == NULL);
11933 
11934 			new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11935 
11936 			if (preserve_purgeable &&
11937 			    src_object->purgable != VM_PURGABLE_DENY) {
11938 				new_object->true_share = TRUE;
11939 
11940 				/* start as non-volatile with no owner... */
11941 				new_object->purgable = VM_PURGABLE_NONVOLATILE;
11942 				vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11943 				/* ... and move to src_object's purgeable state */
11944 				if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11945 					int state;
11946 					state = src_object->purgable;
11947 					vm_object_purgable_control(
11948 						new_object,
11949 						VM_PURGABLE_SET_STATE_FROM_KERNEL,
11950 						&state);
11951 				}
11952 				/* no pmap accounting for purgeable objects */
11953 				new_entry->use_pmap = FALSE;
11954 			}
11955 
11956 			vm_object_unlock(new_object);
11957 			new_object = VM_OBJECT_NULL;
11958 		}
11959 
11960 		if (result != KERN_SUCCESS &&
11961 		    result != KERN_MEMORY_RESTART_COPY) {
11962 			vm_map_lock(src_map);
11963 			RETURN(result);
11964 		}
11965 
11966 		/*
11967 		 *	Throw away the extra reference
11968 		 */
11969 
11970 		vm_object_deallocate(src_object);
11971 
11972 		/*
11973 		 *	Verify that the map has not substantially
11974 		 *	changed while the copy was being made.
11975 		 */
11976 
11977 		vm_map_lock(src_map);
11978 
11979 		if ((version.main_timestamp + 1) == src_map->timestamp) {
11980 			/* src_map hasn't changed: src_entry is still valid */
11981 			src_entry = saved_src_entry;
11982 			goto VerificationSuccessful;
11983 		}
11984 
11985 		/*
11986 		 *	Simple version comparison failed.
11987 		 *
11988 		 *	Retry the lookup and verify that the
11989 		 *	same object/offset are still present.
11990 		 *
11991 		 *	[Note: a memory manager that colludes with
11992 		 *	the calling task can detect that we have
11993 		 *	cheated.  While the map was unlocked, the
11994 		 *	mapping could have been changed and restored.]
11995 		 */
11996 
11997 		if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11998 			if (result != KERN_MEMORY_RESTART_COPY) {
11999 				vm_object_deallocate(VME_OBJECT(new_entry));
12000 				VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
12001 				/* reset accounting state */
12002 				new_entry->iokit_acct = FALSE;
12003 				new_entry->use_pmap = TRUE;
12004 			}
12005 			RETURN(KERN_INVALID_ADDRESS);
12006 		}
12007 
12008 		src_entry = tmp_entry;
12009 		vm_map_clip_start(src_map, src_entry, src_start);
12010 
12011 		if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12012 		    !use_maxprot) ||
12013 		    ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12014 			goto VerificationFailed;
12015 		}
12016 
12017 		if (src_entry->vme_end < new_entry->vme_end) {
12018 			/*
12019 			 * This entry might have been shortened
12020 			 * (vm_map_clip_end) or been replaced with
12021 			 * an entry that ends closer to "src_start"
12022 			 * than before.
12023 			 * Adjust "new_entry" accordingly; copying
12024 			 * less memory would be correct but we also
12025 			 * redo the copy (see below) if the new entry
12026 			 * no longer points at the same object/offset.
12027 			 */
12028 			assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12029 			    VM_MAP_COPY_PAGE_MASK(copy)));
12030 			new_entry->vme_end = src_entry->vme_end;
12031 			src_size = new_entry->vme_end - src_start;
12032 		} else if (src_entry->vme_end > new_entry->vme_end) {
12033 			/*
12034 			 * This entry might have been extended
12035 			 * (vm_map_entry_simplify() or coalesce)
12036 			 * or been replaced with an entry that ends farther
12037 			 * from "src_start" than before.
12038 			 *
12039 			 * We've called vm_object_copy_*() only on
12040 			 * the previous <start:end> range, so we can't
12041 			 * just extend new_entry.  We have to re-do
12042 			 * the copy based on the new entry as if it was
12043 			 * pointing at a different object/offset (see
12044 			 * "Verification failed" below).
12045 			 */
12046 		}
12047 
12048 		if ((VME_OBJECT(src_entry) != src_object) ||
12049 		    (VME_OFFSET(src_entry) != src_offset) ||
12050 		    (src_entry->vme_end > new_entry->vme_end)) {
12051 			/*
12052 			 *	Verification failed.
12053 			 *
12054 			 *	Start over with this top-level entry.
12055 			 */
12056 
12057 VerificationFailed:     ;
12058 
12059 			vm_object_deallocate(VME_OBJECT(new_entry));
12060 			tmp_entry = src_entry;
12061 			continue;
12062 		}
12063 
12064 		/*
12065 		 *	Verification succeeded.
12066 		 */
12067 
12068 VerificationSuccessful:;
12069 
12070 		if (result == KERN_MEMORY_RESTART_COPY) {
12071 			goto RestartCopy;
12072 		}
12073 
12074 		/*
12075 		 *	Copy succeeded.
12076 		 */
12077 
12078 CopySuccessful: ;
12079 
12080 		/*
12081 		 *	Link in the new copy entry.
12082 		 */
12083 
12084 		vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12085 		    new_entry);
12086 
12087 		/*
12088 		 *	Determine whether the entire region
12089 		 *	has been copied.
12090 		 */
12091 		src_base = src_start;
12092 		src_start = new_entry->vme_end;
12093 		new_entry = VM_MAP_ENTRY_NULL;
12094 		while ((src_start >= src_end) && (src_end != 0)) {
12095 			submap_map_t    *ptr;
12096 
12097 			if (src_map == base_map) {
12098 				/* back to the top */
12099 				break;
12100 			}
12101 
12102 			ptr = parent_maps;
12103 			assert(ptr != NULL);
12104 			parent_maps = parent_maps->next;
12105 
12106 			/* fix up the damage we did in that submap */
12107 			vm_map_simplify_range(src_map,
12108 			    src_base,
12109 			    src_end);
12110 
12111 			vm_map_unlock(src_map);
12112 			vm_map_deallocate(src_map);
12113 			vm_map_lock(ptr->parent_map);
12114 			src_map = ptr->parent_map;
12115 			src_base = ptr->base_start;
12116 			src_start = ptr->base_start + ptr->base_len;
12117 			src_end = ptr->base_end;
12118 			if (!vm_map_lookup_entry(src_map,
12119 			    src_start,
12120 			    &tmp_entry) &&
12121 			    (src_end > src_start)) {
12122 				RETURN(KERN_INVALID_ADDRESS);
12123 			}
12124 			kfree_type(submap_map_t, ptr);
12125 			if (parent_maps == NULL) {
12126 				map_share = FALSE;
12127 			}
12128 			src_entry = tmp_entry->vme_prev;
12129 		}
12130 
12131 		if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12132 		    (src_start >= src_addr + len) &&
12133 		    (src_addr + len != 0)) {
12134 			/*
12135 			 * Stop copying now, even though we haven't reached
12136 			 * "src_end".  We'll adjust the end of the last copy
12137 			 * entry at the end, if needed.
12138 			 *
12139 			 * If src_map's alignment is different from the
12140 			 * system's page-alignment, there could be
12141 			 * extra non-map-aligned map entries between
12142 			 * the original (non-rounded) "src_addr + len"
12143 			 * and the rounded "src_end".
12144 			 * We do not want to copy those map entries since
12145 			 * they're not part of the copied range.
12146 			 */
12147 			break;
12148 		}
12149 
12150 		if ((src_start >= src_end) && (src_end != 0)) {
12151 			break;
12152 		}
12153 
12154 		/*
12155 		 *	Verify that there are no gaps in the region
12156 		 */
12157 
12158 		tmp_entry = src_entry->vme_next;
12159 		if ((tmp_entry->vme_start != src_start) ||
12160 		    (tmp_entry == vm_map_to_entry(src_map))) {
12161 			RETURN(KERN_INVALID_ADDRESS);
12162 		}
12163 	}
12164 
12165 	/*
12166 	 * If the source should be destroyed, do it now, since the
12167 	 * copy was successful.
12168 	 */
12169 	if (src_destroy) {
12170 		(void) vm_map_delete(
12171 			src_map,
12172 			vm_map_trunc_page(src_addr,
12173 			VM_MAP_PAGE_MASK(src_map)),
12174 			src_end,
12175 			((src_map == kernel_map) ?
12176 			VM_MAP_REMOVE_KUNWIRE :
12177 			VM_MAP_REMOVE_NO_FLAGS),
12178 			VM_MAP_NULL);
12179 	} else {
12180 		/* fix up the damage we did in the base map */
12181 		vm_map_simplify_range(
12182 			src_map,
12183 			vm_map_trunc_page(src_addr,
12184 			VM_MAP_PAGE_MASK(src_map)),
12185 			vm_map_round_page(src_end,
12186 			VM_MAP_PAGE_MASK(src_map)));
12187 	}
12188 
12189 	vm_map_unlock(src_map);
12190 	tmp_entry = VM_MAP_ENTRY_NULL;
12191 
12192 	if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12193 	    VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12194 		vm_map_offset_t original_start, original_offset, original_end;
12195 
12196 		assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12197 
12198 		/* adjust alignment of first copy_entry's "vme_start" */
12199 		tmp_entry = vm_map_copy_first_entry(copy);
12200 		if (tmp_entry != vm_map_copy_to_entry(copy)) {
12201 			vm_map_offset_t adjustment;
12202 
12203 			original_start = tmp_entry->vme_start;
12204 			original_offset = VME_OFFSET(tmp_entry);
12205 
12206 			/* map-align the start of the first copy entry... */
12207 			adjustment = (tmp_entry->vme_start -
12208 			    vm_map_trunc_page(
12209 				    tmp_entry->vme_start,
12210 				    VM_MAP_PAGE_MASK(src_map)));
12211 			tmp_entry->vme_start -= adjustment;
12212 			VME_OFFSET_SET(tmp_entry,
12213 			    VME_OFFSET(tmp_entry) - adjustment);
12214 			copy_addr -= adjustment;
12215 			assert(tmp_entry->vme_start < tmp_entry->vme_end);
12216 			/* ... adjust for mis-aligned start of copy range */
12217 			adjustment =
12218 			    (vm_map_trunc_page(copy->offset,
12219 			    PAGE_MASK) -
12220 			    vm_map_trunc_page(copy->offset,
12221 			    VM_MAP_PAGE_MASK(src_map)));
12222 			if (adjustment) {
12223 				assert(page_aligned(adjustment));
12224 				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12225 				tmp_entry->vme_start += adjustment;
12226 				VME_OFFSET_SET(tmp_entry,
12227 				    (VME_OFFSET(tmp_entry) +
12228 				    adjustment));
12229 				copy_addr += adjustment;
12230 				assert(tmp_entry->vme_start < tmp_entry->vme_end);
12231 			}
12232 
12233 			/*
12234 			 * Assert that the adjustments haven't exposed
12235 			 * more than was originally copied...
12236 			 */
12237 			assert(tmp_entry->vme_start >= original_start);
12238 			assert(VME_OFFSET(tmp_entry) >= original_offset);
12239 			/*
12240 			 * ... and that it did not adjust outside of
12241 			 * a single 16K page.
12242 			 */
12243 			assert(vm_map_trunc_page(tmp_entry->vme_start,
12244 			    VM_MAP_PAGE_MASK(src_map)) ==
12245 			    vm_map_trunc_page(original_start,
12246 			    VM_MAP_PAGE_MASK(src_map)));
12247 		}
12248 
12249 		/* adjust alignment of last copy_entry's "vme_end" */
12250 		tmp_entry = vm_map_copy_last_entry(copy);
12251 		if (tmp_entry != vm_map_copy_to_entry(copy)) {
12252 			vm_map_offset_t adjustment;
12253 
12254 			original_end = tmp_entry->vme_end;
12255 
12256 			/* map-align the end of the last copy entry... */
12257 			tmp_entry->vme_end =
12258 			    vm_map_round_page(tmp_entry->vme_end,
12259 			    VM_MAP_PAGE_MASK(src_map));
12260 			/* ... adjust for mis-aligned end of copy range */
12261 			adjustment =
12262 			    (vm_map_round_page((copy->offset +
12263 			    copy->size),
12264 			    VM_MAP_PAGE_MASK(src_map)) -
12265 			    vm_map_round_page((copy->offset +
12266 			    copy->size),
12267 			    PAGE_MASK));
12268 			if (adjustment) {
12269 				assert(page_aligned(adjustment));
12270 				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12271 				tmp_entry->vme_end -= adjustment;
12272 				assert(tmp_entry->vme_start < tmp_entry->vme_end);
12273 			}
12274 
12275 			/*
12276 			 * Assert that the adjustments haven't exposed
12277 			 * more than was originally copied...
12278 			 */
12279 			assert(tmp_entry->vme_end <= original_end);
12280 			/*
12281 			 * ... and that it did not adjust outside of
12282 			 * a single 16K page.
12283 			 */
12284 			assert(vm_map_round_page(tmp_entry->vme_end,
12285 			    VM_MAP_PAGE_MASK(src_map)) ==
12286 			    vm_map_round_page(original_end,
12287 			    VM_MAP_PAGE_MASK(src_map)));
12288 		}
12289 	}
12290 
12291 	/* Fix-up start and end points in copy.  This is necessary */
12292 	/* when the various entries in the copy object were picked */
12293 	/* up from different sub-maps */
12294 
12295 	tmp_entry = vm_map_copy_first_entry(copy);
12296 	copy_size = 0; /* compute actual size */
12297 	while (tmp_entry != vm_map_copy_to_entry(copy)) {
12298 		assert(VM_MAP_PAGE_ALIGNED(
12299 			    copy_addr + (tmp_entry->vme_end -
12300 			    tmp_entry->vme_start),
12301 			    MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12302 		assert(VM_MAP_PAGE_ALIGNED(
12303 			    copy_addr,
12304 			    MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12305 
12306 		/*
12307 		 * The copy_entries will be injected directly into the
12308 		 * destination map and might not be "map aligned" there...
12309 		 */
12310 		tmp_entry->map_aligned = FALSE;
12311 
12312 		tmp_entry->vme_end = copy_addr +
12313 		    (tmp_entry->vme_end - tmp_entry->vme_start);
12314 		tmp_entry->vme_start = copy_addr;
12315 		assert(tmp_entry->vme_start < tmp_entry->vme_end);
12316 		copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12317 		copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12318 		tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12319 	}
12320 
12321 	if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12322 	    copy_size < copy->size) {
12323 		/*
12324 		 * The actual size of the VM map copy is smaller than what
12325 		 * was requested by the caller.  This must be because some
12326 		 * PAGE_SIZE-sized pages are missing at the end of the last
12327 		 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12328 		 * The caller might not have been aware of those missing
12329 		 * pages and might not want to be aware of it, which is
12330 		 * fine as long as they don't try to access (and crash on)
12331 		 * those missing pages.
12332 		 * Let's adjust the size of the "copy", to avoid failing
12333 		 * in vm_map_copyout() or vm_map_copy_overwrite().
12334 		 */
12335 		assert(vm_map_round_page(copy_size,
12336 		    VM_MAP_PAGE_MASK(src_map)) ==
12337 		    vm_map_round_page(copy->size,
12338 		    VM_MAP_PAGE_MASK(src_map)));
12339 		copy->size = copy_size;
12340 	}
12341 
12342 	*copy_result = copy;
12343 	return KERN_SUCCESS;
12344 
12345 #undef  RETURN
12346 }
12347 
12348 kern_return_t
vm_map_copy_extract(vm_map_t src_map,vm_map_address_t src_addr,vm_map_size_t len,boolean_t do_copy,vm_map_copy_t * copy_result,vm_prot_t * cur_prot,vm_prot_t * max_prot,vm_inherit_t inheritance,vm_map_kernel_flags_t vmk_flags)12349 vm_map_copy_extract(
12350 	vm_map_t                src_map,
12351 	vm_map_address_t        src_addr,
12352 	vm_map_size_t           len,
12353 	boolean_t               do_copy,
12354 	vm_map_copy_t           *copy_result,   /* OUT */
12355 	vm_prot_t               *cur_prot,      /* IN/OUT */
12356 	vm_prot_t               *max_prot,      /* IN/OUT */
12357 	vm_inherit_t            inheritance,
12358 	vm_map_kernel_flags_t   vmk_flags)
12359 {
12360 	vm_map_copy_t   copy;
12361 	kern_return_t   kr;
12362 	vm_prot_t required_cur_prot, required_max_prot;
12363 
12364 	/*
12365 	 *	Check for copies of zero bytes.
12366 	 */
12367 
12368 	if (len == 0) {
12369 		*copy_result = VM_MAP_COPY_NULL;
12370 		return KERN_SUCCESS;
12371 	}
12372 
12373 	/*
12374 	 *	Check that the end address doesn't overflow
12375 	 */
12376 	if (src_addr + len < src_addr) {
12377 		return KERN_INVALID_ADDRESS;
12378 	}
12379 
12380 	if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12381 		DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12382 	}
12383 
12384 	required_cur_prot = *cur_prot;
12385 	required_max_prot = *max_prot;
12386 
12387 	/*
12388 	 *	Allocate a header element for the list.
12389 	 *
12390 	 *	Use the start and end in the header to
12391 	 *	remember the endpoints prior to rounding.
12392 	 */
12393 
12394 	copy = vm_map_copy_allocate();
12395 	copy->type = VM_MAP_COPY_ENTRY_LIST;
12396 	copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12397 
12398 	vm_map_store_init(&copy->cpy_hdr);
12399 
12400 	copy->offset = 0;
12401 	copy->size = len;
12402 
12403 	kr = vm_map_remap_extract(src_map,
12404 	    src_addr,
12405 	    len,
12406 	    do_copy,             /* copy */
12407 	    &copy->cpy_hdr,
12408 	    cur_prot,            /* IN/OUT */
12409 	    max_prot,            /* IN/OUT */
12410 	    inheritance,
12411 	    vmk_flags);
12412 	if (kr != KERN_SUCCESS) {
12413 		vm_map_copy_discard(copy);
12414 		return kr;
12415 	}
12416 	if (required_cur_prot != VM_PROT_NONE) {
12417 		assert((*cur_prot & required_cur_prot) == required_cur_prot);
12418 		assert((*max_prot & required_max_prot) == required_max_prot);
12419 	}
12420 
12421 	*copy_result = copy;
12422 	return KERN_SUCCESS;
12423 }
12424 
12425 /*
12426  *	vm_map_copyin_object:
12427  *
12428  *	Create a copy object from an object.
12429  *	Our caller donates an object reference.
12430  */
12431 
12432 kern_return_t
vm_map_copyin_object(vm_object_t object,vm_object_offset_t offset,vm_object_size_t size,vm_map_copy_t * copy_result)12433 vm_map_copyin_object(
12434 	vm_object_t             object,
12435 	vm_object_offset_t      offset, /* offset of region in object */
12436 	vm_object_size_t        size,   /* size of region in object */
12437 	vm_map_copy_t   *copy_result)   /* OUT */
12438 {
12439 	vm_map_copy_t   copy;           /* Resulting copy */
12440 
12441 	/*
12442 	 *	We drop the object into a special copy object
12443 	 *	that contains the object directly.
12444 	 */
12445 
12446 	copy = vm_map_copy_allocate();
12447 	copy->type = VM_MAP_COPY_OBJECT;
12448 	copy->cpy_object = object;
12449 	copy->offset = offset;
12450 	copy->size = size;
12451 
12452 	*copy_result = copy;
12453 	return KERN_SUCCESS;
12454 }
12455 
12456 static void
vm_map_fork_share(vm_map_t old_map,vm_map_entry_t old_entry,vm_map_t new_map)12457 vm_map_fork_share(
12458 	vm_map_t        old_map,
12459 	vm_map_entry_t  old_entry,
12460 	vm_map_t        new_map)
12461 {
12462 	vm_object_t     object;
12463 	vm_map_entry_t  new_entry;
12464 
12465 	/*
12466 	 *	New sharing code.  New map entry
12467 	 *	references original object.  Internal
12468 	 *	objects use asynchronous copy algorithm for
12469 	 *	future copies.  First make sure we have
12470 	 *	the right object.  If we need a shadow,
12471 	 *	or someone else already has one, then
12472 	 *	make a new shadow and share it.
12473 	 */
12474 
12475 	object = VME_OBJECT(old_entry);
12476 	if (old_entry->is_sub_map) {
12477 		assert(old_entry->wired_count == 0);
12478 #ifndef NO_NESTED_PMAP
12479 		if (old_entry->use_pmap) {
12480 			kern_return_t   result;
12481 
12482 			result = pmap_nest(new_map->pmap,
12483 			    (VME_SUBMAP(old_entry))->pmap,
12484 			    (addr64_t)old_entry->vme_start,
12485 			    (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12486 			if (result) {
12487 				panic("vm_map_fork_share: pmap_nest failed!");
12488 			}
12489 		}
12490 #endif  /* NO_NESTED_PMAP */
12491 	} else if (object == VM_OBJECT_NULL) {
12492 		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12493 		    old_entry->vme_start));
12494 		VME_OFFSET_SET(old_entry, 0);
12495 		VME_OBJECT_SET(old_entry, object);
12496 		old_entry->use_pmap = TRUE;
12497 //		assert(!old_entry->needs_copy);
12498 	} else if (object->copy_strategy !=
12499 	    MEMORY_OBJECT_COPY_SYMMETRIC) {
12500 		/*
12501 		 *	We are already using an asymmetric
12502 		 *	copy, and therefore we already have
12503 		 *	the right object.
12504 		 */
12505 
12506 		assert(!old_entry->needs_copy);
12507 	} else if (old_entry->needs_copy ||       /* case 1 */
12508 	    object->shadowed ||                 /* case 2 */
12509 	    (!object->true_share &&             /* case 3 */
12510 	    !old_entry->is_shared &&
12511 	    (object->vo_size >
12512 	    (vm_map_size_t)(old_entry->vme_end -
12513 	    old_entry->vme_start)))) {
12514 		/*
12515 		 *	We need to create a shadow.
12516 		 *	There are three cases here.
12517 		 *	In the first case, we need to
12518 		 *	complete a deferred symmetrical
12519 		 *	copy that we participated in.
12520 		 *	In the second and third cases,
12521 		 *	we need to create the shadow so
12522 		 *	that changes that we make to the
12523 		 *	object do not interfere with
12524 		 *	any symmetrical copies which
12525 		 *	have occurred (case 2) or which
12526 		 *	might occur (case 3).
12527 		 *
12528 		 *	The first case is when we had
12529 		 *	deferred shadow object creation
12530 		 *	via the entry->needs_copy mechanism.
12531 		 *	This mechanism only works when
12532 		 *	only one entry points to the source
12533 		 *	object, and we are about to create
12534 		 *	a second entry pointing to the
12535 		 *	same object. The problem is that
12536 		 *	there is no way of mapping from
12537 		 *	an object to the entries pointing
12538 		 *	to it. (Deferred shadow creation
12539 		 *	works with one entry because occurs
12540 		 *	at fault time, and we walk from the
12541 		 *	entry to the object when handling
12542 		 *	the fault.)
12543 		 *
12544 		 *	The second case is when the object
12545 		 *	to be shared has already been copied
12546 		 *	with a symmetric copy, but we point
12547 		 *	directly to the object without
12548 		 *	needs_copy set in our entry. (This
12549 		 *	can happen because different ranges
12550 		 *	of an object can be pointed to by
12551 		 *	different entries. In particular,
12552 		 *	a single entry pointing to an object
12553 		 *	can be split by a call to vm_inherit,
12554 		 *	which, combined with task_create, can
12555 		 *	result in the different entries
12556 		 *	having different needs_copy values.)
12557 		 *	The shadowed flag in the object allows
12558 		 *	us to detect this case. The problem
12559 		 *	with this case is that if this object
12560 		 *	has or will have shadows, then we
12561 		 *	must not perform an asymmetric copy
12562 		 *	of this object, since such a copy
12563 		 *	allows the object to be changed, which
12564 		 *	will break the previous symmetrical
12565 		 *	copies (which rely upon the object
12566 		 *	not changing). In a sense, the shadowed
12567 		 *	flag says "don't change this object".
12568 		 *	We fix this by creating a shadow
12569 		 *	object for this object, and sharing
12570 		 *	that. This works because we are free
12571 		 *	to change the shadow object (and thus
12572 		 *	to use an asymmetric copy strategy);
12573 		 *	this is also semantically correct,
12574 		 *	since this object is temporary, and
12575 		 *	therefore a copy of the object is
12576 		 *	as good as the object itself. (This
12577 		 *	is not true for permanent objects,
12578 		 *	since the pager needs to see changes,
12579 		 *	which won't happen if the changes
12580 		 *	are made to a copy.)
12581 		 *
12582 		 *	The third case is when the object
12583 		 *	to be shared has parts sticking
12584 		 *	outside of the entry we're working
12585 		 *	with, and thus may in the future
12586 		 *	be subject to a symmetrical copy.
12587 		 *	(This is a preemptive version of
12588 		 *	case 2.)
12589 		 */
12590 		VME_OBJECT_SHADOW(old_entry,
12591 		    (vm_map_size_t) (old_entry->vme_end -
12592 		    old_entry->vme_start));
12593 
12594 		/*
12595 		 *	If we're making a shadow for other than
12596 		 *	copy on write reasons, then we have
12597 		 *	to remove write permission.
12598 		 */
12599 
12600 		if (!old_entry->needs_copy &&
12601 		    (old_entry->protection & VM_PROT_WRITE)) {
12602 			vm_prot_t prot;
12603 
12604 			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
12605 
12606 			prot = old_entry->protection & ~VM_PROT_WRITE;
12607 
12608 			assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
12609 
12610 			if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12611 				prot |= VM_PROT_EXECUTE;
12612 			}
12613 
12614 
12615 			if (old_map->mapped_in_other_pmaps) {
12616 				vm_object_pmap_protect(
12617 					VME_OBJECT(old_entry),
12618 					VME_OFFSET(old_entry),
12619 					(old_entry->vme_end -
12620 					old_entry->vme_start),
12621 					PMAP_NULL,
12622 					PAGE_SIZE,
12623 					old_entry->vme_start,
12624 					prot);
12625 			} else {
12626 				pmap_protect(old_map->pmap,
12627 				    old_entry->vme_start,
12628 				    old_entry->vme_end,
12629 				    prot);
12630 			}
12631 		}
12632 
12633 		old_entry->needs_copy = FALSE;
12634 		object = VME_OBJECT(old_entry);
12635 	}
12636 
12637 
12638 	/*
12639 	 *	If object was using a symmetric copy strategy,
12640 	 *	change its copy strategy to the default
12641 	 *	asymmetric copy strategy, which is copy_delay
12642 	 *	in the non-norma case and copy_call in the
12643 	 *	norma case. Bump the reference count for the
12644 	 *	new entry.
12645 	 */
12646 
12647 	if (old_entry->is_sub_map) {
12648 		vm_map_lock(VME_SUBMAP(old_entry));
12649 		vm_map_reference(VME_SUBMAP(old_entry));
12650 		vm_map_unlock(VME_SUBMAP(old_entry));
12651 	} else {
12652 		vm_object_lock(object);
12653 		vm_object_reference_locked(object);
12654 		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12655 			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12656 		}
12657 		vm_object_unlock(object);
12658 	}
12659 
12660 	/*
12661 	 *	Clone the entry, using object ref from above.
12662 	 *	Mark both entries as shared.
12663 	 */
12664 
12665 	new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12666 	                                                  * map or descendants */
12667 	vm_map_entry_copy(old_map, new_entry, old_entry);
12668 	old_entry->is_shared = TRUE;
12669 	new_entry->is_shared = TRUE;
12670 
12671 	/*
12672 	 * We're dealing with a shared mapping, so the resulting mapping
12673 	 * should inherit some of the original mapping's accounting settings.
12674 	 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12675 	 * "use_pmap" should stay the same as before (if it hasn't been reset
12676 	 * to TRUE when we cleared "iokit_acct").
12677 	 */
12678 	assert(!new_entry->iokit_acct);
12679 
12680 	/*
	 *	If the old entry's inheritance is VM_INHERIT_NONE,
	 *	the new entry is for a corpse fork; remove the
	 *	write permission from the new entry.
12684 	 */
12685 	if (old_entry->inheritance == VM_INHERIT_NONE) {
12686 		new_entry->protection &= ~VM_PROT_WRITE;
12687 		new_entry->max_protection &= ~VM_PROT_WRITE;
12688 	}
12689 
12690 	/*
12691 	 *	Insert the entry into the new map -- we
12692 	 *	know we're inserting at the end of the new
12693 	 *	map.
12694 	 */
12695 
12696 	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12697 	    VM_MAP_KERNEL_FLAGS_NONE);
12698 
12699 	/*
12700 	 *	Update the physical map
12701 	 */
12702 
12703 	if (old_entry->is_sub_map) {
12704 		/* Bill Angell pmap support goes here */
12705 	} else {
12706 		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12707 		    old_entry->vme_end - old_entry->vme_start,
12708 		    old_entry->vme_start);
12709 	}
12710 }
12711 
/*
 *	vm_map_fork_copy:
 *
 *	Slow-path copy of one entry's range from "old_map" into "new_map"
 *	during fork, used when the inline vm_object_copy_quickly() attempt
 *	in vm_map_fork() could not be applied (e.g. wired or true_share
 *	entries).
 *
 *	Called with "old_map" locked and returns with it locked, but the
 *	lock is dropped around the actual copy, so the map may have changed
 *	in the meantime; *old_entry_p is re-derived by lookup before return.
 *
 *	On return, *old_entry_p is the entry at which the caller's fork loop
 *	should resume.  Returns TRUE if the range was copied into "new_map"
 *	(the caller should account its size), FALSE if it was skipped.
 */
static boolean_t
vm_map_fork_copy(
	vm_map_t        old_map,
	vm_map_entry_t  *old_entry_p,
	vm_map_t        new_map,
	int             vm_map_copyin_flags)
{
	vm_map_entry_t old_entry = *old_entry_p;
	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
	vm_map_offset_t start = old_entry->vme_start;
	vm_map_copy_t copy;
	/* insertion point: the copied entries go at the tail of new_map */
	vm_map_entry_t last = vm_map_last_entry(new_map);

	vm_map_unlock(old_map);
	/*
	 *	Use maxprot version of copyin because we
	 *	care about whether this memory can ever
	 *	be accessed, not just whether it's accessible
	 *	right now.
	 */
	vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
	if (vm_map_copyin_internal(old_map, start, entry_size,
	    vm_map_copyin_flags, &copy)
	    != KERN_SUCCESS) {
		/*
		 *	The map might have changed while it
		 *	was unlocked, check it again.  Skip
		 *	any blank space or permanently
		 *	unreadable region.
		 */
		vm_map_lock(old_map);
		if (!vm_map_lookup_entry(old_map, start, &last) ||
		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
			last = last->vme_next;
		}
		*old_entry_p = last;

		/*
		 * XXX	For some error returns, want to
		 * XXX	skip to the next element.  Note
		 *	that INVALID_ADDRESS and
		 *	PROTECTION_FAILURE are handled above.
		 */

		return FALSE;
	}

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	/*
	 *	Insert the copy into the new map
	 */
	vm_map_copy_insert(new_map, last, copy);

	/*
	 *	Pick up the traversal at the end of
	 *	the copied region.
	 */

	vm_map_lock(old_map);
	start += entry_size;
	if (!vm_map_lookup_entry(old_map, start, &last)) {
		/* no entry at "start": resume at the next entry beyond it */
		last = last->vme_next;
	} else {
		if (last->vme_start == start) {
			/*
			 * No need to clip here and we don't
			 * want to cause any unnecessary
			 * unnesting...
			 */
		} else {
			vm_map_clip_start(old_map, last, start);
		}
	}
	*old_entry_p = last;

	return TRUE;
}
12794 
12795 /*
12796  *	vm_map_fork:
12797  *
12798  *	Create and return a new map based on the old
12799  *	map, according to the inheritance values on the
12800  *	regions in that map and the options.
12801  *
12802  *	The source map must not be locked.
12803  */
vm_map_t
vm_map_fork(
	ledger_t        ledger,
	vm_map_t        old_map,
	int             options)
{
	pmap_t          new_pmap;
	vm_map_t        new_map;
	vm_map_entry_t  old_entry;
	vm_map_size_t   new_size = 0, entry_size;
	vm_map_entry_t  new_entry;
	boolean_t       src_needs_copy;
	boolean_t       new_entry_needs_copy;
	boolean_t       pmap_is64bit;
	int             vm_map_copyin_flags;
	vm_inherit_t    old_entry_inheritance;
	int             map_create_options;
	/*
	 * Only initialized (and only consulted) when the
	 * VM_MAP_FORK_CORPSE_FOOTPRINT option is set; all uses below are
	 * guarded by that option.
	 */
	kern_return_t   footprint_collect_kr;

	if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_PRESERVE_PURGEABLE |
	    VM_MAP_FORK_CORPSE_FOOTPRINT)) {
		/* unsupported option */
		return VM_MAP_NULL;
	}

	/* the child pmap's addressing mode follows the parent's */
	pmap_is64bit =
#if defined(__i386__) || defined(__x86_64__)
	    old_map->pmap->pm_task_map != TASK_MAP_32BIT;
#elif defined(__arm64__)
	    old_map->pmap->is_64bit;
#elif defined(__arm__)
	    FALSE;
#else
#error Unknown architecture.
#endif

	unsigned int pmap_flags = 0;
	pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
#if defined(HAS_APPLE_PAC)
	/* inherit the parent's JOP-disable setting */
	pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
#endif
#if PMAP_CREATE_FORCE_4K_PAGES
	if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
	    PAGE_SIZE != FOURK_PAGE_SIZE) {
		pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
	}
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
	new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
	if (new_pmap == NULL) {
		return VM_MAP_NULL;
	}

	/* hold a ref on old_map across the fork; dropped before return */
	vm_map_reference(old_map);
	vm_map_lock(old_map);

	map_create_options = 0;
	if (old_map->hdr.entries_pageable) {
		map_create_options |= VM_MAP_CREATE_PAGEABLE;
	}
	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
		footprint_collect_kr = KERN_SUCCESS;
	}
	new_map = vm_map_create_options(new_pmap,
	    old_map->min_offset,
	    old_map->max_offset,
	    map_create_options);
	/* inherit cs_enforcement */
	vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
	vm_map_lock(new_map);
	vm_commit_pagezero_status(new_map);
	/* inherit the parent map's page size */
	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));

	/* ensure PMAP_CS structures are prepared for the fork */
	pmap_cs_fork_prepare(old_map->pmap, new_pmap);

	/*
	 * Walk every entry of the parent map and, based on its effective
	 * inheritance, skip it, share it, or copy it into the child.
	 * Note: the entry advance is at the bottom of the loop; the slow
	 * copy path "continue"s instead, because vm_map_fork_copy() already
	 * advanced old_entry for us.
	 */
	for (old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map);) {
		/*
		 * Abort any corpse collection if the system is shutting down.
		 */
		if ((options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
		    get_system_inshutdown()) {
			vm_map_corpse_footprint_collect_done(new_map);
			vm_map_unlock(new_map);
			vm_map_unlock(old_map);
			vm_map_deallocate(new_map);
			vm_map_deallocate(old_map);
			printf("Aborting corpse map due to system shutdown\n");
			return VM_MAP_NULL;
		}

		entry_size = old_entry->vme_end - old_entry->vme_start;

		old_entry_inheritance = old_entry->inheritance;
		/*
		 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
		 * share VM_INHERIT_NONE entries that are not backed by a
		 * device pager.
		 */
		if (old_entry_inheritance == VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
		    (old_entry->protection & VM_PROT_READ) &&
		    !(!old_entry->is_sub_map &&
		    VME_OBJECT(old_entry) != NULL &&
		    VME_OBJECT(old_entry)->pager != NULL &&
		    is_device_pager_ops(
			    VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
			old_entry_inheritance = VM_INHERIT_SHARE;
		}

		if (old_entry_inheritance != VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
		    footprint_collect_kr == KERN_SUCCESS) {
			/*
			 * The corpse won't have old_map->pmap to query
			 * footprint information, so collect that data now
			 * and store it in new_map->vmmap_corpse_footprint
			 * for later autopsy.
			 */
			footprint_collect_kr =
			    vm_map_corpse_footprint_collect(old_map,
			    old_entry,
			    new_map);
		}

		switch (old_entry_inheritance) {
		case VM_INHERIT_NONE:
			/* entry is not passed on to the child */
			break;

		case VM_INHERIT_SHARE:
			vm_map_fork_share(old_map, old_entry, new_map);
			new_size += entry_size;
			break;

		case VM_INHERIT_COPY:

			/*
			 *	Inline the copy_quickly case;
			 *	upon failure, fall back on call
			 *	to vm_map_fork_copy.
			 */

			if (old_entry->is_sub_map) {
				break;
			}
			/* wired or explicitly-shared objects need the slow path */
			if ((old_entry->wired_count != 0) ||
			    ((VME_OBJECT(old_entry) != NULL) &&
			    (VME_OBJECT(old_entry)->true_share))) {
				goto slow_vm_map_fork_copy;
			}

			new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
			vm_map_entry_copy(old_map, new_entry, old_entry);
			if (old_entry->permanent) {
				/* inherit "permanent" on fork() */
				new_entry->permanent = TRUE;
			}

			if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
				new_map->jit_entry_exists = TRUE;
			}

			if (new_entry->is_sub_map) {
				/* clear address space specifics */
				new_entry->use_pmap = FALSE;
			} else {
				/*
				 * We're dealing with a copy-on-write operation,
				 * so the resulting mapping should not inherit
				 * the original mapping's accounting settings.
				 * "iokit_acct" should have been cleared in
				 * vm_map_entry_copy().
				 * "use_pmap" should be reset to its default
				 * (TRUE) so that the new mapping gets
				 * accounted for in the task's memory footprint.
				 */
				assert(!new_entry->iokit_acct);
				new_entry->use_pmap = TRUE;
			}

			if (!vm_object_copy_quickly(
				    VME_OBJECT_PTR(new_entry),
				    VME_OFFSET(old_entry),
				    (old_entry->vme_end -
				    old_entry->vme_start),
				    &src_needs_copy,
				    &new_entry_needs_copy)) {
				vm_map_entry_dispose(new_map, new_entry);
				goto slow_vm_map_fork_copy;
			}

			/*
			 *	Handle copy-on-write obligations
			 */

			if (src_needs_copy && !old_entry->needs_copy) {
				vm_prot_t prot;

				assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));

				/* write-protect the parent's pages to trigger COW faults */
				prot = old_entry->protection & ~VM_PROT_WRITE;

				if (override_nx(old_map, VME_ALIAS(old_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));

				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					old_entry->vme_start),
					((old_entry->is_shared
					|| old_map->mapped_in_other_pmaps)
					? PMAP_NULL :
					old_map->pmap),
					VM_MAP_PAGE_SIZE(old_map),
					old_entry->vme_start,
					prot);

				assert(old_entry->wired_count == 0);
				old_entry->needs_copy = TRUE;
			}
			new_entry->needs_copy = new_entry_needs_copy;

			/*
			 *	Insert the entry at the end
			 *	of the map.
			 */

			vm_map_store_entry_link(new_map,
			    vm_map_last_entry(new_map),
			    new_entry,
			    VM_MAP_KERNEL_FLAGS_NONE);
			new_size += entry_size;
			break;

slow_vm_map_fork_copy:
			vm_map_copyin_flags = 0;
			if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
				vm_map_copyin_flags |=
				    VM_MAP_COPYIN_PRESERVE_PURGEABLE;
			}
			/* vm_map_fork_copy() updates old_entry for the next pass */
			if (vm_map_fork_copy(old_map,
			    &old_entry,
			    new_map,
			    vm_map_copyin_flags)) {
				new_size += entry_size;
			}
			continue;
		}
		old_entry = old_entry->vme_next;
	}

#if defined(__arm64__)
	pmap_insert_sharedpage(new_map->pmap);
#endif /* __arm64__ */

	new_map->size = new_size;

	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		vm_map_corpse_footprint_collect_done(new_map);
	}

	/* Propagate JIT entitlement for the pmap layer. */
	if (pmap_get_jit_entitled(old_map->pmap)) {
		/* Tell the pmap that it supports JIT. */
		pmap_set_jit_entitled(new_map->pmap);
	}

	vm_map_unlock(new_map);
	vm_map_unlock(old_map);
	vm_map_deallocate(old_map);

	return new_map;
}
13084 
13085 /*
13086  * vm_map_exec:
13087  *
13088  *      Setup the "new_map" with the proper execution environment according
13089  *	to the type of executable (platform, 64bit, chroot environment).
13090  *	Map the comm page and shared region, etc...
13091  */
13092 kern_return_t
vm_map_exec(vm_map_t new_map,task_t task,boolean_t is64bit,void * fsroot,cpu_type_t cpu,cpu_subtype_t cpu_subtype,boolean_t reslide,boolean_t is_driverkit)13093 vm_map_exec(
13094 	vm_map_t        new_map,
13095 	task_t          task,
13096 	boolean_t       is64bit,
13097 	void            *fsroot,
13098 	cpu_type_t      cpu,
13099 	cpu_subtype_t   cpu_subtype,
13100 	boolean_t       reslide,
13101 	boolean_t       is_driverkit)
13102 {
13103 	SHARED_REGION_TRACE_DEBUG(
13104 		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13105 		(void *)VM_KERNEL_ADDRPERM(current_task()),
13106 		(void *)VM_KERNEL_ADDRPERM(new_map),
13107 		(void *)VM_KERNEL_ADDRPERM(task),
13108 		(void *)VM_KERNEL_ADDRPERM(fsroot),
13109 		cpu,
13110 		cpu_subtype));
13111 	(void) vm_commpage_enter(new_map, task, is64bit);
13112 
13113 	(void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide, is_driverkit);
13114 
13115 	SHARED_REGION_TRACE_DEBUG(
13116 		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13117 		(void *)VM_KERNEL_ADDRPERM(current_task()),
13118 		(void *)VM_KERNEL_ADDRPERM(new_map),
13119 		(void *)VM_KERNEL_ADDRPERM(task),
13120 		(void *)VM_KERNEL_ADDRPERM(fsroot),
13121 		cpu,
13122 		cpu_subtype));
13123 
13124 	/*
13125 	 * Some devices have region(s) of memory that shouldn't get allocated by
13126 	 * user processes. The following code creates dummy vm_map_entry_t's for each
13127 	 * of the regions that needs to be reserved to prevent any allocations in
13128 	 * those regions.
13129 	 */
13130 	kern_return_t kr = KERN_FAILURE;
13131 	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13132 	vmk_flags.vmkf_permanent = TRUE;
13133 	vmk_flags.vmkf_beyond_max = TRUE;
13134 
13135 	struct vm_reserved_region *regions = NULL;
13136 	size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
13137 	assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13138 
13139 	for (size_t i = 0; i < num_regions; ++i) {
13140 		kr = vm_map_enter(
13141 			new_map,
13142 			&regions[i].vmrr_addr,
13143 			regions[i].vmrr_size,
13144 			(vm_map_offset_t)0,
13145 			VM_FLAGS_FIXED,
13146 			vmk_flags,
13147 			VM_KERN_MEMORY_NONE,
13148 			VM_OBJECT_NULL,
13149 			(vm_object_offset_t)0,
13150 			FALSE,
13151 			VM_PROT_NONE,
13152 			VM_PROT_NONE,
13153 			VM_INHERIT_COPY);
13154 
13155 		if (kr != KERN_SUCCESS) {
13156 			panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13157 		}
13158 	}
13159 
13160 	new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13161 
13162 	return KERN_SUCCESS;
13163 }
13164 
/*
 * Statistics for the copy paths taken by vm_map_lookup_locked():
 * counts, total/max sizes, restarts and errors for the "slowly" and
 * "strategically" object-copy paths, plus counters for the shadow path.
 */
uint64_t vm_map_lookup_locked_copy_slowly_count = 0;
uint64_t vm_map_lookup_locked_copy_slowly_size = 0;
uint64_t vm_map_lookup_locked_copy_slowly_max = 0;
uint64_t vm_map_lookup_locked_copy_slowly_restart = 0;
uint64_t vm_map_lookup_locked_copy_slowly_error = 0;
uint64_t vm_map_lookup_locked_copy_strategically_count = 0;
uint64_t vm_map_lookup_locked_copy_strategically_size = 0;
uint64_t vm_map_lookup_locked_copy_strategically_max = 0;
uint64_t vm_map_lookup_locked_copy_strategically_restart = 0;
uint64_t vm_map_lookup_locked_copy_strategically_error = 0;
uint64_t vm_map_lookup_locked_copy_shadow_count = 0;
uint64_t vm_map_lookup_locked_copy_shadow_size = 0;
uint64_t vm_map_lookup_locked_copy_shadow_max = 0;
13178 /*
13179  *	vm_map_lookup_locked:
13180  *
13181  *	Finds the VM object, offset, and
13182  *	protection for a given virtual address in the
13183  *	specified map, assuming a page fault of the
13184  *	type specified.
13185  *
13186  *	Returns the (object, offset, protection) for
13187  *	this address, whether it is wired down, and whether
13188  *	this map has the only reference to the data in question.
13189  *	In order to later verify this lookup, a "version"
13190  *	is returned.
13191  *	If contended != NULL, *contended will be set to
13192  *	true iff the thread had to spin or block to acquire
13193  *	an exclusive lock.
13194  *
13195  *	The map MUST be locked by the caller and WILL be
13196  *	locked on exit.  In order to guarantee the
13197  *	existence of the returned object, it is returned
13198  *	locked.
13199  *
13200  *	If a lookup is requested with "write protection"
13201  *	specified, the map may be changed to perform virtual
13202  *	copying operations, although the data referenced will
13203  *	remain the same.
13204  */
13205 kern_return_t
vm_map_lookup_locked(vm_map_t * var_map,vm_map_offset_t vaddr,vm_prot_t fault_type,int object_lock_type,vm_map_version_t * out_version,vm_object_t * object,vm_object_offset_t * offset,vm_prot_t * out_prot,boolean_t * wired,vm_object_fault_info_t fault_info,vm_map_t * real_map,bool * contended)13206 vm_map_lookup_locked(
13207 	vm_map_t                *var_map,       /* IN/OUT */
13208 	vm_map_offset_t         vaddr,
13209 	vm_prot_t               fault_type,
13210 	int                     object_lock_type,
13211 	vm_map_version_t        *out_version,   /* OUT */
13212 	vm_object_t             *object,        /* OUT */
13213 	vm_object_offset_t      *offset,        /* OUT */
13214 	vm_prot_t               *out_prot,      /* OUT */
13215 	boolean_t               *wired,         /* OUT */
13216 	vm_object_fault_info_t  fault_info,     /* OUT */
13217 	vm_map_t                *real_map,      /* OUT */
13218 	bool                    *contended)     /* OUT */
13219 {
13220 	vm_map_entry_t                  entry;
13221 	vm_map_t                        map = *var_map;
13222 	vm_map_t                        old_map = *var_map;
13223 	vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
13224 	vm_map_offset_t                 cow_parent_vaddr = 0;
13225 	vm_map_offset_t                 old_start = 0;
13226 	vm_map_offset_t                 old_end = 0;
13227 	vm_prot_t                       prot;
13228 	boolean_t                       mask_protections;
13229 	boolean_t                       force_copy;
13230 	boolean_t                       no_force_copy_if_executable;
13231 	boolean_t                       submap_needed_copy;
13232 	vm_prot_t                       original_fault_type;
13233 	vm_map_size_t                   fault_page_mask;
13234 
13235 	/*
13236 	 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13237 	 * as a mask against the mapping's actual protections, not as an
13238 	 * absolute value.
13239 	 */
13240 	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13241 	force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13242 	no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13243 	fault_type &= VM_PROT_ALL;
13244 	original_fault_type = fault_type;
13245 	if (contended) {
13246 		*contended = false;
13247 	}
13248 
13249 	*real_map = map;
13250 
13251 	fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13252 	vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13253 
13254 RetryLookup:
13255 	fault_type = original_fault_type;
13256 
13257 	/*
13258 	 *	If the map has an interesting hint, try it before calling
13259 	 *	full blown lookup routine.
13260 	 */
13261 	entry = map->hint;
13262 
13263 	if ((entry == vm_map_to_entry(map)) ||
13264 	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13265 		vm_map_entry_t  tmp_entry;
13266 
13267 		/*
13268 		 *	Entry was either not a valid hint, or the vaddr
13269 		 *	was not contained in the entry, so do a full lookup.
13270 		 */
13271 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13272 			if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13273 				vm_map_unlock(cow_sub_map_parent);
13274 			}
13275 			if ((*real_map != map)
13276 			    && (*real_map != cow_sub_map_parent)) {
13277 				vm_map_unlock(*real_map);
13278 			}
13279 			return KERN_INVALID_ADDRESS;
13280 		}
13281 
13282 		entry = tmp_entry;
13283 	}
13284 	if (map == old_map) {
13285 		old_start = entry->vme_start;
13286 		old_end = entry->vme_end;
13287 	}
13288 
13289 	/*
13290 	 *	Handle submaps.  Drop lock on upper map, submap is
13291 	 *	returned locked.
13292 	 */
13293 
13294 	submap_needed_copy = FALSE;
13295 submap_recurse:
13296 	if (entry->is_sub_map) {
13297 		vm_map_offset_t         local_vaddr;
13298 		vm_map_offset_t         end_delta;
13299 		vm_map_offset_t         start_delta;
13300 		vm_map_entry_t          submap_entry, saved_submap_entry;
13301 		vm_object_offset_t      submap_entry_offset;
13302 		vm_object_size_t        submap_entry_size;
13303 		vm_prot_t               subentry_protection;
13304 		vm_prot_t               subentry_max_protection;
13305 		boolean_t               subentry_no_copy_on_read;
13306 		boolean_t               mapped_needs_copy = FALSE;
13307 		vm_map_version_t        version;
13308 
13309 		assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13310 		    "map %p (%d) entry %p submap %p (%d)\n",
13311 		    map, VM_MAP_PAGE_SHIFT(map), entry,
13312 		    VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13313 
13314 		local_vaddr = vaddr;
13315 
13316 		if ((entry->use_pmap &&
13317 		    !((fault_type & VM_PROT_WRITE) ||
13318 		    force_copy))) {
13319 			/* if real_map equals map we unlock below */
13320 			if ((*real_map != map) &&
13321 			    (*real_map != cow_sub_map_parent)) {
13322 				vm_map_unlock(*real_map);
13323 			}
13324 			*real_map = VME_SUBMAP(entry);
13325 		}
13326 
13327 		if (entry->needs_copy &&
13328 		    ((fault_type & VM_PROT_WRITE) ||
13329 		    force_copy)) {
13330 			if (!mapped_needs_copy) {
13331 				if (vm_map_lock_read_to_write(map)) {
13332 					vm_map_lock_read(map);
13333 					*real_map = map;
13334 					goto RetryLookup;
13335 				}
13336 				vm_map_lock_read(VME_SUBMAP(entry));
13337 				*var_map = VME_SUBMAP(entry);
13338 				cow_sub_map_parent = map;
13339 				/* reset base to map before cow object */
13340 				/* this is the map which will accept   */
13341 				/* the new cow object */
13342 				old_start = entry->vme_start;
13343 				old_end = entry->vme_end;
13344 				cow_parent_vaddr = vaddr;
13345 				mapped_needs_copy = TRUE;
13346 			} else {
13347 				vm_map_lock_read(VME_SUBMAP(entry));
13348 				*var_map = VME_SUBMAP(entry);
13349 				if ((cow_sub_map_parent != map) &&
13350 				    (*real_map != map)) {
13351 					vm_map_unlock(map);
13352 				}
13353 			}
13354 		} else {
13355 			if (entry->needs_copy) {
13356 				submap_needed_copy = TRUE;
13357 			}
13358 			vm_map_lock_read(VME_SUBMAP(entry));
13359 			*var_map = VME_SUBMAP(entry);
13360 			/* leave map locked if it is a target */
13361 			/* cow sub_map above otherwise, just  */
13362 			/* follow the maps down to the object */
13363 			/* here we unlock knowing we are not  */
13364 			/* revisiting the map.  */
13365 			if ((*real_map != map) && (map != cow_sub_map_parent)) {
13366 				vm_map_unlock_read(map);
13367 			}
13368 		}
13369 
13370 		map = *var_map;
13371 
13372 		/* calculate the offset in the submap for vaddr */
13373 		local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13374 		assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13375 		    "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13376 		    (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13377 
13378 RetrySubMap:
13379 		if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13380 			if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13381 				vm_map_unlock(cow_sub_map_parent);
13382 			}
13383 			if ((*real_map != map)
13384 			    && (*real_map != cow_sub_map_parent)) {
13385 				vm_map_unlock(*real_map);
13386 			}
13387 			*real_map = map;
13388 			return KERN_INVALID_ADDRESS;
13389 		}
13390 
13391 		/* find the attenuated shadow of the underlying object */
13392 		/* on our target map */
13393 
13394 		/* in english the submap object may extend beyond the     */
13395 		/* region mapped by the entry or, may only fill a portion */
13396 		/* of it.  For our purposes, we only care if the object   */
13397 		/* doesn't fill.  In this case the area which will        */
13398 		/* ultimately be clipped in the top map will only need    */
13399 		/* to be as big as the portion of the underlying entry    */
13400 		/* which is mapped */
13401 		start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13402 		    submap_entry->vme_start - VME_OFFSET(entry) : 0;
13403 
13404 		end_delta =
13405 		    (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13406 		    submap_entry->vme_end ?
13407 		    0 : (VME_OFFSET(entry) +
13408 		    (old_end - old_start))
13409 		    - submap_entry->vme_end;
13410 
13411 		old_start += start_delta;
13412 		old_end -= end_delta;
13413 
13414 		if (submap_entry->is_sub_map) {
13415 			entry = submap_entry;
13416 			vaddr = local_vaddr;
13417 			goto submap_recurse;
13418 		}
13419 
13420 		if (((fault_type & VM_PROT_WRITE) ||
13421 		    force_copy)
13422 		    && cow_sub_map_parent) {
13423 			vm_object_t     sub_object, copy_object;
13424 			vm_object_offset_t copy_offset;
13425 			vm_map_offset_t local_start;
13426 			vm_map_offset_t local_end;
13427 			boolean_t       object_copied = FALSE;
13428 			vm_object_offset_t object_copied_offset = 0;
13429 			boolean_t       object_copied_needs_copy = FALSE;
13430 			kern_return_t   kr = KERN_SUCCESS;
13431 
13432 			if (vm_map_lock_read_to_write(map)) {
13433 				vm_map_lock_read(map);
13434 				old_start -= start_delta;
13435 				old_end += end_delta;
13436 				goto RetrySubMap;
13437 			}
13438 
13439 
13440 			sub_object = VME_OBJECT(submap_entry);
13441 			if (sub_object == VM_OBJECT_NULL) {
13442 				sub_object =
13443 				    vm_object_allocate(
13444 					(vm_map_size_t)
13445 					(submap_entry->vme_end -
13446 					submap_entry->vme_start));
13447 				VME_OBJECT_SET(submap_entry, sub_object);
13448 				VME_OFFSET_SET(submap_entry, 0);
13449 				assert(!submap_entry->is_sub_map);
13450 				assert(submap_entry->use_pmap);
13451 			}
13452 			local_start =  local_vaddr -
13453 			    (cow_parent_vaddr - old_start);
13454 			local_end = local_vaddr +
13455 			    (old_end - cow_parent_vaddr);
13456 			vm_map_clip_start(map, submap_entry, local_start);
13457 			vm_map_clip_end(map, submap_entry, local_end);
13458 			if (submap_entry->is_sub_map) {
13459 				/* unnesting was done when clipping */
13460 				assert(!submap_entry->use_pmap);
13461 			}
13462 
13463 			/* This is the COW case, lets connect */
13464 			/* an entry in our space to the underlying */
13465 			/* object in the submap, bypassing the  */
13466 			/* submap. */
13467 			submap_entry_offset = VME_OFFSET(submap_entry);
13468 			submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13469 
13470 			if ((submap_entry->wired_count != 0 ||
13471 			    sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
13472 			    (submap_entry->protection & VM_PROT_EXECUTE) &&
13473 			    no_force_copy_if_executable) {
13474 //				printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13475 				if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13476 					vm_map_unlock(cow_sub_map_parent);
13477 				}
13478 				if ((*real_map != map)
13479 				    && (*real_map != cow_sub_map_parent)) {
13480 					vm_map_unlock(*real_map);
13481 				}
13482 				*real_map = map;
13483 				kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_NO_COW_ON_EXECUTABLE), 0 /* arg */);
13484 				vm_map_lock_write_to_read(map);
13485 				kr = KERN_PROTECTION_FAILURE;
13486 				DTRACE_VM4(submap_no_copy_executable,
13487 				    vm_map_t, map,
13488 				    vm_object_offset_t, submap_entry_offset,
13489 				    vm_object_size_t, submap_entry_size,
13490 				    int, kr);
13491 				return kr;
13492 			}
13493 
13494 			if (submap_entry->wired_count != 0) {
13495 				vm_object_reference(sub_object);
13496 
13497 				assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13498 				    "submap_entry %p offset 0x%llx\n",
13499 				    submap_entry, VME_OFFSET(submap_entry));
13500 
13501 				DTRACE_VM6(submap_copy_slowly,
13502 				    vm_map_t, cow_sub_map_parent,
13503 				    vm_map_offset_t, vaddr,
13504 				    vm_map_t, map,
13505 				    vm_object_size_t, submap_entry_size,
13506 				    int, submap_entry->wired_count,
13507 				    int, sub_object->copy_strategy);
13508 
13509 				saved_submap_entry = submap_entry;
13510 				version.main_timestamp = map->timestamp;
13511 				vm_map_unlock(map); /* Increments timestamp by 1 */
13512 				submap_entry = VM_MAP_ENTRY_NULL;
13513 
13514 				vm_object_lock(sub_object);
13515 				kr = vm_object_copy_slowly(sub_object,
13516 				    submap_entry_offset,
13517 				    submap_entry_size,
13518 				    FALSE,
13519 				    &copy_object);
13520 				object_copied = TRUE;
13521 				object_copied_offset = 0;
13522 				/* 4k: account for extra offset in physical page */
13523 				object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13524 				object_copied_needs_copy = FALSE;
13525 				vm_object_deallocate(sub_object);
13526 
13527 				vm_map_lock(map);
13528 
13529 				if (kr != KERN_SUCCESS &&
13530 				    kr != KERN_MEMORY_RESTART_COPY) {
13531 					if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13532 						vm_map_unlock(cow_sub_map_parent);
13533 					}
13534 					if ((*real_map != map)
13535 					    && (*real_map != cow_sub_map_parent)) {
13536 						vm_map_unlock(*real_map);
13537 					}
13538 					*real_map = map;
13539 					vm_object_deallocate(copy_object);
13540 					copy_object = VM_OBJECT_NULL;
13541 					kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_SLOWLY_FAILED), 0 /* arg */);
13542 					vm_map_lock_write_to_read(map);
13543 					DTRACE_VM4(submap_copy_error_slowly,
13544 					    vm_object_t, sub_object,
13545 					    vm_object_offset_t, submap_entry_offset,
13546 					    vm_object_size_t, submap_entry_size,
13547 					    int, kr);
13548 					vm_map_lookup_locked_copy_slowly_error++;
13549 					return kr;
13550 				}
13551 
13552 				if ((kr == KERN_SUCCESS) &&
13553 				    (version.main_timestamp + 1) == map->timestamp) {
13554 					submap_entry = saved_submap_entry;
13555 				} else {
13556 					saved_submap_entry = NULL;
13557 					old_start -= start_delta;
13558 					old_end += end_delta;
13559 					vm_object_deallocate(copy_object);
13560 					copy_object = VM_OBJECT_NULL;
13561 					vm_map_lock_write_to_read(map);
13562 					vm_map_lookup_locked_copy_slowly_restart++;
13563 					goto RetrySubMap;
13564 				}
13565 				vm_map_lookup_locked_copy_slowly_count++;
13566 				vm_map_lookup_locked_copy_slowly_size += submap_entry_size;
13567 				if (submap_entry_size > vm_map_lookup_locked_copy_slowly_max) {
13568 					vm_map_lookup_locked_copy_slowly_max = submap_entry_size;
13569 				}
13570 			} else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13571 				submap_entry_offset = VME_OFFSET(submap_entry);
13572 				copy_object = VM_OBJECT_NULL;
13573 				object_copied_offset = submap_entry_offset;
13574 				object_copied_needs_copy = FALSE;
13575 				DTRACE_VM6(submap_copy_strategically,
13576 				    vm_map_t, cow_sub_map_parent,
13577 				    vm_map_offset_t, vaddr,
13578 				    vm_map_t, map,
13579 				    vm_object_size_t, submap_entry_size,
13580 				    int, submap_entry->wired_count,
13581 				    int, sub_object->copy_strategy);
13582 				kr = vm_object_copy_strategically(
13583 					sub_object,
13584 					submap_entry_offset,
13585 					submap_entry->vme_end - submap_entry->vme_start,
13586 					&copy_object,
13587 					&object_copied_offset,
13588 					&object_copied_needs_copy);
13589 				if (kr == KERN_MEMORY_RESTART_COPY) {
13590 					old_start -= start_delta;
13591 					old_end += end_delta;
13592 					vm_object_deallocate(copy_object);
13593 					copy_object = VM_OBJECT_NULL;
13594 					vm_map_lock_write_to_read(map);
13595 					vm_map_lookup_locked_copy_strategically_restart++;
13596 					goto RetrySubMap;
13597 				}
13598 				if (kr != KERN_SUCCESS) {
13599 					if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13600 						vm_map_unlock(cow_sub_map_parent);
13601 					}
13602 					if ((*real_map != map)
13603 					    && (*real_map != cow_sub_map_parent)) {
13604 						vm_map_unlock(*real_map);
13605 					}
13606 					*real_map = map;
13607 					vm_object_deallocate(copy_object);
13608 					copy_object = VM_OBJECT_NULL;
13609 					kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_SUBMAP_COPY_STRAT_FAILED), 0 /* arg */);
13610 					vm_map_lock_write_to_read(map);
13611 					DTRACE_VM4(submap_copy_error_strategically,
13612 					    vm_object_t, sub_object,
13613 					    vm_object_offset_t, submap_entry_offset,
13614 					    vm_object_size_t, submap_entry_size,
13615 					    int, kr);
13616 					vm_map_lookup_locked_copy_strategically_error++;
13617 					return kr;
13618 				}
13619 				assert(copy_object != VM_OBJECT_NULL);
13620 				assert(copy_object != sub_object);
13621 				object_copied = TRUE;
13622 				vm_map_lookup_locked_copy_strategically_count++;
13623 				vm_map_lookup_locked_copy_strategically_size += submap_entry_size;
13624 				if (submap_entry_size > vm_map_lookup_locked_copy_strategically_max) {
13625 					vm_map_lookup_locked_copy_strategically_max = submap_entry_size;
13626 				}
13627 			} else {
13628 				/* set up shadow object */
13629 				object_copied = FALSE;
13630 				copy_object = sub_object;
13631 				vm_object_lock(sub_object);
13632 				vm_object_reference_locked(sub_object);
13633 				sub_object->shadowed = TRUE;
13634 				vm_object_unlock(sub_object);
13635 
13636 				assert(submap_entry->wired_count == 0);
13637 				submap_entry->needs_copy = TRUE;
13638 
13639 				prot = submap_entry->protection;
13640 				assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13641 				prot = prot & ~VM_PROT_WRITE;
13642 				assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13643 
13644 				if (override_nx(old_map,
13645 				    VME_ALIAS(submap_entry))
13646 				    && prot) {
13647 					prot |= VM_PROT_EXECUTE;
13648 				}
13649 
13650 				vm_object_pmap_protect(
13651 					sub_object,
13652 					VME_OFFSET(submap_entry),
13653 					submap_entry->vme_end -
13654 					submap_entry->vme_start,
13655 					(submap_entry->is_shared
13656 					|| map->mapped_in_other_pmaps) ?
13657 					PMAP_NULL : map->pmap,
13658 					VM_MAP_PAGE_SIZE(map),
13659 					submap_entry->vme_start,
13660 					prot);
13661 				vm_map_lookup_locked_copy_shadow_count++;
13662 				vm_map_lookup_locked_copy_shadow_size += submap_entry_size;
13663 				if (submap_entry_size > vm_map_lookup_locked_copy_shadow_max) {
13664 					vm_map_lookup_locked_copy_shadow_max = submap_entry_size;
13665 				}
13666 			}
13667 
13668 			/*
13669 			 * Adjust the fault offset to the submap entry.
13670 			 */
13671 			copy_offset = (local_vaddr -
13672 			    submap_entry->vme_start +
13673 			    VME_OFFSET(submap_entry));
13674 
13675 			/* This works diffently than the   */
13676 			/* normal submap case. We go back  */
13677 			/* to the parent of the cow map and*/
13678 			/* clip out the target portion of  */
13679 			/* the sub_map, substituting the   */
13680 			/* new copy object,                */
13681 
13682 			subentry_protection = submap_entry->protection;
13683 			subentry_max_protection = submap_entry->max_protection;
13684 			subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13685 			vm_map_unlock(map);
13686 			submap_entry = NULL; /* not valid after map unlock */
13687 
13688 			local_start = old_start;
13689 			local_end = old_end;
13690 			map = cow_sub_map_parent;
13691 			*var_map = cow_sub_map_parent;
13692 			vaddr = cow_parent_vaddr;
13693 			cow_sub_map_parent = NULL;
13694 
13695 			if (!vm_map_lookup_entry(map,
13696 			    vaddr, &entry)) {
13697 				if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13698 					vm_map_unlock(cow_sub_map_parent);
13699 				}
13700 				if ((*real_map != map)
13701 				    && (*real_map != cow_sub_map_parent)) {
13702 					vm_map_unlock(*real_map);
13703 				}
13704 				*real_map = map;
13705 				vm_object_deallocate(
13706 					copy_object);
13707 				copy_object = VM_OBJECT_NULL;
13708 				vm_map_lock_write_to_read(map);
13709 				DTRACE_VM4(submap_lookup_post_unlock,
13710 				    uint64_t, (uint64_t)entry->vme_start,
13711 				    uint64_t, (uint64_t)entry->vme_end,
13712 				    vm_map_offset_t, vaddr,
13713 				    int, object_copied);
13714 				return KERN_INVALID_ADDRESS;
13715 			}
13716 
13717 			/* clip out the portion of space */
13718 			/* mapped by the sub map which   */
13719 			/* corresponds to the underlying */
13720 			/* object */
13721 
13722 			/*
13723 			 * Clip (and unnest) the smallest nested chunk
13724 			 * possible around the faulting address...
13725 			 */
13726 			local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
13727 			local_end = local_start + pmap_shared_region_size_min(map->pmap);
13728 			/*
13729 			 * ... but don't go beyond the "old_start" to "old_end"
13730 			 * range, to avoid spanning over another VM region
13731 			 * with a possibly different VM object and/or offset.
13732 			 */
13733 			if (local_start < old_start) {
13734 				local_start = old_start;
13735 			}
13736 			if (local_end > old_end) {
13737 				local_end = old_end;
13738 			}
13739 			/*
13740 			 * Adjust copy_offset to the start of the range.
13741 			 */
13742 			copy_offset -= (vaddr - local_start);
13743 
13744 			vm_map_clip_start(map, entry, local_start);
13745 			vm_map_clip_end(map, entry, local_end);
13746 			if (entry->is_sub_map) {
13747 				/* unnesting was done when clipping */
13748 				assert(!entry->use_pmap);
13749 			}
13750 
13751 			/* substitute copy object for */
13752 			/* shared map entry           */
13753 			vm_map_deallocate(VME_SUBMAP(entry));
13754 			assert(!entry->iokit_acct);
13755 			entry->is_sub_map = FALSE;
13756 			entry->use_pmap = TRUE;
13757 			VME_OBJECT_SET(entry, copy_object);
13758 
13759 			/* propagate the submap entry's protections */
13760 			if (entry->protection != VM_PROT_READ) {
13761 				/*
13762 				 * Someone has already altered the top entry's
13763 				 * protections via vm_protect(VM_PROT_COPY).
13764 				 * Respect these new values and ignore the
13765 				 * submap entry's protections.
13766 				 */
13767 			} else {
13768 				/*
13769 				 * Regular copy-on-write: propagate the submap
13770 				 * entry's protections to the top map entry.
13771 				 */
13772 				entry->protection |= subentry_protection;
13773 			}
13774 			entry->max_protection |= subentry_max_protection;
13775 			/* propagate no_copy_on_read */
13776 			entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13777 
13778 			if ((entry->protection & VM_PROT_WRITE) &&
13779 			    (entry->protection & VM_PROT_EXECUTE) &&
13780 #if XNU_TARGET_OS_OSX
13781 			    map->pmap != kernel_pmap &&
13782 			    (vm_map_cs_enforcement(map)
13783 #if __arm64__
13784 			    || !VM_MAP_IS_EXOTIC(map)
13785 #endif /* __arm64__ */
13786 			    ) &&
13787 #endif /* XNU_TARGET_OS_OSX */
13788 			    !(entry->used_for_jit) &&
13789 			    VM_MAP_POLICY_WX_STRIP_X(map)) {
13790 				DTRACE_VM3(cs_wx,
13791 				    uint64_t, (uint64_t)entry->vme_start,
13792 				    uint64_t, (uint64_t)entry->vme_end,
13793 				    vm_prot_t, entry->protection);
13794 				printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13795 				    proc_selfpid(),
13796 				    (current_task()->bsd_info
13797 				    ? proc_name_address(current_task()->bsd_info)
13798 				    : "?"),
13799 				    __FUNCTION__);
13800 				entry->protection &= ~VM_PROT_EXECUTE;
13801 			}
13802 
13803 			if (object_copied) {
13804 				VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
13805 				entry->needs_copy = object_copied_needs_copy;
13806 				entry->is_shared = FALSE;
13807 			} else {
13808 				assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
13809 				assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
13810 				assert(entry->wired_count == 0);
13811 				VME_OFFSET_SET(entry, copy_offset);
13812 				entry->needs_copy = TRUE;
13813 				if (map != old_map) {
13814 					entry->is_shared = TRUE;
13815 				}
13816 			}
13817 			if (entry->inheritance == VM_INHERIT_SHARE) {
13818 				entry->inheritance = VM_INHERIT_COPY;
13819 			}
13820 
13821 			vm_map_lock_write_to_read(map);
13822 		} else {
13823 			if ((cow_sub_map_parent)
13824 			    && (cow_sub_map_parent != *real_map)
13825 			    && (cow_sub_map_parent != map)) {
13826 				vm_map_unlock(cow_sub_map_parent);
13827 			}
13828 			entry = submap_entry;
13829 			vaddr = local_vaddr;
13830 		}
13831 	}
13832 
13833 	/*
13834 	 *	Check whether this task is allowed to have
13835 	 *	this page.
13836 	 */
13837 
13838 	prot = entry->protection;
13839 
13840 	if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13841 		/*
13842 		 * HACK -- if not a stack, then allow execution
13843 		 */
13844 		prot |= VM_PROT_EXECUTE;
13845 	}
13846 
13847 	if (mask_protections) {
13848 		fault_type &= prot;
13849 		if (fault_type == VM_PROT_NONE) {
13850 			goto protection_failure;
13851 		}
13852 	}
13853 	if (((fault_type & prot) != fault_type)
13854 #if __arm64__
13855 	    /* prefetch abort in execute-only page */
13856 	    && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13857 #elif defined(__x86_64__)
13858 	    /* Consider the UEXEC bit when handling an EXECUTE fault */
13859 	    && !((fault_type & VM_PROT_EXECUTE) && !(prot & VM_PROT_EXECUTE) && (prot & VM_PROT_UEXEC))
13860 #endif
13861 	    ) {
13862 protection_failure:
13863 		if (*real_map != map) {
13864 			vm_map_unlock(*real_map);
13865 		}
13866 		*real_map = map;
13867 
13868 		if ((fault_type & VM_PROT_EXECUTE) && prot) {
13869 			log_stack_execution_failure((addr64_t)vaddr, prot);
13870 		}
13871 
13872 		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13873 		DTRACE_VM3(prot_fault_detailed, vm_prot_t, fault_type, vm_prot_t, prot, void *, vaddr);
13874 		/*
13875 		 * Noisy (esp. internally) and can be inferred from CrashReports. So OFF for now.
13876 		 *
13877 		 * kernel_triage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_PROTECTION_FAILURE), 0);
13878 		 */
13879 		return KERN_PROTECTION_FAILURE;
13880 	}
13881 
13882 	/*
13883 	 *	If this page is not pageable, we have to get
13884 	 *	it for all possible accesses.
13885 	 */
13886 
13887 	*wired = (entry->wired_count != 0);
13888 	if (*wired) {
13889 		fault_type = prot;
13890 	}
13891 
13892 	/*
13893 	 *	If the entry was copy-on-write, we either ...
13894 	 */
13895 
13896 	if (entry->needs_copy) {
13897 		/*
13898 		 *	If we want to write the page, we may as well
13899 		 *	handle that now since we've got the map locked.
13900 		 *
13901 		 *	If we don't need to write the page, we just
13902 		 *	demote the permissions allowed.
13903 		 */
13904 
13905 		if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13906 			/*
13907 			 *	Make a new object, and place it in the
13908 			 *	object chain.  Note that no new references
13909 			 *	have appeared -- one just moved from the
13910 			 *	map to the new object.
13911 			 */
13912 
13913 			if (vm_map_lock_read_to_write(map)) {
13914 				vm_map_lock_read(map);
13915 				goto RetryLookup;
13916 			}
13917 
13918 			if (VME_OBJECT(entry)->shadowed == FALSE) {
13919 				vm_object_lock(VME_OBJECT(entry));
13920 				VME_OBJECT(entry)->shadowed = TRUE;
13921 				vm_object_unlock(VME_OBJECT(entry));
13922 			}
13923 			VME_OBJECT_SHADOW(entry,
13924 			    (vm_map_size_t) (entry->vme_end -
13925 			    entry->vme_start));
13926 			entry->needs_copy = FALSE;
13927 
13928 			vm_map_lock_write_to_read(map);
13929 		}
13930 		if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13931 			/*
13932 			 *	We're attempting to read a copy-on-write
13933 			 *	page -- don't allow writes.
13934 			 */
13935 
13936 			prot &= (~VM_PROT_WRITE);
13937 		}
13938 	}
13939 
13940 	if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
13941 		/*
13942 		 * We went through a "needs_copy" submap without triggering
13943 		 * a copy, so granting write access to the page would bypass
13944 		 * that submap's "needs_copy".
13945 		 */
13946 		assert(!(fault_type & VM_PROT_WRITE));
13947 		assert(!*wired);
13948 		assert(!force_copy);
13949 		// printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
13950 		prot &= ~VM_PROT_WRITE;
13951 	}
13952 
13953 	/*
13954 	 *	Create an object if necessary.
13955 	 */
13956 	if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13957 		if (vm_map_lock_read_to_write(map)) {
13958 			vm_map_lock_read(map);
13959 			goto RetryLookup;
13960 		}
13961 
13962 		VME_OBJECT_SET(entry,
13963 		    vm_object_allocate(
13964 			    (vm_map_size_t)(entry->vme_end -
13965 			    entry->vme_start)));
13966 		VME_OFFSET_SET(entry, 0);
13967 		assert(entry->use_pmap);
13968 		vm_map_lock_write_to_read(map);
13969 	}
13970 
13971 	/*
13972 	 *	Return the object/offset from this entry.  If the entry
13973 	 *	was copy-on-write or empty, it has been fixed up.  Also
13974 	 *	return the protection.
13975 	 */
13976 
13977 	*offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13978 	*object = VME_OBJECT(entry);
13979 	*out_prot = prot;
13980 	KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
13981 
13982 	if (fault_info) {
13983 		fault_info->interruptible = THREAD_UNINT; /* for now... */
13984 		/* ... the caller will change "interruptible" if needed */
13985 		fault_info->cluster_size = 0;
13986 		fault_info->user_tag = VME_ALIAS(entry);
13987 		fault_info->pmap_options = 0;
13988 		if (entry->iokit_acct ||
13989 		    (!entry->is_sub_map && !entry->use_pmap)) {
13990 			fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13991 		}
13992 		fault_info->behavior = entry->behavior;
13993 		fault_info->lo_offset = VME_OFFSET(entry);
13994 		fault_info->hi_offset =
13995 		    (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13996 		fault_info->no_cache  = entry->no_cache;
13997 		fault_info->stealth = FALSE;
13998 		fault_info->io_sync = FALSE;
13999 		if (entry->used_for_jit ||
14000 		    entry->vme_resilient_codesign) {
14001 			fault_info->cs_bypass = TRUE;
14002 		} else {
14003 			fault_info->cs_bypass = FALSE;
14004 		}
14005 		fault_info->pmap_cs_associated = FALSE;
14006 #if CONFIG_PMAP_CS
14007 		if (entry->pmap_cs_associated) {
14008 			/*
14009 			 * The pmap layer will validate this page
14010 			 * before allowing it to be executed from.
14011 			 */
14012 			fault_info->pmap_cs_associated = TRUE;
14013 		}
14014 #endif /* CONFIG_PMAP_CS */
14015 		fault_info->mark_zf_absent = FALSE;
14016 		fault_info->batch_pmap_op = FALSE;
14017 		fault_info->resilient_media = entry->vme_resilient_media;
14018 		fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
14019 		if (entry->translated_allow_execute) {
14020 			fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14021 		}
14022 	}
14023 
14024 	/*
14025 	 *	Lock the object to prevent it from disappearing
14026 	 */
14027 	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14028 		if (contended == NULL) {
14029 			vm_object_lock(*object);
14030 		} else {
14031 			*contended = vm_object_lock_check_contended(*object);
14032 		}
14033 	} else {
14034 		vm_object_lock_shared(*object);
14035 	}
14036 
14037 	/*
14038 	 *	Save the version number
14039 	 */
14040 
14041 	out_version->main_timestamp = map->timestamp;
14042 
14043 	return KERN_SUCCESS;
14044 }
14045 
14046 
14047 /*
14048  *	vm_map_verify:
14049  *
14050  *	Verifies that the map in question has not changed
14051  *	since the given version. The map has to be locked
14052  *	("shared" mode is fine) before calling this function
14053  *	and it will be returned locked too.
14054  */
14055 boolean_t
vm_map_verify(vm_map_t map,vm_map_version_t * version)14056 vm_map_verify(
14057 	vm_map_t                map,
14058 	vm_map_version_t        *version)       /* REF */
14059 {
14060 	boolean_t       result;
14061 
14062 	vm_map_lock_assert_held(map);
14063 	result = (map->timestamp == version->main_timestamp);
14064 
14065 	return result;
14066 }
14067 
14068 /*
14069  *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14070  *	Goes away after regular vm_region_recurse function migrates to
14071  *	64 bits
14072  *	vm_region_recurse: A form of vm_region which follows the
14073  *	submaps in a target map
14074  *
14075  */
14076 
14077 kern_return_t
vm_map_region_recurse_64(vm_map_t map,vm_map_offset_t * address,vm_map_size_t * size,natural_t * nesting_depth,vm_region_submap_info_64_t submap_info,mach_msg_type_number_t * count)14078 vm_map_region_recurse_64(
14079 	vm_map_t                 map,
14080 	vm_map_offset_t *address,               /* IN/OUT */
14081 	vm_map_size_t           *size,                  /* OUT */
14082 	natural_t               *nesting_depth, /* IN/OUT */
14083 	vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
14084 	mach_msg_type_number_t  *count) /* IN/OUT */
14085 {
14086 	mach_msg_type_number_t  original_count;
14087 	vm_region_extended_info_data_t  extended;
14088 	vm_map_entry_t                  tmp_entry;
14089 	vm_map_offset_t                 user_address;
14090 	unsigned int                    user_max_depth;
14091 
14092 	/*
14093 	 * "curr_entry" is the VM map entry preceding or including the
14094 	 * address we're looking for.
14095 	 * "curr_map" is the map or sub-map containing "curr_entry".
14096 	 * "curr_address" is the equivalent of the top map's "user_address"
14097 	 * in the current map.
14098 	 * "curr_offset" is the cumulated offset of "curr_map" in the
14099 	 * target task's address space.
14100 	 * "curr_depth" is the depth of "curr_map" in the chain of
14101 	 * sub-maps.
14102 	 *
14103 	 * "curr_max_below" and "curr_max_above" limit the range (around
14104 	 * "curr_address") we should take into account in the current (sub)map.
14105 	 * They limit the range to what's visible through the map entries
14106 	 * we've traversed from the top map to the current map.
14107 	 *
14108 	 */
14109 	vm_map_entry_t                  curr_entry;
14110 	vm_map_address_t                curr_address;
14111 	vm_map_offset_t                 curr_offset;
14112 	vm_map_t                        curr_map;
14113 	unsigned int                    curr_depth;
14114 	vm_map_offset_t                 curr_max_below, curr_max_above;
14115 	vm_map_offset_t                 curr_skip;
14116 
14117 	/*
14118 	 * "next_" is the same as "curr_" but for the VM region immediately
14119 	 * after the address we're looking for.  We need to keep track of this
14120 	 * too because we want to return info about that region if the
14121 	 * address we're looking for is not mapped.
14122 	 */
14123 	vm_map_entry_t                  next_entry;
14124 	vm_map_offset_t                 next_offset;
14125 	vm_map_offset_t                 next_address;
14126 	vm_map_t                        next_map;
14127 	unsigned int                    next_depth;
14128 	vm_map_offset_t                 next_max_below, next_max_above;
14129 	vm_map_offset_t                 next_skip;
14130 
14131 	boolean_t                       look_for_pages;
14132 	vm_region_submap_short_info_64_t short_info;
14133 	boolean_t                       do_region_footprint;
14134 	int                             effective_page_size, effective_page_shift;
14135 	boolean_t                       submap_needed_copy;
14136 
14137 	if (map == VM_MAP_NULL) {
14138 		/* no address space to work on */
14139 		return KERN_INVALID_ARGUMENT;
14140 	}
14141 
14142 	effective_page_shift = vm_self_region_page_shift(map);
14143 	effective_page_size = (1 << effective_page_shift);
14144 
14145 	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14146 		/*
14147 		 * "info" structure is not big enough and
14148 		 * would overflow
14149 		 */
14150 		return KERN_INVALID_ARGUMENT;
14151 	}
14152 
14153 	do_region_footprint = task_self_region_footprint();
14154 	original_count = *count;
14155 
14156 	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14157 		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14158 		look_for_pages = FALSE;
14159 		short_info = (vm_region_submap_short_info_64_t) submap_info;
14160 		submap_info = NULL;
14161 	} else {
14162 		look_for_pages = TRUE;
14163 		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14164 		short_info = NULL;
14165 
14166 		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14167 			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14168 		}
14169 		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14170 			*count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14171 		}
14172 	}
14173 
14174 	user_address = *address;
14175 	user_max_depth = *nesting_depth;
14176 	submap_needed_copy = FALSE;
14177 
14178 	if (not_in_kdp) {
14179 		vm_map_lock_read(map);
14180 	}
14181 
14182 recurse_again:
14183 	curr_entry = NULL;
14184 	curr_map = map;
14185 	curr_address = user_address;
14186 	curr_offset = 0;
14187 	curr_skip = 0;
14188 	curr_depth = 0;
14189 	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14190 	curr_max_below = curr_address;
14191 
14192 	next_entry = NULL;
14193 	next_map = NULL;
14194 	next_address = 0;
14195 	next_offset = 0;
14196 	next_skip = 0;
14197 	next_depth = 0;
14198 	next_max_above = (vm_map_offset_t) -1;
14199 	next_max_below = (vm_map_offset_t) -1;
14200 
14201 	for (;;) {
14202 		if (vm_map_lookup_entry(curr_map,
14203 		    curr_address,
14204 		    &tmp_entry)) {
14205 			/* tmp_entry contains the address we're looking for */
14206 			curr_entry = tmp_entry;
14207 		} else {
14208 			vm_map_offset_t skip;
14209 			/*
14210 			 * The address is not mapped.  "tmp_entry" is the
14211 			 * map entry preceding the address.  We want the next
14212 			 * one, if it exists.
14213 			 */
14214 			curr_entry = tmp_entry->vme_next;
14215 
14216 			if (curr_entry == vm_map_to_entry(curr_map) ||
14217 			    (curr_entry->vme_start >=
14218 			    curr_address + curr_max_above)) {
14219 				/* no next entry at this level: stop looking */
14220 				if (not_in_kdp) {
14221 					vm_map_unlock_read(curr_map);
14222 				}
14223 				curr_entry = NULL;
14224 				curr_map = NULL;
14225 				curr_skip = 0;
14226 				curr_offset = 0;
14227 				curr_depth = 0;
14228 				curr_max_above = 0;
14229 				curr_max_below = 0;
14230 				break;
14231 			}
14232 
14233 			/* adjust current address and offset */
14234 			skip = curr_entry->vme_start - curr_address;
14235 			curr_address = curr_entry->vme_start;
14236 			curr_skip += skip;
14237 			curr_offset += skip;
14238 			curr_max_above -= skip;
14239 			curr_max_below = 0;
14240 		}
14241 
14242 		/*
14243 		 * Is the next entry at this level closer to the address (or
14244 		 * deeper in the submap chain) than the one we had
14245 		 * so far ?
14246 		 */
14247 		tmp_entry = curr_entry->vme_next;
14248 		if (tmp_entry == vm_map_to_entry(curr_map)) {
14249 			/* no next entry at this level */
14250 		} else if (tmp_entry->vme_start >=
14251 		    curr_address + curr_max_above) {
14252 			/*
14253 			 * tmp_entry is beyond the scope of what we mapped of
14254 			 * this submap in the upper level: ignore it.
14255 			 */
14256 		} else if ((next_entry == NULL) ||
14257 		    (tmp_entry->vme_start + curr_offset <=
14258 		    next_entry->vme_start + next_offset)) {
14259 			/*
14260 			 * We didn't have a "next_entry" or this one is
14261 			 * closer to the address we're looking for:
14262 			 * use this "tmp_entry" as the new "next_entry".
14263 			 */
14264 			if (next_entry != NULL) {
14265 				/* unlock the last "next_map" */
14266 				if (next_map != curr_map && not_in_kdp) {
14267 					vm_map_unlock_read(next_map);
14268 				}
14269 			}
14270 			next_entry = tmp_entry;
14271 			next_map = curr_map;
14272 			next_depth = curr_depth;
14273 			next_address = next_entry->vme_start;
14274 			next_skip = curr_skip;
14275 			next_skip += (next_address - curr_address);
14276 			next_offset = curr_offset;
14277 			next_offset += (next_address - curr_address);
14278 			next_max_above = MIN(next_max_above, curr_max_above);
14279 			next_max_above = MIN(next_max_above,
14280 			    next_entry->vme_end - next_address);
14281 			next_max_below = MIN(next_max_below, curr_max_below);
14282 			next_max_below = MIN(next_max_below,
14283 			    next_address - next_entry->vme_start);
14284 		}
14285 
14286 		/*
14287 		 * "curr_max_{above,below}" allow us to keep track of the
14288 		 * portion of the submap that is actually mapped at this level:
14289 		 * the rest of that submap is irrelevant to us, since it's not
14290 		 * mapped here.
14291 		 * The relevant portion of the map starts at
14292 		 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14293 		 */
14294 		curr_max_above = MIN(curr_max_above,
14295 		    curr_entry->vme_end - curr_address);
14296 		curr_max_below = MIN(curr_max_below,
14297 		    curr_address - curr_entry->vme_start);
14298 
14299 		if (!curr_entry->is_sub_map ||
14300 		    curr_depth >= user_max_depth) {
14301 			/*
14302 			 * We hit a leaf map or we reached the maximum depth
14303 			 * we could, so stop looking.  Keep the current map
14304 			 * locked.
14305 			 */
14306 			break;
14307 		}
14308 
14309 		/*
14310 		 * Get down to the next submap level.
14311 		 */
14312 
14313 		if (curr_entry->needs_copy) {
14314 			/* everything below this is effectively copy-on-write */
14315 			submap_needed_copy = TRUE;
14316 		}
14317 
14318 		/*
14319 		 * Lock the next level and unlock the current level,
14320 		 * unless we need to keep it locked to access the "next_entry"
14321 		 * later.
14322 		 */
14323 		if (not_in_kdp) {
14324 			vm_map_lock_read(VME_SUBMAP(curr_entry));
14325 		}
14326 		if (curr_map == next_map) {
14327 			/* keep "next_map" locked in case we need it */
14328 		} else {
14329 			/* release this map */
14330 			if (not_in_kdp) {
14331 				vm_map_unlock_read(curr_map);
14332 			}
14333 		}
14334 
14335 		/*
14336 		 * Adjust the offset.  "curr_entry" maps the submap
14337 		 * at relative address "curr_entry->vme_start" in the
14338 		 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14339 		 * bytes of the submap.
14340 		 * "curr_offset" always represents the offset of a virtual
14341 		 * address in the curr_map relative to the absolute address
14342 		 * space (i.e. the top-level VM map).
14343 		 */
14344 		curr_offset +=
14345 		    (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14346 		curr_address = user_address + curr_offset;
14347 		/* switch to the submap */
14348 		curr_map = VME_SUBMAP(curr_entry);
14349 		curr_depth++;
14350 		curr_entry = NULL;
14351 	}
14352 
14353 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14354 // so probably should be a real 32b ID vs. ptr.
14355 // Current users just check for equality
14356 
14357 	if (curr_entry == NULL) {
14358 		/* no VM region contains the address... */
14359 
14360 		if (do_region_footprint && /* we want footprint numbers */
14361 		    next_entry == NULL && /* & there are no more regions */
14362 		    /* & we haven't already provided our fake region: */
14363 		    user_address <= vm_map_last_entry(map)->vme_end) {
14364 			ledger_amount_t ledger_resident, ledger_compressed;
14365 
14366 			/*
14367 			 * Add a fake memory region to account for
14368 			 * purgeable and/or ledger-tagged memory that
14369 			 * counts towards this task's memory footprint,
14370 			 * i.e. the resident/compressed pages of non-volatile
14371 			 * objects owned by that task.
14372 			 */
14373 			task_ledgers_footprint(map->pmap->ledger,
14374 			    &ledger_resident,
14375 			    &ledger_compressed);
14376 			if (ledger_resident + ledger_compressed == 0) {
14377 				/* no purgeable memory usage to report */
14378 				return KERN_INVALID_ADDRESS;
14379 			}
14380 			/* fake region to show nonvolatile footprint */
14381 			if (look_for_pages) {
14382 				submap_info->protection = VM_PROT_DEFAULT;
14383 				submap_info->max_protection = VM_PROT_DEFAULT;
14384 				submap_info->inheritance = VM_INHERIT_DEFAULT;
14385 				submap_info->offset = 0;
14386 				submap_info->user_tag = -1;
14387 				submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14388 				submap_info->pages_shared_now_private = 0;
14389 				submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14390 				submap_info->pages_dirtied = submap_info->pages_resident;
14391 				submap_info->ref_count = 1;
14392 				submap_info->shadow_depth = 0;
14393 				submap_info->external_pager = 0;
14394 				submap_info->share_mode = SM_PRIVATE;
14395 				if (submap_needed_copy) {
14396 					submap_info->share_mode = SM_COW;
14397 				}
14398 				submap_info->is_submap = 0;
14399 				submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14400 				submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14401 				submap_info->user_wired_count = 0;
14402 				submap_info->pages_reusable = 0;
14403 			} else {
14404 				short_info->user_tag = -1;
14405 				short_info->offset = 0;
14406 				short_info->protection = VM_PROT_DEFAULT;
14407 				short_info->inheritance = VM_INHERIT_DEFAULT;
14408 				short_info->max_protection = VM_PROT_DEFAULT;
14409 				short_info->behavior = VM_BEHAVIOR_DEFAULT;
14410 				short_info->user_wired_count = 0;
14411 				short_info->is_submap = 0;
14412 				short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14413 				short_info->external_pager = 0;
14414 				short_info->shadow_depth = 0;
14415 				short_info->share_mode = SM_PRIVATE;
14416 				if (submap_needed_copy) {
14417 					short_info->share_mode = SM_COW;
14418 				}
14419 				short_info->ref_count = 1;
14420 			}
14421 			*nesting_depth = 0;
14422 			*size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14423 //			*address = user_address;
14424 			*address = vm_map_last_entry(map)->vme_end;
14425 			return KERN_SUCCESS;
14426 		}
14427 
14428 		if (next_entry == NULL) {
14429 			/* ... and no VM region follows it either */
14430 			return KERN_INVALID_ADDRESS;
14431 		}
14432 		/* ... gather info about the next VM region */
14433 		curr_entry = next_entry;
14434 		curr_map = next_map;    /* still locked ... */
14435 		curr_address = next_address;
14436 		curr_skip = next_skip;
14437 		curr_offset = next_offset;
14438 		curr_depth = next_depth;
14439 		curr_max_above = next_max_above;
14440 		curr_max_below = next_max_below;
14441 	} else {
14442 		/* we won't need "next_entry" after all */
14443 		if (next_entry != NULL) {
14444 			/* release "next_map" */
14445 			if (next_map != curr_map && not_in_kdp) {
14446 				vm_map_unlock_read(next_map);
14447 			}
14448 		}
14449 	}
14450 	next_entry = NULL;
14451 	next_map = NULL;
14452 	next_offset = 0;
14453 	next_skip = 0;
14454 	next_depth = 0;
14455 	next_max_below = -1;
14456 	next_max_above = -1;
14457 
14458 	if (curr_entry->is_sub_map &&
14459 	    curr_depth < user_max_depth) {
14460 		/*
14461 		 * We're not as deep as we could be:  we must have
14462 		 * gone back up after not finding anything mapped
14463 		 * below the original top-level map entry's.
14464 		 * Let's move "curr_address" forward and recurse again.
14465 		 */
14466 		user_address = curr_address;
14467 		goto recurse_again;
14468 	}
14469 
14470 	*nesting_depth = curr_depth;
14471 	*size = curr_max_above + curr_max_below;
14472 	*address = user_address + curr_skip - curr_max_below;
14473 
14474 	if (look_for_pages) {
14475 		submap_info->user_tag = VME_ALIAS(curr_entry);
14476 		submap_info->offset = VME_OFFSET(curr_entry);
14477 		submap_info->protection = curr_entry->protection;
14478 		submap_info->inheritance = curr_entry->inheritance;
14479 		submap_info->max_protection = curr_entry->max_protection;
14480 		submap_info->behavior = curr_entry->behavior;
14481 		submap_info->user_wired_count = curr_entry->user_wired_count;
14482 		submap_info->is_submap = curr_entry->is_sub_map;
14483 		submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14484 	} else {
14485 		short_info->user_tag = VME_ALIAS(curr_entry);
14486 		short_info->offset = VME_OFFSET(curr_entry);
14487 		short_info->protection = curr_entry->protection;
14488 		short_info->inheritance = curr_entry->inheritance;
14489 		short_info->max_protection = curr_entry->max_protection;
14490 		short_info->behavior = curr_entry->behavior;
14491 		short_info->user_wired_count = curr_entry->user_wired_count;
14492 		short_info->is_submap = curr_entry->is_sub_map;
14493 		short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14494 	}
14495 
14496 	extended.pages_resident = 0;
14497 	extended.pages_swapped_out = 0;
14498 	extended.pages_shared_now_private = 0;
14499 	extended.pages_dirtied = 0;
14500 	extended.pages_reusable = 0;
14501 	extended.external_pager = 0;
14502 	extended.shadow_depth = 0;
14503 	extended.share_mode = SM_EMPTY;
14504 	extended.ref_count = 0;
14505 
14506 	if (not_in_kdp) {
14507 		if (!curr_entry->is_sub_map) {
14508 			vm_map_offset_t range_start, range_end;
14509 			range_start = MAX((curr_address - curr_max_below),
14510 			    curr_entry->vme_start);
14511 			range_end = MIN((curr_address + curr_max_above),
14512 			    curr_entry->vme_end);
14513 			vm_map_region_walk(curr_map,
14514 			    range_start,
14515 			    curr_entry,
14516 			    (VME_OFFSET(curr_entry) +
14517 			    (range_start -
14518 			    curr_entry->vme_start)),
14519 			    range_end - range_start,
14520 			    &extended,
14521 			    look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14522 			if (extended.external_pager &&
14523 			    extended.ref_count == 2 &&
14524 			    extended.share_mode == SM_SHARED) {
14525 				extended.share_mode = SM_PRIVATE;
14526 			}
14527 			if (submap_needed_copy) {
14528 				extended.share_mode = SM_COW;
14529 			}
14530 		} else {
14531 			if (curr_entry->use_pmap) {
14532 				extended.share_mode = SM_TRUESHARED;
14533 			} else {
14534 				extended.share_mode = SM_PRIVATE;
14535 			}
14536 			extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
14537 		}
14538 	}
14539 
14540 	if (look_for_pages) {
14541 		submap_info->pages_resident = extended.pages_resident;
14542 		submap_info->pages_swapped_out = extended.pages_swapped_out;
14543 		submap_info->pages_shared_now_private =
14544 		    extended.pages_shared_now_private;
14545 		submap_info->pages_dirtied = extended.pages_dirtied;
14546 		submap_info->external_pager = extended.external_pager;
14547 		submap_info->shadow_depth = extended.shadow_depth;
14548 		submap_info->share_mode = extended.share_mode;
14549 		submap_info->ref_count = extended.ref_count;
14550 
14551 		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14552 			submap_info->pages_reusable = extended.pages_reusable;
14553 		}
14554 		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14555 			submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
14556 		}
14557 	} else {
14558 		short_info->external_pager = extended.external_pager;
14559 		short_info->shadow_depth = extended.shadow_depth;
14560 		short_info->share_mode = extended.share_mode;
14561 		short_info->ref_count = extended.ref_count;
14562 	}
14563 
14564 	if (not_in_kdp) {
14565 		vm_map_unlock_read(curr_map);
14566 	}
14567 
14568 	return KERN_SUCCESS;
14569 }
14570 
14571 /*
14572  *	vm_region:
14573  *
14574  *	User call to obtain information about a region in
14575  *	a task's address map. Currently, only one flavor is
14576  *	supported.
14577  *
14578  *	XXX The reserved and behavior fields cannot be filled
14579  *	    in until the vm merge from the IK is completed, and
14580  *	    vm_reserve is implemented.
14581  */
14582 
kern_return_t
vm_map_region(
	vm_map_t                 map,
	vm_map_offset_t *address,               /* IN/OUT */
	vm_map_size_t           *size,                  /* OUT */
	vm_region_flavor_t       flavor,                /* IN */
	vm_region_info_t         info,                  /* OUT */
	mach_msg_type_number_t  *count, /* IN/OUT */
	mach_port_t             *object_name)           /* OUT */
{
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	vm_map_offset_t         start;

	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (flavor) {
	case VM_REGION_BASIC_INFO:
		/* legacy for old 32-bit objects info */
	{
		vm_region_basic_info_t  basic;

		/* caller's buffer must be large enough for this flavor */
		if (*count < VM_REGION_BASIC_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			/*
			 * "start" falls in a hole: report the next mapped
			 * region instead, or fail if nothing is mapped above.
			 */
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		/* report from the beginning of the containing entry */
		start = entry->vme_start;

		/* legacy 32-bit flavor: offset is truncated to 32 bits */
		basic->offset = (uint32_t)VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}

	case VM_REGION_BASIC_INFO_64:
	{
		vm_region_basic_info_64_t       basic;

		/* caller's buffer must be large enough for this flavor */
		if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_64_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT_64;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			/*
			 * "start" falls in a hole: report the next mapped
			 * region instead, or fail if nothing is mapped above.
			 */
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		/* report from the beginning of the containing entry */
		start = entry->vme_start;

		/* 64-bit flavor: full object offset is reported */
		basic->offset = VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	case VM_REGION_EXTENDED_INFO:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}
		OS_FALLTHROUGH;
	case VM_REGION_EXTENDED_INFO__legacy:
		/* legacy count is the common minimum for both cases above */
		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
			return KERN_INVALID_ARGUMENT;
		}

		{
			vm_region_extended_info_t       extended;
			mach_msg_type_number_t original_count;
			int effective_page_size, effective_page_shift;

			extended = (vm_region_extended_info_t) info;

			/*
			 * NOTE(review): "effective_page_size" and
			 * "original_count" are computed but not used in the
			 * visible code below -- confirm whether they can be
			 * removed or are kept for symmetry with other flavors.
			 */
			effective_page_shift = vm_self_region_page_shift(map);
			effective_page_size = (1 << effective_page_shift);

			vm_map_lock_read(map);

			start = *address;
			if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
				/* hole: fall forward to the next region */
				if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
					vm_map_unlock_read(map);
					return KERN_INVALID_ADDRESS;
				}
			} else {
				entry = tmp_entry;
			}
			start = entry->vme_start;

			extended->protection = entry->protection;
			extended->user_tag = VME_ALIAS(entry);
			extended->pages_resident = 0;
			extended->pages_swapped_out = 0;
			extended->pages_shared_now_private = 0;
			extended->pages_dirtied = 0;
			extended->external_pager = 0;
			extended->shadow_depth = 0;

			original_count = *count;
			if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
				*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
			} else {
				/* pages_reusable only exists in the newer layout */
				extended->pages_reusable = 0;
				*count = VM_REGION_EXTENDED_INFO_COUNT;
			}

			/* collect per-page and sharing info for this entry */
			vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);

			if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
				extended->share_mode = SM_PRIVATE;
			}

			if (object_name) {
				*object_name = IP_NULL;
			}
			*address = start;
			*size = (entry->vme_end - start);

			vm_map_unlock_read(map);
			return KERN_SUCCESS;
		}
	case VM_REGION_TOP_INFO:
	{
		vm_region_top_info_t    top;

		/* caller's buffer must be large enough for this flavor */
		if (*count < VM_REGION_TOP_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		top = (vm_region_top_info_t) info;
		*count = VM_REGION_TOP_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			/* hole: fall forward to the next region */
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}
		start = entry->vme_start;

		top->private_pages_resident = 0;
		top->shared_pages_resident = 0;

		/* classify the entry's top object (share mode, page counts) */
		vm_map_region_top_walk(entry, top);

		if (object_name) {
			*object_name = IP_NULL;
		}
		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
14805 
/*
 * Number of pages of "obj" to report as resident for an entry spanning
 * "entry_size" pages: resident pages minus the reusable ones (or only the
 * wired pages when the whole object is marked all_reusable), clamped to
 * the size of the mapping.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
	MIN((entry_size),                                               \
	    ((obj)->all_reusable ?                                      \
	     (obj)->wired_page_count :                                  \
	     (obj)->resident_page_count - (obj)->reusable_page_count))
14811 
/*
 * vm_map_region_top_walk:
 *	Fill "top" (share mode, ref count, resident page counts, object id)
 *	from the VM object mapped by "entry", walking the shadow chain when
 *	the top object is a COW shadow.
 *	Submap entries and entries with no object report SM_EMPTY.
 */
void
vm_map_region_top_walk(
	vm_map_entry_t             entry,
	vm_region_top_info_t       top)
{
	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
		top->share_mode = SM_EMPTY;
		top->ref_count = 0;
		top->obj_id = 0;
		return;
	}

	{
		struct  vm_object *obj, *tmp_obj;
		int             ref_count;
		uint32_t        entry_size;

		/* size of the mapping, in pages */
		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

		obj = VME_OBJECT(entry);

		vm_object_lock(obj);

		/*
		 * NOTE(review): presumably this discounts a transient
		 * reference held while paging is in progress -- the same
		 * idiom appears in vm_map_region_walk(); confirm.
		 */
		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}

		assert(obj->reusable_page_count <= obj->resident_page_count);
		if (obj->shadow) {
			/* top object shadows another: this is a COW mapping */
			if (ref_count == 1) {
				top->private_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			}
			top->ref_count  = ref_count;
			top->share_mode = SM_COW;

			/*
			 * Accumulate the resident pages and references of the
			 * rest of the shadow chain, locking hand-over-hand.
			 */
			while ((tmp_obj = obj->shadow)) {
				vm_object_lock(tmp_obj);
				vm_object_unlock(obj);
				obj = tmp_obj;

				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
					ref_count--;
				}

				assert(obj->reusable_page_count <= obj->resident_page_count);
				top->shared_pages_resident +=
				    OBJ_RESIDENT_COUNT(obj, entry_size);
				/* -1: don't count the shadow reference itself */
				top->ref_count += ref_count - 1;
			}
		} else {
			if (entry->superpage_size) {
				top->share_mode = SM_LARGE_PAGE;
				top->shared_pages_resident = 0;
				top->private_pages_resident = entry_size;
			} else if (entry->needs_copy) {
				top->share_mode = SM_COW;
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				/* a named object with 2 refs is still private */
				if (ref_count == 1 ||
				    (ref_count == 2 && obj->named)) {
					top->share_mode = SM_PRIVATE;
					top->private_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				} else {
					top->share_mode = SM_SHARED;
					top->shared_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				}
			}
			top->ref_count = ref_count;
		}
		/* XXX K64: obj_id will be truncated */
		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

		vm_object_unlock(obj);
	}
}
14896 
14897 void
vm_map_region_walk(vm_map_t map,vm_map_offset_t va,vm_map_entry_t entry,vm_object_offset_t offset,vm_object_size_t range,vm_region_extended_info_t extended,boolean_t look_for_pages,mach_msg_type_number_t count)14898 vm_map_region_walk(
14899 	vm_map_t                        map,
14900 	vm_map_offset_t                 va,
14901 	vm_map_entry_t                  entry,
14902 	vm_object_offset_t              offset,
14903 	vm_object_size_t                range,
14904 	vm_region_extended_info_t       extended,
14905 	boolean_t                       look_for_pages,
14906 	mach_msg_type_number_t count)
14907 {
14908 	struct vm_object *obj, *tmp_obj;
14909 	vm_map_offset_t       last_offset;
14910 	int               i;
14911 	int               ref_count;
14912 	struct vm_object        *shadow_object;
14913 	unsigned short          shadow_depth;
14914 	boolean_t         do_region_footprint;
14915 	int                     effective_page_size, effective_page_shift;
14916 	vm_map_offset_t         effective_page_mask;
14917 
14918 	do_region_footprint = task_self_region_footprint();
14919 
14920 	if ((VME_OBJECT(entry) == 0) ||
14921 	    (entry->is_sub_map) ||
14922 	    (VME_OBJECT(entry)->phys_contiguous &&
14923 	    !entry->superpage_size)) {
14924 		extended->share_mode = SM_EMPTY;
14925 		extended->ref_count = 0;
14926 		return;
14927 	}
14928 
14929 	if (entry->superpage_size) {
14930 		extended->shadow_depth = 0;
14931 		extended->share_mode = SM_LARGE_PAGE;
14932 		extended->ref_count = 1;
14933 		extended->external_pager = 0;
14934 
14935 		/* TODO4K: Superpage in 4k mode? */
14936 		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14937 		extended->shadow_depth = 0;
14938 		return;
14939 	}
14940 
14941 	effective_page_shift = vm_self_region_page_shift(map);
14942 	effective_page_size = (1 << effective_page_shift);
14943 	effective_page_mask = effective_page_size - 1;
14944 
14945 	offset = vm_map_trunc_page(offset, effective_page_mask);
14946 
14947 	obj = VME_OBJECT(entry);
14948 
14949 	vm_object_lock(obj);
14950 
14951 	if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14952 		ref_count--;
14953 	}
14954 
14955 	if (look_for_pages) {
14956 		for (last_offset = offset + range;
14957 		    offset < last_offset;
14958 		    offset += effective_page_size, va += effective_page_size) {
14959 			if (do_region_footprint) {
14960 				int disp;
14961 
14962 				disp = 0;
14963 				if (map->has_corpse_footprint) {
14964 					/*
14965 					 * Query the page info data we saved
14966 					 * while forking the corpse.
14967 					 */
14968 					vm_map_corpse_footprint_query_page_info(
14969 						map,
14970 						va,
14971 						&disp);
14972 				} else {
14973 					/*
14974 					 * Query the pmap.
14975 					 */
14976 					vm_map_footprint_query_page_info(
14977 						map,
14978 						entry,
14979 						va,
14980 						&disp);
14981 				}
14982 				if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
14983 					extended->pages_resident++;
14984 				}
14985 				if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
14986 					extended->pages_reusable++;
14987 				}
14988 				if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
14989 					extended->pages_dirtied++;
14990 				}
14991 				if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14992 					extended->pages_swapped_out++;
14993 				}
14994 				continue;
14995 			}
14996 
14997 			vm_map_region_look_for_page(map, va, obj,
14998 			    vm_object_trunc_page(offset), ref_count,
14999 			    0, extended, count);
15000 		}
15001 
15002 		if (do_region_footprint) {
15003 			goto collect_object_info;
15004 		}
15005 	} else {
15006 collect_object_info:
15007 		shadow_object = obj->shadow;
15008 		shadow_depth = 0;
15009 
15010 		if (!(obj->internal)) {
15011 			extended->external_pager = 1;
15012 		}
15013 
15014 		if (shadow_object != VM_OBJECT_NULL) {
15015 			vm_object_lock(shadow_object);
15016 			for (;
15017 			    shadow_object != VM_OBJECT_NULL;
15018 			    shadow_depth++) {
15019 				vm_object_t     next_shadow;
15020 
15021 				if (!(shadow_object->internal)) {
15022 					extended->external_pager = 1;
15023 				}
15024 
15025 				next_shadow = shadow_object->shadow;
15026 				if (next_shadow) {
15027 					vm_object_lock(next_shadow);
15028 				}
15029 				vm_object_unlock(shadow_object);
15030 				shadow_object = next_shadow;
15031 			}
15032 		}
15033 		extended->shadow_depth = shadow_depth;
15034 	}
15035 
15036 	if (extended->shadow_depth || entry->needs_copy) {
15037 		extended->share_mode = SM_COW;
15038 	} else {
15039 		if (ref_count == 1) {
15040 			extended->share_mode = SM_PRIVATE;
15041 		} else {
15042 			if (obj->true_share) {
15043 				extended->share_mode = SM_TRUESHARED;
15044 			} else {
15045 				extended->share_mode = SM_SHARED;
15046 			}
15047 		}
15048 	}
15049 	extended->ref_count = ref_count - extended->shadow_depth;
15050 
15051 	for (i = 0; i < extended->shadow_depth; i++) {
15052 		if ((tmp_obj = obj->shadow) == 0) {
15053 			break;
15054 		}
15055 		vm_object_lock(tmp_obj);
15056 		vm_object_unlock(obj);
15057 
15058 		if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
15059 			ref_count--;
15060 		}
15061 
15062 		extended->ref_count += ref_count;
15063 		obj = tmp_obj;
15064 	}
15065 	vm_object_unlock(obj);
15066 
15067 	if (extended->share_mode == SM_SHARED) {
15068 		vm_map_entry_t       cur;
15069 		vm_map_entry_t       last;
15070 		int      my_refs;
15071 
15072 		obj = VME_OBJECT(entry);
15073 		last = vm_map_to_entry(map);
15074 		my_refs = 0;
15075 
15076 		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15077 			ref_count--;
15078 		}
15079 		for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
15080 			my_refs += vm_map_region_count_obj_refs(cur, obj);
15081 		}
15082 
15083 		if (my_refs == ref_count) {
15084 			extended->share_mode = SM_PRIVATE_ALIASED;
15085 		} else if (my_refs > 1) {
15086 			extended->share_mode = SM_SHARED_ALIASED;
15087 		}
15088 	}
15089 }
15090 
15091 
15092 /* object is locked on entry and locked on return */
15093 
15094 
/*
 * vm_map_region_look_for_page:
 *	Search for the page at "offset" in "object" and its shadow chain,
 *	updating the page statistics in "extended" (resident, dirtied,
 *	reusable, swapped-out, shared-now-private) and the maximum shadow
 *	depth observed.  "object" is locked on entry and remains locked on
 *	return; any shadow objects visited are locked hand-over-hand and
 *	unlocked before returning.
 *	"count" selects the caller's info layout: pages_reusable is only
 *	updated for the non-legacy VM_REGION_EXTENDED_INFO layout.
 */
static void
vm_map_region_look_for_page(
	__unused vm_map_t               map,
	__unused vm_map_offset_t        va,
	vm_object_t                     object,
	vm_object_offset_t              offset,
	int                             max_refcnt,
	unsigned short                  depth,
	vm_region_extended_info_t       extended,
	mach_msg_type_number_t count)
{
	vm_page_t       p;
	vm_object_t     shadow;
	int             ref_count;
	vm_object_t     caller_object;

	shadow = object->shadow;
	caller_object = object;


	while (TRUE) {
		if (!(object->internal)) {
			extended->external_pager = 1;
		}

		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			/* found the page in this level of the chain */
			if (shadow && (max_refcnt == 1)) {
				/* shadowed but single-referenced */
				extended->pages_shared_now_private++;
			}

			if (!p->vmp_fictitious &&
			    (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
				extended->pages_dirtied++;
			} else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
				/* newer layout: also track reusable pages */
				if (p->vmp_reusable || object->all_reusable) {
					extended->pages_reusable++;
				}
			}

			extended->pages_resident++;

			/* only unlock objects we locked ourselves */
			if (object != caller_object) {
				vm_object_unlock(object);
			}

			return;
		}
		if (object->internal &&
		    object->alive &&
		    !object->terminating &&
		    object->pager_ready) {
			if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
			    == VM_EXTERNAL_STATE_EXISTS) {
				/* the pager has that page */
				extended->pages_swapped_out++;
				if (object != caller_object) {
					vm_object_unlock(object);
				}
				return;
			}
		}

		if (shadow) {
			/* descend into the shadow, hand-over-hand locking */
			vm_object_lock(shadow);

			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
				ref_count--;
			}

			/* record the deepest level reached across all pages */
			if (++depth > extended->shadow_depth) {
				extended->shadow_depth = depth;
			}

			/* track the largest ref count seen along the chain */
			if (ref_count > max_refcnt) {
				max_refcnt = ref_count;
			}

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			/* shift the lookup offset into the shadow's space */
			offset = offset + object->vo_shadow_offset;
			object = shadow;
			shadow = object->shadow;
			continue;
		}
		/* bottom of the chain: page not found anywhere */
		if (object != caller_object) {
			vm_object_unlock(object);
		}
		break;
	}
}
15187 
15188 static int
vm_map_region_count_obj_refs(vm_map_entry_t entry,vm_object_t object)15189 vm_map_region_count_obj_refs(
15190 	vm_map_entry_t    entry,
15191 	vm_object_t       object)
15192 {
15193 	int ref_count;
15194 	vm_object_t chk_obj;
15195 	vm_object_t tmp_obj;
15196 
15197 	if (VME_OBJECT(entry) == 0) {
15198 		return 0;
15199 	}
15200 
15201 	if (entry->is_sub_map) {
15202 		return 0;
15203 	} else {
15204 		ref_count = 0;
15205 
15206 		chk_obj = VME_OBJECT(entry);
15207 		vm_object_lock(chk_obj);
15208 
15209 		while (chk_obj) {
15210 			if (chk_obj == object) {
15211 				ref_count++;
15212 			}
15213 			tmp_obj = chk_obj->shadow;
15214 			if (tmp_obj) {
15215 				vm_object_lock(tmp_obj);
15216 			}
15217 			vm_object_unlock(chk_obj);
15218 
15219 			chk_obj = tmp_obj;
15220 		}
15221 	}
15222 	return ref_count;
15223 }
15224 
15225 
15226 /*
15227  *	Routine:	vm_map_simplify
15228  *
15229  *	Description:
15230  *		Attempt to simplify the map representation in
15231  *		the vicinity of the given starting address.
15232  *	Note:
15233  *		This routine is intended primarily to keep the
15234  *		kernel maps more compact -- they generally don't
15235  *		benefit from the "expand a map entry" technology
15236  *		at allocation time because the adjacent entry
15237  *		is often wired down.
15238  */
void
vm_map_simplify_entry(
	vm_map_t        map,
	vm_map_entry_t  this_entry)
{
	vm_map_entry_t  prev_entry;

	/* The caller must hold the map lock for write. */
	prev_entry = this_entry->vme_prev;

	/*
	 * Coalesce "this_entry" with its immediate predecessor only when:
	 *   - both are real entries (not the map header),
	 *   - they are virtually adjacent,
	 *   - they map contiguous offsets of the same object (or submap),
	 *   - and every attribute that affects faulting, protection,
	 *     accounting, wiring, and paging behavior is identical.
	 * Any mismatch means merging would change observable semantics,
	 * so the entries are left alone.
	 */
	if ((this_entry != vm_map_to_entry(map)) &&
	    (prev_entry != vm_map_to_entry(map)) &&

	    (prev_entry->vme_end == this_entry->vme_start) &&

	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
	    (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
	    prev_entry->vme_start))
	    == VME_OFFSET(this_entry)) &&

	    (prev_entry->behavior == this_entry->behavior) &&
	    (prev_entry->needs_copy == this_entry->needs_copy) &&
	    (prev_entry->protection == this_entry->protection) &&
	    (prev_entry->max_protection == this_entry->max_protection) &&
	    (prev_entry->inheritance == this_entry->inheritance) &&
	    (prev_entry->use_pmap == this_entry->use_pmap) &&
	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
	    (prev_entry->no_cache == this_entry->no_cache) &&
	    (prev_entry->permanent == this_entry->permanent) &&
	    (prev_entry->map_aligned == this_entry->map_aligned) &&
	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
	    (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
	    (prev_entry->vme_resilient_codesign ==
	    this_entry->vme_resilient_codesign) &&
	    (prev_entry->vme_resilient_media ==
	    this_entry->vme_resilient_media) &&
	    (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&

	    (prev_entry->wired_count == this_entry->wired_count) &&
	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&

	    /* atomic entries must never be merged */
	    ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
	    (prev_entry->in_transition == FALSE) &&
	    (this_entry->in_transition == FALSE) &&
	    (prev_entry->needs_wakeup == FALSE) &&
	    (this_entry->needs_wakeup == FALSE) &&
	    (prev_entry->is_shared == this_entry->is_shared) &&
	    (prev_entry->superpage_size == FALSE) &&
	    (this_entry->superpage_size == FALSE)
	    ) {
		/*
		 * Absorb the predecessor into "this_entry": unlink it from
		 * the map's entry store, extend this_entry backwards to
		 * cover its range, and pick up its (lower) object offset.
		 */
		vm_map_store_entry_unlink(map, prev_entry);
		assert(prev_entry->vme_start < this_entry->vme_end);
		if (prev_entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
			    VM_MAP_PAGE_MASK(map)));
		}
		this_entry->vme_start = prev_entry->vme_start;
		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

		if (map->holelistenabled) {
			/* keep the hole list / first-free hint consistent */
			vm_map_store_update_first_free(map, this_entry, TRUE);
		}

		/*
		 * Drop the reference the discarded entry held on its
		 * submap or object (this_entry still holds its own).
		 */
		if (prev_entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(prev_entry));
		} else {
			vm_object_deallocate(VME_OBJECT(prev_entry));
		}
		vm_map_entry_dispose(map, prev_entry);
		SAVE_HINT_MAP_WRITE(map, this_entry);
	}
}
15313 
15314 void
vm_map_simplify(vm_map_t map,vm_map_offset_t start)15315 vm_map_simplify(
15316 	vm_map_t        map,
15317 	vm_map_offset_t start)
15318 {
15319 	vm_map_entry_t  this_entry;
15320 
15321 	vm_map_lock(map);
15322 	if (vm_map_lookup_entry(map, start, &this_entry)) {
15323 		vm_map_simplify_entry(map, this_entry);
15324 		vm_map_simplify_entry(map, this_entry->vme_next);
15325 	}
15326 	vm_map_unlock(map);
15327 }
15328 
15329 static void
vm_map_simplify_range(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)15330 vm_map_simplify_range(
15331 	vm_map_t        map,
15332 	vm_map_offset_t start,
15333 	vm_map_offset_t end)
15334 {
15335 	vm_map_entry_t  entry;
15336 
15337 	/*
15338 	 * The map should be locked (for "write") by the caller.
15339 	 */
15340 
15341 	if (start >= end) {
15342 		/* invalid address range */
15343 		return;
15344 	}
15345 
15346 	start = vm_map_trunc_page(start,
15347 	    VM_MAP_PAGE_MASK(map));
15348 	end = vm_map_round_page(end,
15349 	    VM_MAP_PAGE_MASK(map));
15350 
15351 	if (!vm_map_lookup_entry(map, start, &entry)) {
15352 		/* "start" is not mapped and "entry" ends before "start" */
15353 		if (entry == vm_map_to_entry(map)) {
15354 			/* start with first entry in the map */
15355 			entry = vm_map_first_entry(map);
15356 		} else {
15357 			/* start with next entry */
15358 			entry = entry->vme_next;
15359 		}
15360 	}
15361 
15362 	while (entry != vm_map_to_entry(map) &&
15363 	    entry->vme_start <= end) {
15364 		/* try and coalesce "entry" with its previous entry */
15365 		vm_map_simplify_entry(map, entry);
15366 		entry = entry->vme_next;
15367 	}
15368 }
15369 
15370 
15371 /*
15372  *	Routine:	vm_map_machine_attribute
15373  *	Purpose:
15374  *		Provide machine-specific attributes to mappings,
15375  *		such as cachability etc. for machines that provide
15376  *		them.  NUMA architectures and machines with big/strange
15377  *		caches will use this.
15378  *	Note:
15379  *		Responsibilities for locking and checking are handled here,
15380  *		everything else in the pmap module. If any non-volatile
15381  *		information must be kept, the pmap module should handle
15382  *		it itself. [This assumes that attributes do not
15383  *		need to be inherited, which seems ok to me]
15384  */
kern_return_t
vm_map_machine_attribute(
	vm_map_t                        map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_machine_attribute_t  attribute,
	vm_machine_attribute_val_t* value)              /* IN/OUT */
{
	kern_return_t   ret;
	vm_map_size_t sync_size;
	vm_map_entry_t entry;

	if (start < vm_map_min(map) || end > vm_map_max(map)) {
		return KERN_INVALID_ADDRESS;
	}

	/* Figure how much memory we need to flush (in page increments) */
	sync_size = end - start;

	vm_map_lock(map);

	if (attribute != MATTR_CACHE) {
		/* If we don't have to find physical addresses, we */
		/* don't have to do an explicit traversal here.    */
		ret = pmap_attribute(map->pmap, start, end - start,
		    attribute, value);
		vm_map_unlock(map);
		return ret;
	}

	ret = KERN_SUCCESS;                                                                             /* Assume it all worked */

	/*
	 * MATTR_CACHE needs physical pages: walk each map entry covering
	 * the range and sync the cache page by page.
	 */
	while (sync_size) {
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_map_size_t   sub_size;
			/* clamp this pass to the end of the current entry */
			if ((entry->vme_end - start) > sync_size) {
				sub_size = sync_size;
				sync_size = 0;
			} else {
				sub_size = entry->vme_end - start;
				sync_size -= sub_size;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;

				sub_start = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				sub_end = sub_start + sub_size;
				/*
				 * NOTE(review): the recursive call's return
				 * value is discarded here, so a failure in a
				 * submap does not affect "ret" — confirm this
				 * is intentional.
				 */
				vm_map_machine_attribute(
					VME_SUBMAP(entry),
					sub_start,
					sub_end,
					attribute, value);
			} else {
				if (VME_OBJECT(entry)) {
					vm_page_t               m;
					vm_object_t             object;
					vm_object_t             base_object;
					vm_object_t             last_object;
					vm_object_offset_t      offset;
					vm_object_offset_t      base_offset;
					vm_map_size_t           range;
					range = sub_size;
					offset = (start - entry->vme_start)
					    + VME_OFFSET(entry);
					offset = vm_object_trunc_page(offset);
					base_offset = offset;
					object = VME_OBJECT(entry);
					base_object = object;
					last_object = NULL;

					vm_object_lock(object);

					while (range) {
						m = vm_page_lookup(
							object, offset);

						if (m && !m->vmp_fictitious) {
							/* resident real page: sync its cache lines */
							ret =
							    pmap_attribute_cache_sync(
								VM_PAGE_GET_PHYS_PAGE(m),
								PAGE_SIZE,
								attribute, value);
						} else if (object->shadow) {
							/*
							 * Page not resident here: descend the shadow
							 * chain, taking the next lock before dropping
							 * the current one (hand-over-hand locking).
							 */
							offset = offset + object->vo_shadow_offset;
							last_object = object;
							object = object->shadow;
							vm_object_lock(last_object->shadow);
							vm_object_unlock(last_object);
							continue;
						}
						if (range < PAGE_SIZE) {
							range = 0;
						} else {
							range -= PAGE_SIZE;
						}

						/* return to the top of the chain for the next page */
						if (base_object != object) {
							vm_object_unlock(object);
							vm_object_lock(base_object);
							object = base_object;
						}
						/* Bump to the next page */
						base_offset += PAGE_SIZE;
						offset = base_offset;
					}
					vm_object_unlock(object);
				}
			}
			start += sub_size;
		} else {
			/* hole in the range: give up */
			vm_map_unlock(map);
			return KERN_FAILURE;
		}
	}

	vm_map_unlock(map);

	return ret;
}
15506 
15507 /*
15508  *	vm_map_behavior_set:
15509  *
15510  *	Sets the paging reference behavior of the specified address
15511  *	range in the target map.  Paging reference behavior affects
15512  *	how pagein operations resulting from faults on the map will be
15513  *	clustered.
15514  */
kern_return_t
vm_map_behavior_set(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_behavior_t   new_behavior)
{
	vm_map_entry_t  entry;
	vm_map_entry_t  temp_entry;

	if (start > end ||
	    start < vm_map_min(map) ||
	    end > vm_map_max(map)) {
		return KERN_NO_SPACE;
	}

	switch (new_behavior) {
	/*
	 * This first block of behaviors all set a persistent state on the specified
	 * memory range.  All we have to do here is to record the desired behavior
	 * in the vm_map_entry_t's.
	 */

	case VM_BEHAVIOR_DEFAULT:
	case VM_BEHAVIOR_RANDOM:
	case VM_BEHAVIOR_SEQUENTIAL:
	case VM_BEHAVIOR_RSEQNTL:
	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
		vm_map_lock(map);

		/*
		 *	The entire address range must be valid for the map.
		 *      Note that vm_map_range_check() does a
		 *	vm_map_lookup_entry() internally and returns the
		 *	entry containing the start of the address range if
		 *	the entire range is valid.
		 */
		if (vm_map_range_check(map, start, end, &temp_entry)) {
			entry = temp_entry;
			/* split off the part of the entry before "start" */
			vm_map_clip_start(map, entry, start);
		} else {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		/* clip and tag every entry overlapping [start, end) */
		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
			vm_map_clip_end(map, entry, end);
			if (entry->is_sub_map) {
				assert(!entry->use_pmap);
			}

			if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
				/* recorded as a flag, not as a behavior value */
				entry->zero_wired_pages = TRUE;
			} else {
				entry->behavior = new_behavior;
			}
			entry = entry->vme_next;
		}

		vm_map_unlock(map);
		break;

	/*
	 * The rest of these are different from the above in that they cause
	 * an immediate action to take place as opposed to setting a behavior that
	 * affects future actions.
	 */

	case VM_BEHAVIOR_WILLNEED:
		return vm_map_willneed(map, start, end);

	case VM_BEHAVIOR_DONTNEED:
		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_FREE:
		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_REUSABLE:
		return vm_map_reusable_pages(map, start, end);

	case VM_BEHAVIOR_REUSE:
		return vm_map_reuse_pages(map, start, end);

	case VM_BEHAVIOR_CAN_REUSE:
		return vm_map_can_reuse(map, start, end);

#if MACH_ASSERT
	case VM_BEHAVIOR_PAGEOUT:
		return vm_map_pageout(map, start, end);
#endif /* MACH_ASSERT */

	default:
		return KERN_INVALID_ARGUMENT;
	}

	return KERN_SUCCESS;
}
15612 
15613 
15614 /*
15615  * Internals for madvise(MADV_WILLNEED) system call.
15616  *
15617  * The implementation is to do:-
15618  * a) read-ahead if the mapping corresponds to a mapped regular file
15619  * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
15620  */
15621 
15622 
static kern_return_t
vm_map_willneed(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end
	)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	memory_object_t                 pager;
	struct vm_object_fault_info     fault_info = {};
	kern_return_t                   kr;
	vm_object_size_t                len;
	vm_object_offset_t              offset;

	fault_info.interruptible = THREAD_UNINT;        /* ignored value */
	fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.stealth       = TRUE;

	/*
	 * The MADV_WILLNEED operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && start < end;) {
		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.  After that, the offset will always be zero to
		 * correspond to the beginning of the current vm_map_entry.
		 */
		offset = (start - entry->vme_start) + VME_OFFSET(entry);

		/*
		 * Set the length so we don't go beyond the end of the
		 * map_entry or beyond the end of the range we were given.
		 * This range could span also multiple map entries all of which
		 * map different files, so make sure we only do the right amount
		 * of I/O for each object.  Note that it's possible for there
		 * to be multiple map entries all referring to the same object
		 * but with different page permissions, but it's not worth
		 * trying to optimize that case.
		 */
		len = MIN(entry->vme_end - start, end - start);

		if ((vm_size_t) len != len) {
			/* 32-bit overflow */
			len = (vm_size_t) (0 - PAGE_SIZE);
		}
		fault_info.cluster_size = (vm_size_t) len;
		fault_info.lo_offset    = offset;
		fault_info.hi_offset    = offset + len;
		fault_info.user_tag     = VME_ALIAS(entry);
		fault_info.pmap_options = 0;
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			/* alternate accounting for entries not billed to the task's pmap */
			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}

		/*
		 * If the entry is a submap OR there's no read permission
		 * to this mapping, then just skip it.
		 */
		if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
			entry = entry->vme_next;
			start = entry->vme_start;
			continue;
		}

		object = VME_OBJECT(entry);

		if (object == NULL ||
		    (object && object->internal)) {
			/*
			 * Memory range backed by anonymous memory.
			 */
			vm_size_t region_size = 0, effective_page_size = 0;
			vm_map_offset_t addr = 0, effective_page_mask = 0;

			region_size = len;
			addr = start;

			effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
			effective_page_size = effective_page_mask + 1;

			/* drop the map lock: vm_pre_fault takes its own locks */
			vm_map_unlock_read(map);

			while (region_size) {
				/* fault each page in (zero-fill / decompress as needed) */
				vm_pre_fault(
					vm_map_trunc_page(addr, effective_page_mask),
					VM_PROT_READ | VM_PROT_WRITE);

				region_size -= effective_page_size;
				addr += effective_page_size;
			}
		} else {
			/*
			 * Find the file object backing this map entry.  If there is
			 * none, then we simply ignore the "will need" advice for this
			 * entry and go on to the next one.
			 */
			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
				entry = entry->vme_next;
				start = entry->vme_start;
				continue;
			}

			/* keep the pager alive while we issue the request */
			vm_object_paging_begin(object);
			pager = object->pager;
			vm_object_unlock(object);

			/*
			 * The data_request() could take a long time, so let's
			 * release the map lock to avoid blocking other threads.
			 */
			vm_map_unlock_read(map);

			/*
			 * Get the data from the object asynchronously.
			 *
			 * Note that memory_object_data_request() places limits on the
			 * amount of I/O it will do.  Regardless of the len we
			 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
			 * silently truncates the len to that size.  This isn't
			 * necessarily bad since madvise shouldn't really be used to
			 * page in unlimited amounts of data.  Other Unix variants
			 * limit the willneed case as well.  If this turns out to be an
			 * issue for developers, then we can always adjust the policy
			 * here and still be backwards compatible since this is all
			 * just "advice".
			 */
			kr = memory_object_data_request(
				pager,
				vm_object_trunc_page(offset) + object->paging_offset,
				0,      /* ignored */
				VM_PROT_READ,
				(memory_object_fault_info_t)&fault_info);

			vm_object_lock(object);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			/*
			 * If we couldn't do the I/O for some reason, just give up on
			 * the madvise.  We still return success to the user since
			 * madvise isn't supposed to fail when the advice can't be
			 * taken.
			 */

			if (kr != KERN_SUCCESS) {
				/* map lock was already dropped above */
				return KERN_SUCCESS;
			}
		}

		start += len;
		if (start >= end) {
			/* done */
			return KERN_SUCCESS;
		}

		/* look up next entry: the map lock was dropped, so re-validate */
		vm_map_lock_read(map);
		if (!vm_map_lookup_entry(map, start, &entry)) {
			/*
			 * There's a new hole in the address range.
			 */
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
15813 
15814 static boolean_t
vm_map_entry_is_reusable(vm_map_entry_t entry)15815 vm_map_entry_is_reusable(
15816 	vm_map_entry_t entry)
15817 {
15818 	/* Only user map entries */
15819 
15820 	vm_object_t object;
15821 
15822 	if (entry->is_sub_map) {
15823 		return FALSE;
15824 	}
15825 
15826 	switch (VME_ALIAS(entry)) {
15827 	case VM_MEMORY_MALLOC:
15828 	case VM_MEMORY_MALLOC_SMALL:
15829 	case VM_MEMORY_MALLOC_LARGE:
15830 	case VM_MEMORY_REALLOC:
15831 	case VM_MEMORY_MALLOC_TINY:
15832 	case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15833 	case VM_MEMORY_MALLOC_LARGE_REUSED:
15834 		/*
15835 		 * This is a malloc() memory region: check if it's still
15836 		 * in its original state and can be re-used for more
15837 		 * malloc() allocations.
15838 		 */
15839 		break;
15840 	default:
15841 		/*
15842 		 * Not a malloc() memory region: let the caller decide if
15843 		 * it's re-usable.
15844 		 */
15845 		return TRUE;
15846 	}
15847 
15848 	if (/*entry->is_shared ||*/
15849 		entry->is_sub_map ||
15850 		entry->in_transition ||
15851 		entry->protection != VM_PROT_DEFAULT ||
15852 		entry->max_protection != VM_PROT_ALL ||
15853 		entry->inheritance != VM_INHERIT_DEFAULT ||
15854 		entry->no_cache ||
15855 		entry->permanent ||
15856 		entry->superpage_size != FALSE ||
15857 		entry->zero_wired_pages ||
15858 		entry->wired_count != 0 ||
15859 		entry->user_wired_count != 0) {
15860 		return FALSE;
15861 	}
15862 
15863 	object = VME_OBJECT(entry);
15864 	if (object == VM_OBJECT_NULL) {
15865 		return TRUE;
15866 	}
15867 	if (
15868 #if 0
15869 		/*
15870 		 * Let's proceed even if the VM object is potentially
15871 		 * shared.
15872 		 * We check for this later when processing the actual
15873 		 * VM pages, so the contents will be safe if shared.
15874 		 *
15875 		 * But we can still mark this memory region as "reusable" to
15876 		 * acknowledge that the caller did let us know that the memory
15877 		 * could be re-used and should not be penalized for holding
15878 		 * on to it.  This allows its "resident size" to not include
15879 		 * the reusable range.
15880 		 */
15881 		object->ref_count == 1 &&
15882 #endif
15883 		object->wired_page_count == 0 &&
15884 		object->copy == VM_OBJECT_NULL &&
15885 		object->shadow == VM_OBJECT_NULL &&
15886 		object->internal &&
15887 		object->purgable == VM_PURGABLE_DENY &&
15888 		object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15889 		!object->code_signed) {
15890 		return TRUE;
15891 	}
15892 	return FALSE;
15893 }
15894 
/*
 * Internals for madvise(MADV_FREE_REUSE): re-activate previously
 * "reusable" pages in the given range and restore their accounting.
 */
static kern_return_t
vm_map_reuse_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	vm_object_offset_t              start_offset, end_offset;

	/*
	 * The MADV_REUSE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a
		 * portion of a native page.
		 */
		return KERN_SUCCESS;
	}

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reuse_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reuse_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
		} else {
			start_offset = 0;
		}
		/* convert map-relative offsets to object offsets */
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
			vm_object_reuse_pages(object, start_offset, end_offset,
			    TRUE);
			vm_object_unlock(object);
		}

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reuse_pages_success++;
	return KERN_SUCCESS;
}
15988 
15989 
/*
 * Internals for madvise(MADV_FREE_REUSABLE): mark the pages of the
 * given range as "reusable" so the pageout daemon can reclaim them
 * cheaply, and update the pmap/ledger accounting accordingly.
 */
static kern_return_t
vm_map_reusable_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	vm_object_offset_t              start_offset, end_offset;
	vm_map_offset_t                 pmap_offset;

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a portion
		 * of a native page.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reusable_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		int kill_pages = 0;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
			/* not writable: can't discard contents */
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_nonwritable++;
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
			pmap_offset = start;
		} else {
			start_offset = 0;
			pmap_offset = entry->vme_start;
		}
		/* convert map-relative offsets to object offsets */
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}


		vm_object_lock(object);
		/*
		 * Only discard page contents (kill_pages == 1) when we can
		 * prove no other mapping could still need them: either the
		 * object is unshared (ref_count == 1) or it won't be
		 * symmetrically copied; and it has no shadow.
		 */
		if (((object->ref_count == 1) ||
		    (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
		    object->copy == VM_OBJECT_NULL)) &&
		    object->shadow == VM_OBJECT_NULL &&
		    /*
		     * "iokit_acct" entries are billed for their virtual size
		     * (rather than for their resident pages only), so they
		     * wouldn't benefit from making pages reusable, and it
		     * would be hard to keep track of pages that are both
		     * "iokit_acct" and "reusable" in the pmap stats and
		     * ledgers.
		     */
		    !(entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap))) {
			if (object->ref_count != 1) {
				vm_page_stats_reusable.reusable_shared++;
			}
			kill_pages = 1;
		} else {
			kill_pages = -1;
		}
		if (kill_pages != -1) {
			vm_object_deactivate_pages(object,
			    start_offset,
			    end_offset - start_offset,
			    kill_pages,
			    TRUE /*reusable_pages*/,
			    map->pmap,
			    pmap_offset);
		} else {
			/* shared object: leave the pages alone, just count it */
			vm_page_stats_reusable.reusable_pages_shared++;
			DTRACE_VM4(vm_map_reusable_pages_shared,
			    unsigned int, VME_ALIAS(entry),
			    vm_map_t, map,
			    vm_map_entry_t, entry,
			    vm_object_t, object);
		}
		vm_object_unlock(object);

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
		    VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reusable_pages_success++;
	return KERN_SUCCESS;
}
16135 
16136 
16137 static kern_return_t
vm_map_can_reuse(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end)16138 vm_map_can_reuse(
16139 	vm_map_t        map,
16140 	vm_map_offset_t start,
16141 	vm_map_offset_t end)
16142 {
16143 	vm_map_entry_t                  entry;
16144 
16145 	/*
16146 	 * The MADV_REUSABLE operation doesn't require any changes to the
16147 	 * vm_map_entry_t's, so the read lock is sufficient.
16148 	 */
16149 
16150 	vm_map_lock_read(map);
16151 	assert(map->pmap != kernel_pmap);       /* protect alias access */
16152 
16153 	/*
16154 	 * The madvise semantics require that the address range be fully
16155 	 * allocated with no holes.  Otherwise, we're required to return
16156 	 * an error.
16157 	 */
16158 
16159 	if (!vm_map_range_check(map, start, end, &entry)) {
16160 		vm_map_unlock_read(map);
16161 		vm_page_stats_reusable.can_reuse_failure++;
16162 		return KERN_INVALID_ADDRESS;
16163 	}
16164 
16165 	/*
16166 	 * Examine each vm_map_entry_t in the range.
16167 	 */
16168 	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16169 	    entry = entry->vme_next) {
16170 		/*
16171 		 * Sanity check on the VM map entry.
16172 		 */
16173 		if (!vm_map_entry_is_reusable(entry)) {
16174 			vm_map_unlock_read(map);
16175 			vm_page_stats_reusable.can_reuse_failure++;
16176 			return KERN_INVALID_ADDRESS;
16177 		}
16178 	}
16179 
16180 	vm_map_unlock_read(map);
16181 	vm_page_stats_reusable.can_reuse_success++;
16182 	return KERN_SUCCESS;
16183 }
16184 
16185 
#if MACH_ASSERT
/*
 * Internals for the (debug-only) VM_BEHAVIOR_PAGEOUT operation:
 * ask the pager to evict the internal objects backing the range.
 */
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t                  entry;

	/*
	 * The MADV_PAGEOUT operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		vm_object_t     object;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (entry->is_sub_map) {
			vm_map_t submap;
			vm_map_offset_t submap_start;
			vm_map_offset_t submap_end;
			vm_map_entry_t submap_entry;

			/* translate this entry's range into the submap */
			submap = VME_SUBMAP(entry);
			submap_start = VME_OFFSET(entry);
			submap_end = submap_start + (entry->vme_end -
			    entry->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
			    submap_start,
			    submap_end,
			    &submap_entry)) {
				/* drop both locks before failing */
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			/*
			 * NOTE(review): only the first entry of the submap
			 * range is considered here — confirm that is the
			 * intended (debug-only) behavior.
			 */
			object = VME_OBJECT(submap_entry);
			if (submap_entry->is_sub_map ||
			    object == VM_OBJECT_NULL ||
			    !object->internal) {
				vm_map_unlock_read(submap);
				continue;
			}

			vm_object_pageout(object);

			vm_map_unlock_read(submap);
			submap = VM_MAP_NULL;
			submap_entry = VM_MAP_ENTRY_NULL;
			continue;
		}

		/* only anonymous (internal) objects can be paged out here */
		object = VME_OBJECT(entry);
		if (entry->is_sub_map ||
		    object == VM_OBJECT_NULL ||
		    !object->internal) {
			continue;
		}

		vm_object_pageout(object);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
16275 
16276 
16277 /*
16278  *	Routine:	vm_map_entry_insert
16279  *
16280  *	Description:	This routine inserts a new vm_entry in a locked map.
16281  */
16282 vm_map_entry_t
vm_map_entry_insert(vm_map_t map,vm_map_entry_t insp_entry,vm_map_offset_t start,vm_map_offset_t end,vm_object_t object,vm_object_offset_t offset,vm_map_kernel_flags_t vmk_flags,boolean_t needs_copy,boolean_t is_shared,boolean_t in_transition,vm_prot_t cur_protection,vm_prot_t max_protection,vm_behavior_t behavior,vm_inherit_t inheritance,unsigned short wired_count,boolean_t no_cache,boolean_t permanent,boolean_t no_copy_on_read,unsigned int superpage_size,boolean_t clear_map_aligned,boolean_t is_submap,boolean_t used_for_jit,int alias,boolean_t translated_allow_execute)16283 vm_map_entry_insert(
16284 	vm_map_t                map,
16285 	vm_map_entry_t          insp_entry,
16286 	vm_map_offset_t         start,
16287 	vm_map_offset_t         end,
16288 	vm_object_t             object,
16289 	vm_object_offset_t      offset,
16290 	vm_map_kernel_flags_t   vmk_flags,
16291 	boolean_t               needs_copy,
16292 	boolean_t               is_shared,
16293 	boolean_t               in_transition,
16294 	vm_prot_t               cur_protection,
16295 	vm_prot_t               max_protection,
16296 	vm_behavior_t           behavior,
16297 	vm_inherit_t            inheritance,
16298 	unsigned short          wired_count,
16299 	boolean_t               no_cache,
16300 	boolean_t               permanent,
16301 	boolean_t               no_copy_on_read,
16302 	unsigned int            superpage_size,
16303 	boolean_t               clear_map_aligned,
16304 	boolean_t               is_submap,
16305 	boolean_t               used_for_jit,
16306 	int                     alias,
16307 	boolean_t               translated_allow_execute)
16308 {
16309 	vm_map_entry_t  new_entry;
16310 
16311 	assert(insp_entry != (vm_map_entry_t)0);
16312 	vm_map_lock_assert_exclusive(map);
16313 
16314 #if DEVELOPMENT || DEBUG
16315 	vm_object_offset_t      end_offset = 0;
16316 	assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16317 #endif /* DEVELOPMENT || DEBUG */
16318 
16319 	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
16320 
16321 	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16322 		new_entry->map_aligned = TRUE;
16323 	} else {
16324 		new_entry->map_aligned = FALSE;
16325 	}
16326 	if (clear_map_aligned &&
16327 	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16328 	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
16329 		new_entry->map_aligned = FALSE;
16330 	}
16331 
16332 	new_entry->vme_start = start;
16333 	new_entry->vme_end = end;
16334 	if (new_entry->map_aligned) {
16335 		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
16336 		    VM_MAP_PAGE_MASK(map)));
16337 		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
16338 		    VM_MAP_PAGE_MASK(map)));
16339 	} else {
16340 		assert(page_aligned(new_entry->vme_start));
16341 		assert(page_aligned(new_entry->vme_end));
16342 	}
16343 	assert(new_entry->vme_start < new_entry->vme_end);
16344 
16345 	VME_OBJECT_SET(new_entry, object);
16346 	VME_OFFSET_SET(new_entry, offset);
16347 	new_entry->is_shared = is_shared;
16348 	new_entry->is_sub_map = is_submap;
16349 	new_entry->needs_copy = needs_copy;
16350 	new_entry->in_transition = in_transition;
16351 	new_entry->needs_wakeup = FALSE;
16352 	new_entry->inheritance = inheritance;
16353 	new_entry->protection = cur_protection;
16354 	new_entry->max_protection = max_protection;
16355 	new_entry->behavior = behavior;
16356 	new_entry->wired_count = wired_count;
16357 	new_entry->user_wired_count = 0;
16358 	if (is_submap) {
16359 		/*
16360 		 * submap: "use_pmap" means "nested".
16361 		 * default: false.
16362 		 */
16363 		new_entry->use_pmap = FALSE;
16364 	} else {
16365 		/*
16366 		 * object: "use_pmap" means "use pmap accounting" for footprint.
16367 		 * default: true.
16368 		 */
16369 		new_entry->use_pmap = TRUE;
16370 	}
16371 	VME_ALIAS_SET(new_entry, alias);
16372 	new_entry->zero_wired_pages = FALSE;
16373 	new_entry->no_cache = no_cache;
16374 	new_entry->permanent = permanent;
16375 	if (superpage_size) {
16376 		new_entry->superpage_size = TRUE;
16377 	} else {
16378 		new_entry->superpage_size = FALSE;
16379 	}
16380 	if (used_for_jit) {
16381 		if (!(map->jit_entry_exists) ||
16382 		    VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
16383 			new_entry->used_for_jit = TRUE;
16384 			map->jit_entry_exists = TRUE;
16385 		}
16386 	} else {
16387 		new_entry->used_for_jit = FALSE;
16388 	}
16389 	if (translated_allow_execute) {
16390 		new_entry->translated_allow_execute = TRUE;
16391 	} else {
16392 		new_entry->translated_allow_execute = FALSE;
16393 	}
16394 	new_entry->pmap_cs_associated = FALSE;
16395 	new_entry->iokit_acct = FALSE;
16396 	new_entry->vme_resilient_codesign = FALSE;
16397 	new_entry->vme_resilient_media = FALSE;
16398 	new_entry->vme_atomic = FALSE;
16399 	new_entry->vme_no_copy_on_read = no_copy_on_read;
16400 
16401 	/*
16402 	 *	Insert the new entry into the list.
16403 	 */
16404 
16405 	vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
16406 	map->size += end - start;
16407 
16408 	/*
16409 	 *	Update the free space hint and the lookup hint.
16410 	 */
16411 
16412 	SAVE_HINT_MAP_WRITE(map, new_entry);
16413 	return new_entry;
16414 }
16415 
16416 /*
16417  *	Routine:	vm_map_remap_extract
16418  *
16419  *	Description:	This routine returns a vm_entry list from a map.
16420  */
16421 static kern_return_t
vm_map_remap_extract(vm_map_t map,vm_map_offset_t addr,vm_map_size_t size,boolean_t copy,struct vm_map_header * map_header,vm_prot_t * cur_protection,vm_prot_t * max_protection,vm_inherit_t inheritance,vm_map_kernel_flags_t vmk_flags)16422 vm_map_remap_extract(
16423 	vm_map_t                map,
16424 	vm_map_offset_t         addr,
16425 	vm_map_size_t           size,
16426 	boolean_t               copy,
16427 	struct vm_map_header    *map_header,
16428 	vm_prot_t               *cur_protection,   /* IN/OUT */
16429 	vm_prot_t               *max_protection,   /* IN/OUT */
16430 	/* What, no behavior? */
16431 	vm_inherit_t            inheritance,
16432 	vm_map_kernel_flags_t   vmk_flags)
16433 {
16434 	kern_return_t           result;
16435 	vm_map_size_t           mapped_size;
16436 	vm_map_size_t           tmp_size;
16437 	vm_map_entry_t          src_entry;     /* result of last map lookup */
16438 	vm_map_entry_t          new_entry;
16439 	vm_object_offset_t      offset;
16440 	vm_map_offset_t         map_address;
16441 	vm_map_offset_t         src_start;     /* start of entry to map */
16442 	vm_map_offset_t         src_end;       /* end of region to be mapped */
16443 	vm_object_t             object;
16444 	vm_map_version_t        version;
16445 	boolean_t               src_needs_copy;
16446 	boolean_t               new_entry_needs_copy;
16447 	vm_map_entry_t          saved_src_entry;
16448 	boolean_t               src_entry_was_wired;
16449 	vm_prot_t               max_prot_for_prot_copy;
16450 	vm_map_offset_t         effective_page_mask;
16451 	boolean_t               pageable, same_map;
16452 	boolean_t               vm_remap_legacy;
16453 	vm_prot_t               required_cur_prot, required_max_prot;
16454 
16455 	pageable = vmk_flags.vmkf_copy_pageable;
16456 	same_map = vmk_flags.vmkf_copy_same_map;
16457 
16458 	effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16459 
16460 	assert(map != VM_MAP_NULL);
16461 	assert(size != 0);
16462 	assert(size == vm_map_round_page(size, effective_page_mask));
16463 	assert(inheritance == VM_INHERIT_NONE ||
16464 	    inheritance == VM_INHERIT_COPY ||
16465 	    inheritance == VM_INHERIT_SHARE);
16466 	assert(!(*cur_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16467 	assert(!(*max_protection & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)));
16468 	assert((*cur_protection & *max_protection) == *cur_protection);
16469 
16470 	/*
16471 	 *	Compute start and end of region.
16472 	 */
16473 	src_start = vm_map_trunc_page(addr, effective_page_mask);
16474 	src_end = vm_map_round_page(src_start + size, effective_page_mask);
16475 
16476 	/*
16477 	 *	Initialize map_header.
16478 	 */
16479 	map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16480 	map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16481 	map_header->nentries = 0;
16482 	map_header->entries_pageable = pageable;
16483 //	map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16484 	map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
16485 	map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16486 
16487 	vm_map_store_init( map_header );
16488 
16489 	if (copy && vmk_flags.vmkf_remap_prot_copy) {
16490 		/*
16491 		 * Special case for vm_map_protect(VM_PROT_COPY):
16492 		 * we want to set the new mappings' max protection to the
16493 		 * specified *max_protection...
16494 		 */
16495 		max_prot_for_prot_copy = *max_protection & (VM_PROT_ALL | VM_PROT_ALLEXEC);
16496 		/* ... but we want to use the vm_remap() legacy mode */
16497 		*max_protection = VM_PROT_NONE;
16498 		*cur_protection = VM_PROT_NONE;
16499 	} else {
16500 		max_prot_for_prot_copy = VM_PROT_NONE;
16501 	}
16502 
16503 	if (*cur_protection == VM_PROT_NONE &&
16504 	    *max_protection == VM_PROT_NONE) {
16505 		/*
16506 		 * vm_remap() legacy mode:
16507 		 * Extract all memory regions in the specified range and
16508 		 * collect the strictest set of protections allowed on the
16509 		 * entire range, so the caller knows what they can do with
16510 		 * the remapped range.
16511 		 * We start with VM_PROT_ALL and we'll remove the protections
16512 		 * missing from each memory region.
16513 		 */
16514 		vm_remap_legacy = TRUE;
16515 		*cur_protection = VM_PROT_ALL;
16516 		*max_protection = VM_PROT_ALL;
16517 		required_cur_prot = VM_PROT_NONE;
16518 		required_max_prot = VM_PROT_NONE;
16519 	} else {
16520 		/*
16521 		 * vm_remap_new() mode:
16522 		 * Extract all memory regions in the specified range and
16523 		 * ensure that they have at least the protections specified
16524 		 * by the caller via *cur_protection and *max_protection.
16525 		 * The resulting mapping should have these protections.
16526 		 */
16527 		vm_remap_legacy = FALSE;
16528 		if (copy) {
16529 			required_cur_prot = VM_PROT_NONE;
16530 			required_max_prot = VM_PROT_READ;
16531 		} else {
16532 			required_cur_prot = *cur_protection;
16533 			required_max_prot = *max_protection;
16534 		}
16535 	}
16536 
16537 	map_address = 0;
16538 	mapped_size = 0;
16539 	result = KERN_SUCCESS;
16540 
16541 	/*
16542 	 *	The specified source virtual space might correspond to
16543 	 *	multiple map entries, need to loop on them.
16544 	 */
16545 	vm_map_lock(map);
16546 	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16547 		/*
16548 		 * This address space uses sub-pages so the range might
16549 		 * not be re-mappable in an address space with larger
16550 		 * pages. Re-assemble any broken-up VM map entries to
16551 		 * improve our chances of making it work.
16552 		 */
16553 		vm_map_simplify_range(map, src_start, src_end);
16554 	}
16555 	while (mapped_size != size) {
16556 		vm_map_size_t   entry_size;
16557 
16558 		/*
16559 		 *	Find the beginning of the region.
16560 		 */
16561 		if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16562 			result = KERN_INVALID_ADDRESS;
16563 			break;
16564 		}
16565 
16566 		if (src_start < src_entry->vme_start ||
16567 		    (mapped_size && src_start != src_entry->vme_start)) {
16568 			result = KERN_INVALID_ADDRESS;
16569 			break;
16570 		}
16571 
16572 		tmp_size = size - mapped_size;
16573 		if (src_end > src_entry->vme_end) {
16574 			tmp_size -= (src_end - src_entry->vme_end);
16575 		}
16576 
16577 		entry_size = (vm_map_size_t)(src_entry->vme_end -
16578 		    src_entry->vme_start);
16579 
16580 		if (src_entry->is_sub_map &&
16581 		    vmk_flags.vmkf_copy_single_object) {
16582 			vm_map_t submap;
16583 			vm_map_offset_t submap_start;
16584 			vm_map_size_t submap_size;
16585 			boolean_t submap_needs_copy;
16586 
16587 			/*
16588 			 * No check for "required protection" on "src_entry"
16589 			 * because the protections that matter are the ones
16590 			 * on the submap's VM map entry, which will be checked
16591 			 * during the call to vm_map_remap_extract() below.
16592 			 */
16593 			submap_size = src_entry->vme_end - src_start;
16594 			if (submap_size > size) {
16595 				submap_size = size;
16596 			}
16597 			submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16598 			submap = VME_SUBMAP(src_entry);
16599 			if (copy) {
16600 				/*
16601 				 * The caller wants a copy-on-write re-mapping,
16602 				 * so let's extract from the submap accordingly.
16603 				 */
16604 				submap_needs_copy = TRUE;
16605 			} else if (src_entry->needs_copy) {
16606 				/*
16607 				 * The caller wants a shared re-mapping but the
16608 				 * submap is mapped with "needs_copy", so its
16609 				 * contents can't be shared as is. Extract the
16610 				 * contents of the submap as "copy-on-write".
16611 				 * The re-mapping won't be shared with the
16612 				 * original mapping but this is equivalent to
16613 				 * what happened with the original "remap from
16614 				 * submap" code.
16615 				 * The shared region is mapped "needs_copy", for
16616 				 * example.
16617 				 */
16618 				submap_needs_copy = TRUE;
16619 			} else {
16620 				/*
16621 				 * The caller wants a shared re-mapping and
16622 				 * this mapping can be shared (no "needs_copy"),
16623 				 * so let's extract from the submap accordingly.
16624 				 * Kernel submaps are mapped without
16625 				 * "needs_copy", for example.
16626 				 */
16627 				submap_needs_copy = FALSE;
16628 			}
16629 			vm_map_reference(submap);
16630 			vm_map_unlock(map);
16631 			src_entry = NULL;
16632 			if (vm_remap_legacy) {
16633 				*cur_protection = VM_PROT_NONE;
16634 				*max_protection = VM_PROT_NONE;
16635 			}
16636 
16637 			DTRACE_VM7(remap_submap_recurse,
16638 			    vm_map_t, map,
16639 			    vm_map_offset_t, addr,
16640 			    vm_map_size_t, size,
16641 			    boolean_t, copy,
16642 			    vm_map_offset_t, submap_start,
16643 			    vm_map_size_t, submap_size,
16644 			    boolean_t, submap_needs_copy);
16645 
16646 			result = vm_map_remap_extract(submap,
16647 			    submap_start,
16648 			    submap_size,
16649 			    submap_needs_copy,
16650 			    map_header,
16651 			    cur_protection,
16652 			    max_protection,
16653 			    inheritance,
16654 			    vmk_flags);
16655 			vm_map_deallocate(submap);
16656 			return result;
16657 		}
16658 
16659 		if (src_entry->is_sub_map) {
16660 			/* protections for submap mapping are irrelevant here */
16661 		} else if (((src_entry->protection & required_cur_prot) !=
16662 		    required_cur_prot) ||
16663 		    ((src_entry->max_protection & required_max_prot) !=
16664 		    required_max_prot)) {
16665 			if (vmk_flags.vmkf_copy_single_object &&
16666 			    mapped_size != 0) {
16667 				/*
16668 				 * Single object extraction.
16669 				 * We can't extract more with the required
16670 				 * protection but we've extracted some, so
16671 				 * stop there and declare success.
16672 				 * The caller should check the size of
16673 				 * the copy entry we've extracted.
16674 				 */
16675 				result = KERN_SUCCESS;
16676 			} else {
16677 				/*
16678 				 * VM range extraction.
16679 				 * Required proctection is not available
16680 				 * for this part of the range: fail.
16681 				 */
16682 				result = KERN_PROTECTION_FAILURE;
16683 			}
16684 			break;
16685 		}
16686 
16687 		if (src_entry->is_sub_map) {
16688 			vm_map_t submap;
16689 			vm_map_offset_t submap_start;
16690 			vm_map_size_t submap_size;
16691 			vm_map_copy_t submap_copy;
16692 			vm_prot_t submap_curprot, submap_maxprot;
16693 			boolean_t submap_needs_copy;
16694 
16695 			/*
16696 			 * No check for "required protection" on "src_entry"
16697 			 * because the protections that matter are the ones
16698 			 * on the submap's VM map entry, which will be checked
16699 			 * during the call to vm_map_copy_extract() below.
16700 			 */
16701 			object = VM_OBJECT_NULL;
16702 			submap_copy = VM_MAP_COPY_NULL;
16703 
16704 			/* find equivalent range in the submap */
16705 			submap = VME_SUBMAP(src_entry);
16706 			submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16707 			submap_size = tmp_size;
16708 			if (copy) {
16709 				/*
16710 				 * The caller wants a copy-on-write re-mapping,
16711 				 * so let's extract from the submap accordingly.
16712 				 */
16713 				submap_needs_copy = TRUE;
16714 			} else if (src_entry->needs_copy) {
16715 				/*
16716 				 * The caller wants a shared re-mapping but the
16717 				 * submap is mapped with "needs_copy", so its
16718 				 * contents can't be shared as is. Extract the
16719 				 * contents of the submap as "copy-on-write".
16720 				 * The re-mapping won't be shared with the
16721 				 * original mapping but this is equivalent to
16722 				 * what happened with the original "remap from
16723 				 * submap" code.
16724 				 * The shared region is mapped "needs_copy", for
16725 				 * example.
16726 				 */
16727 				submap_needs_copy = TRUE;
16728 			} else {
16729 				/*
16730 				 * The caller wants a shared re-mapping and
16731 				 * this mapping can be shared (no "needs_copy"),
16732 				 * so let's extract from the submap accordingly.
16733 				 * Kernel submaps are mapped without
16734 				 * "needs_copy", for example.
16735 				 */
16736 				submap_needs_copy = FALSE;
16737 			}
16738 			/* extra ref to keep submap alive */
16739 			vm_map_reference(submap);
16740 
16741 			DTRACE_VM7(remap_submap_recurse,
16742 			    vm_map_t, map,
16743 			    vm_map_offset_t, addr,
16744 			    vm_map_size_t, size,
16745 			    boolean_t, copy,
16746 			    vm_map_offset_t, submap_start,
16747 			    vm_map_size_t, submap_size,
16748 			    boolean_t, submap_needs_copy);
16749 
16750 			/*
16751 			 * The map can be safely unlocked since we
16752 			 * already hold a reference on the submap.
16753 			 *
16754 			 * No timestamp since we don't care if the map
16755 			 * gets modified while we're down in the submap.
16756 			 * We'll resume the extraction at src_start + tmp_size
16757 			 * anyway.
16758 			 */
16759 			vm_map_unlock(map);
16760 			src_entry = NULL; /* not valid once map is unlocked */
16761 
16762 			if (vm_remap_legacy) {
16763 				submap_curprot = VM_PROT_NONE;
16764 				submap_maxprot = VM_PROT_NONE;
16765 				if (max_prot_for_prot_copy) {
16766 					submap_maxprot = max_prot_for_prot_copy;
16767 				}
16768 			} else {
16769 				assert(!max_prot_for_prot_copy);
16770 				submap_curprot = *cur_protection;
16771 				submap_maxprot = *max_protection;
16772 			}
16773 			result = vm_map_copy_extract(submap,
16774 			    submap_start,
16775 			    submap_size,
16776 			    submap_needs_copy,
16777 			    &submap_copy,
16778 			    &submap_curprot,
16779 			    &submap_maxprot,
16780 			    inheritance,
16781 			    vmk_flags);
16782 
16783 			/* release extra ref on submap */
16784 			vm_map_deallocate(submap);
16785 			submap = VM_MAP_NULL;
16786 
16787 			if (result != KERN_SUCCESS) {
16788 				vm_map_lock(map);
16789 				break;
16790 			}
16791 
16792 			/* transfer submap_copy entries to map_header */
16793 			while (vm_map_copy_first_entry(submap_copy) !=
16794 			    vm_map_copy_to_entry(submap_copy)) {
16795 				vm_map_entry_t copy_entry;
16796 				vm_map_size_t copy_entry_size;
16797 
16798 				copy_entry = vm_map_copy_first_entry(submap_copy);
16799 				assert(!copy_entry->is_sub_map);
16800 				object = VME_OBJECT(copy_entry);
16801 
16802 				/*
16803 				 * Prevent kernel_object from being exposed to
16804 				 * user space.
16805 				 */
16806 				if (__improbable(object == kernel_object)) {
16807 					printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16808 					    proc_selfpid(),
16809 					    (current_task()->bsd_info
16810 					    ? proc_name_address(current_task()->bsd_info)
16811 					    : "?"));
16812 					DTRACE_VM(extract_kernel_only);
16813 					result = KERN_INVALID_RIGHT;
16814 					vm_map_copy_discard(submap_copy);
16815 					submap_copy = VM_MAP_COPY_NULL;
16816 					vm_map_lock(map);
16817 					break;
16818 				}
16819 
16820 				vm_map_copy_entry_unlink(submap_copy, copy_entry);
16821 				copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16822 				copy_entry->vme_start = map_address;
16823 				copy_entry->vme_end = map_address + copy_entry_size;
16824 				map_address += copy_entry_size;
16825 				mapped_size += copy_entry_size;
16826 				src_start += copy_entry_size;
16827 				assert(src_start <= src_end);
16828 				_vm_map_store_entry_link(map_header,
16829 				    map_header->links.prev,
16830 				    copy_entry);
16831 			}
16832 			/* done with submap_copy */
16833 			vm_map_copy_discard(submap_copy);
16834 
16835 			if (vm_remap_legacy) {
16836 				*cur_protection &= submap_curprot;
16837 				*max_protection &= submap_maxprot;
16838 			}
16839 
16840 			/* re-acquire the map lock and continue to next entry */
16841 			vm_map_lock(map);
16842 			continue;
16843 		} else {
16844 			object = VME_OBJECT(src_entry);
16845 
16846 			/*
16847 			 * Prevent kernel_object from being exposed to
16848 			 * user space.
16849 			 */
16850 			if (__improbable(object == kernel_object)) {
16851 				printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16852 				    proc_selfpid(),
16853 				    (current_task()->bsd_info
16854 				    ? proc_name_address(current_task()->bsd_info)
16855 				    : "?"));
16856 				DTRACE_VM(extract_kernel_only);
16857 				result = KERN_INVALID_RIGHT;
16858 				break;
16859 			}
16860 
16861 			if (src_entry->iokit_acct) {
16862 				/*
16863 				 * This entry uses "IOKit accounting".
16864 				 */
16865 			} else if (object != VM_OBJECT_NULL &&
16866 			    (object->purgable != VM_PURGABLE_DENY ||
16867 			    object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
16868 				/*
16869 				 * Purgeable objects have their own accounting:
16870 				 * no pmap accounting for them.
16871 				 */
16872 				assertf(!src_entry->use_pmap,
16873 				    "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16874 				    map,
16875 				    src_entry,
16876 				    (uint64_t)src_entry->vme_start,
16877 				    (uint64_t)src_entry->vme_end,
16878 				    src_entry->protection,
16879 				    src_entry->max_protection,
16880 				    VME_ALIAS(src_entry));
16881 			} else {
16882 				/*
16883 				 * Not IOKit or purgeable:
16884 				 * must be accounted by pmap stats.
16885 				 */
16886 				assertf(src_entry->use_pmap,
16887 				    "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16888 				    map,
16889 				    src_entry,
16890 				    (uint64_t)src_entry->vme_start,
16891 				    (uint64_t)src_entry->vme_end,
16892 				    src_entry->protection,
16893 				    src_entry->max_protection,
16894 				    VME_ALIAS(src_entry));
16895 			}
16896 
16897 			if (object == VM_OBJECT_NULL) {
16898 				assert(!src_entry->needs_copy);
16899 				object = vm_object_allocate(entry_size);
16900 				VME_OFFSET_SET(src_entry, 0);
16901 				VME_OBJECT_SET(src_entry, object);
16902 				assert(src_entry->use_pmap);
16903 				assert(!map->mapped_in_other_pmaps);
16904 			} else if (src_entry->wired_count ||
16905 			    object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16906 				/*
16907 				 * A wired memory region should not have
16908 				 * any pending copy-on-write and needs to
16909 				 * keep pointing at the VM object that
16910 				 * contains the wired pages.
16911 				 * If we're sharing this memory (copy=false),
16912 				 * we'll share this VM object.
16913 				 * If we're copying this memory (copy=true),
16914 				 * we'll call vm_object_copy_slowly() below
16915 				 * and use the new VM object for the remapping.
16916 				 *
16917 				 * Or, we are already using an asymmetric
16918 				 * copy, and therefore we already have
16919 				 * the right object.
16920 				 */
16921 				assert(!src_entry->needs_copy);
16922 			} else if (src_entry->needs_copy || object->shadowed ||
16923 			    (object->internal && !object->true_share &&
16924 			    !src_entry->is_shared &&
16925 			    object->vo_size > entry_size)) {
16926 				VME_OBJECT_SHADOW(src_entry, entry_size);
16927 				assert(src_entry->use_pmap);
16928 
16929 				if (!src_entry->needs_copy &&
16930 				    (src_entry->protection & VM_PROT_WRITE)) {
16931 					vm_prot_t prot;
16932 
16933 					assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16934 
16935 					prot = src_entry->protection & ~VM_PROT_WRITE;
16936 
16937 					if (override_nx(map,
16938 					    VME_ALIAS(src_entry))
16939 					    && prot) {
16940 						prot |= VM_PROT_EXECUTE;
16941 					}
16942 
16943 					assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16944 
16945 					if (map->mapped_in_other_pmaps) {
16946 						vm_object_pmap_protect(
16947 							VME_OBJECT(src_entry),
16948 							VME_OFFSET(src_entry),
16949 							entry_size,
16950 							PMAP_NULL,
16951 							PAGE_SIZE,
16952 							src_entry->vme_start,
16953 							prot);
16954 #if MACH_ASSERT
16955 					} else if (__improbable(map->pmap == PMAP_NULL)) {
16956 						extern boolean_t vm_tests_in_progress;
16957 						assert(vm_tests_in_progress);
16958 						/*
16959 						 * Some VM tests (in vm_tests.c)
16960 						 * sometimes want to use a VM
16961 						 * map without a pmap.
16962 						 * Otherwise, this should never
16963 						 * happen.
16964 						 */
16965 #endif /* MACH_ASSERT */
16966 					} else {
16967 						pmap_protect(vm_map_pmap(map),
16968 						    src_entry->vme_start,
16969 						    src_entry->vme_end,
16970 						    prot);
16971 					}
16972 				}
16973 
16974 				object = VME_OBJECT(src_entry);
16975 				src_entry->needs_copy = FALSE;
16976 			}
16977 
16978 
16979 			vm_object_lock(object);
16980 			vm_object_reference_locked(object); /* object ref. for new entry */
16981 			assert(!src_entry->needs_copy);
16982 			if (object->copy_strategy ==
16983 			    MEMORY_OBJECT_COPY_SYMMETRIC) {
16984 				/*
16985 				 * If we want to share this object (copy==0),
16986 				 * it needs to be COPY_DELAY.
16987 				 * If we want to copy this object (copy==1),
16988 				 * we can't just set "needs_copy" on our side
16989 				 * and expect the other side to do the same
16990 				 * (symmetrically), so we can't let the object
16991 				 * stay COPY_SYMMETRIC.
16992 				 * So we always switch from COPY_SYMMETRIC to
16993 				 * COPY_DELAY.
16994 				 */
16995 				object->copy_strategy =
16996 				    MEMORY_OBJECT_COPY_DELAY;
16997 				object->true_share = TRUE;
16998 			}
16999 			vm_object_unlock(object);
17000 		}
17001 
17002 		offset = (VME_OFFSET(src_entry) +
17003 		    (src_start - src_entry->vme_start));
17004 
17005 		new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
17006 		vm_map_entry_copy(map, new_entry, src_entry);
17007 		if (new_entry->is_sub_map) {
17008 			/* clr address space specifics */
17009 			new_entry->use_pmap = FALSE;
17010 		} else if (copy) {
17011 			/*
17012 			 * We're dealing with a copy-on-write operation,
17013 			 * so the resulting mapping should not inherit the
17014 			 * original mapping's accounting settings.
17015 			 * "use_pmap" should be reset to its default (TRUE)
17016 			 * so that the new mapping gets accounted for in
17017 			 * the task's memory footprint.
17018 			 */
17019 			new_entry->use_pmap = TRUE;
17020 		}
17021 		/* "iokit_acct" was cleared in vm_map_entry_copy() */
17022 		assert(!new_entry->iokit_acct);
17023 
17024 		new_entry->map_aligned = FALSE;
17025 
17026 		new_entry->vme_start = map_address;
17027 		new_entry->vme_end = map_address + tmp_size;
17028 		assert(new_entry->vme_start < new_entry->vme_end);
17029 		if (copy && vmk_flags.vmkf_remap_prot_copy) {
17030 			/*
17031 			 * Remapping for vm_map_protect(VM_PROT_COPY)
17032 			 * to convert a read-only mapping into a
17033 			 * copy-on-write version of itself but
17034 			 * with write access:
17035 			 * keep the original inheritance and add
17036 			 * VM_PROT_WRITE to the max protection.
17037 			 */
17038 			new_entry->inheritance = src_entry->inheritance;
17039 			new_entry->protection &= max_prot_for_prot_copy;
17040 			new_entry->max_protection |= VM_PROT_WRITE;
17041 		} else {
17042 			new_entry->inheritance = inheritance;
17043 			if (!vm_remap_legacy) {
17044 				new_entry->protection = *cur_protection;
17045 				new_entry->max_protection = *max_protection;
17046 			}
17047 		}
17048 		VME_OFFSET_SET(new_entry, offset);
17049 
17050 		/*
17051 		 * The new region has to be copied now if required.
17052 		 */
17053 RestartCopy:
17054 		if (!copy) {
17055 			if (src_entry->used_for_jit == TRUE) {
17056 				if (same_map) {
17057 				} else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
17058 					/*
17059 					 * Cannot allow an entry describing a JIT
17060 					 * region to be shared across address spaces.
17061 					 */
17062 					result = KERN_INVALID_ARGUMENT;
17063 					break;
17064 				}
17065 			}
17066 
17067 			src_entry->is_shared = TRUE;
17068 			new_entry->is_shared = TRUE;
17069 			if (!(new_entry->is_sub_map)) {
17070 				new_entry->needs_copy = FALSE;
17071 			}
17072 		} else if (src_entry->is_sub_map) {
17073 			/* make this a COW sub_map if not already */
17074 			assert(new_entry->wired_count == 0);
17075 			new_entry->needs_copy = TRUE;
17076 			object = VM_OBJECT_NULL;
17077 		} else if (src_entry->wired_count == 0 &&
17078 		    !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
17079 		    vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
17080 		    VME_OFFSET(new_entry),
17081 		    (new_entry->vme_end -
17082 		    new_entry->vme_start),
17083 		    &src_needs_copy,
17084 		    &new_entry_needs_copy)) {
17085 			new_entry->needs_copy = new_entry_needs_copy;
17086 			new_entry->is_shared = FALSE;
17087 			assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17088 
17089 			/*
17090 			 * Handle copy_on_write semantics.
17091 			 */
17092 			if (src_needs_copy && !src_entry->needs_copy) {
17093 				vm_prot_t prot;
17094 
17095 				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
17096 
17097 				prot = src_entry->protection & ~VM_PROT_WRITE;
17098 
17099 				if (override_nx(map,
17100 				    VME_ALIAS(src_entry))
17101 				    && prot) {
17102 					prot |= VM_PROT_EXECUTE;
17103 				}
17104 
17105 				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
17106 
17107 				vm_object_pmap_protect(object,
17108 				    offset,
17109 				    entry_size,
17110 				    ((src_entry->is_shared
17111 				    || map->mapped_in_other_pmaps) ?
17112 				    PMAP_NULL : map->pmap),
17113 				    VM_MAP_PAGE_SIZE(map),
17114 				    src_entry->vme_start,
17115 				    prot);
17116 
17117 				assert(src_entry->wired_count == 0);
17118 				src_entry->needs_copy = TRUE;
17119 			}
17120 			/*
17121 			 * Throw away the old object reference of the new entry.
17122 			 */
17123 			vm_object_deallocate(object);
17124 		} else {
17125 			new_entry->is_shared = FALSE;
17126 			assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
17127 
17128 			src_entry_was_wired = (src_entry->wired_count > 0);
17129 			saved_src_entry = src_entry;
17130 			src_entry = VM_MAP_ENTRY_NULL;
17131 
17132 			/*
17133 			 * The map can be safely unlocked since we
17134 			 * already hold a reference on the object.
17135 			 *
17136 			 * Record the timestamp of the map for later
17137 			 * verification, and unlock the map.
17138 			 */
17139 			version.main_timestamp = map->timestamp;
17140 			vm_map_unlock(map);     /* Increments timestamp once! */
17141 
17142 			/*
17143 			 * Perform the copy.
17144 			 */
17145 			if (src_entry_was_wired > 0 ||
17146 			    (debug4k_no_cow_copyin &&
17147 			    VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
17148 				vm_object_lock(object);
17149 				result = vm_object_copy_slowly(
17150 					object,
17151 					offset,
17152 					(new_entry->vme_end -
17153 					new_entry->vme_start),
17154 					THREAD_UNINT,
17155 					VME_OBJECT_PTR(new_entry));
17156 
17157 				VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
17158 				new_entry->needs_copy = FALSE;
17159 			} else {
17160 				vm_object_offset_t new_offset;
17161 
17162 				new_offset = VME_OFFSET(new_entry);
17163 				result = vm_object_copy_strategically(
17164 					object,
17165 					offset,
17166 					(new_entry->vme_end -
17167 					new_entry->vme_start),
17168 					VME_OBJECT_PTR(new_entry),
17169 					&new_offset,
17170 					&new_entry_needs_copy);
17171 				if (new_offset != VME_OFFSET(new_entry)) {
17172 					VME_OFFSET_SET(new_entry, new_offset);
17173 				}
17174 
17175 				new_entry->needs_copy = new_entry_needs_copy;
17176 			}
17177 
17178 			/*
17179 			 * Throw away the old object reference of the new entry.
17180 			 */
17181 			vm_object_deallocate(object);
17182 
17183 			if (result != KERN_SUCCESS &&
17184 			    result != KERN_MEMORY_RESTART_COPY) {
17185 				_vm_map_entry_dispose(map_header, new_entry);
17186 				vm_map_lock(map);
17187 				break;
17188 			}
17189 
17190 			/*
17191 			 * Verify that the map has not substantially
17192 			 * changed while the copy was being made.
17193 			 */
17194 
17195 			vm_map_lock(map);
17196 			if (version.main_timestamp + 1 != map->timestamp) {
17197 				/*
17198 				 * Simple version comparison failed.
17199 				 *
17200 				 * Retry the lookup and verify that the
17201 				 * same object/offset are still present.
17202 				 */
17203 				saved_src_entry = VM_MAP_ENTRY_NULL;
17204 				vm_object_deallocate(VME_OBJECT(new_entry));
17205 				_vm_map_entry_dispose(map_header, new_entry);
17206 				if (result == KERN_MEMORY_RESTART_COPY) {
17207 					result = KERN_SUCCESS;
17208 				}
17209 				continue;
17210 			}
17211 			/* map hasn't changed: src_entry is still valid */
17212 			src_entry = saved_src_entry;
17213 			saved_src_entry = VM_MAP_ENTRY_NULL;
17214 
17215 			if (result == KERN_MEMORY_RESTART_COPY) {
17216 				vm_object_reference(object);
17217 				goto RestartCopy;
17218 			}
17219 		}
17220 
17221 		_vm_map_store_entry_link(map_header,
17222 		    map_header->links.prev, new_entry);
17223 
17224 		/* protections for submap mapping are irrelevant here */
17225 		if (vm_remap_legacy && !src_entry->is_sub_map) {
17226 			*cur_protection &= src_entry->protection;
17227 			*max_protection &= src_entry->max_protection;
17228 		}
17229 
17230 		map_address += tmp_size;
17231 		mapped_size += tmp_size;
17232 		src_start += tmp_size;
17233 
17234 		if (vmk_flags.vmkf_copy_single_object) {
17235 			if (mapped_size != size) {
17236 				DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17237 				if (src_entry->vme_next != vm_map_to_entry(map) &&
17238 				    VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
17239 					/* XXX TODO4K */
17240 					DEBUG4K_ERROR("could have extended copy to next entry...\n");
17241 				}
17242 			}
17243 			break;
17244 		}
17245 	} /* end while */
17246 
17247 	vm_map_unlock(map);
17248 	if (result != KERN_SUCCESS) {
17249 		/*
17250 		 * Free all allocated elements.
17251 		 */
17252 		for (src_entry = map_header->links.next;
17253 		    src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17254 		    src_entry = new_entry) {
17255 			new_entry = src_entry->vme_next;
17256 			_vm_map_store_entry_unlink(map_header, src_entry);
17257 			if (src_entry->is_sub_map) {
17258 				vm_map_deallocate(VME_SUBMAP(src_entry));
17259 			} else {
17260 				vm_object_deallocate(VME_OBJECT(src_entry));
17261 			}
17262 			_vm_map_entry_dispose(map_header, src_entry);
17263 		}
17264 	}
17265 	return result;
17266 }
17267 
17268 bool
vm_map_is_exotic(vm_map_t map)17269 vm_map_is_exotic(
17270 	vm_map_t map)
17271 {
17272 	return VM_MAP_IS_EXOTIC(map);
17273 }
17274 
17275 bool
vm_map_is_alien(vm_map_t map)17276 vm_map_is_alien(
17277 	vm_map_t map)
17278 {
17279 	return VM_MAP_IS_ALIEN(map);
17280 }
17281 
17282 #if XNU_TARGET_OS_OSX
/*
 * Set the "is_alien" flag on "map".
 * NOTE(review): the flag's exact semantics (presumably an address space
 * set up for a foreign/translated environment) are defined where
 * VM_MAP_IS_ALIEN() is consumed -- confirm there before relying on this.
 * The map lock is taken so the flag update serializes with other map
 * mutations; the flag is never cleared by this file's visible code.
 */
void
vm_map_mark_alien(
	vm_map_t map)
{
	vm_map_lock(map);
	map->is_alien = true;
	vm_map_unlock(map);
}
17291 
/*
 * Set the "single_jit" flag on "map" (one-way: there is no visible path
 * that clears it).  NOTE(review): presumably restricts the map to a single
 * JIT region -- confirm against the consumers of "single_jit".
 * Taken under the map lock to serialize with other map mutations.
 */
void
vm_map_single_jit(
	vm_map_t map)
{
	vm_map_lock(map);
	map->single_jit = true;
	vm_map_unlock(map);
}
17300 #endif /* XNU_TARGET_OS_OSX */
17301 
/*
 * Replace the (page-size-mismatched) mappings of "copy_map" with a single
 * entry backed by a freshly allocated VM object holding a *physical* copy
 * of the original contents, aligned to "target_map"'s page size.
 *
 * Strategy: build a temporary pageable map (with its own pmap at the
 * copy_map's page size), map both the original "copy_map" and the new
 * object into it, copy page-by-page through a kernel buffer, then tear
 * the temporary map down and splice the new entry into "copy_map".
 *
 * Returns KERN_SUCCESS, or KERN_RESOURCE_SHORTAGE if the temporary pmap
 * cannot be created.  On success, "copy_map" owns only the new entry and
 * its page_shift/offset/size are updated to match "target_map".
 */
static kern_return_t
vm_map_copy_to_physcopy(
	vm_map_copy_t   copy_map,
	vm_map_t        target_map)
{
	vm_map_size_t           size;
	vm_map_entry_t          entry;
	vm_map_entry_t          new_entry;
	vm_object_t             new_object;
	unsigned int            pmap_flags;
	pmap_t                  new_pmap;
	vm_map_t                new_map;
	vm_map_address_t        src_start, src_end, src_cur;
	vm_map_address_t        dst_start, dst_end, dst_cur;
	kern_return_t           kr;
	void                    *kbuf;

	/*
	 * Perform the equivalent of vm_allocate() and memcpy().
	 * Replace the mappings in "copy_map" with the newly allocated mapping.
	 */
	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);

	/*
	 * NOTE(review): this compares a page *shift* against a page *mask*;
	 * it looks like VM_MAP_PAGE_SHIFT(target_map) was intended (the
	 * shift-vs-mask comparison is trivially true, so the assert is
	 * effectively a no-op).  Debug-build-only either way.
	 */
	assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));

	/* create a new pmap to map "copy_map" */
	pmap_flags = 0;
	/* only the 4K-in-16K remap case is expected here */
	assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
#if PMAP_CREATE_FORCE_4K_PAGES
	pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
	pmap_flags |= PMAP_CREATE_64BIT;
	new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
	if (new_pmap == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/* allocate new VM object */
	size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
	new_object = vm_object_allocate(size);
	assert(new_object);

	/* allocate new VM map entry */
	new_entry = vm_map_copy_entry_create(copy_map, FALSE);
	assert(new_entry);

	/* finish initializing new VM map entry */
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_DEFAULT;
	new_entry->use_pmap = TRUE;

	/* make new VM map entry point to new VM object */
	new_entry->vme_start = 0;
	new_entry->vme_end = size;
	VME_OBJECT_SET(new_entry, new_object);
	VME_OFFSET_SET(new_entry, 0);

	/* create a new pageable VM map to map "copy_map" */
	new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
	assert(new_map);
	vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);

	/*
	 * map "copy_map" in the new VM map
	 * (consume_on_success == FALSE: "copy_map" stays owned by the caller
	 * so we can rewrite its entries below)
	 */
	src_start = 0;
	kr = vm_map_copyout_internal(
		new_map,
		&src_start,
		copy_map,
		copy_map->size,
		FALSE, /* consume_on_success */
		VM_PROT_DEFAULT,
		VM_PROT_DEFAULT,
		VM_INHERIT_DEFAULT);
	/*
	 * NOTE(review): on RELEASE kernels this assert compiles out and a
	 * copyout failure would be silently ignored -- confirm that failure
	 * is impossible here (pageable temporary map, fresh address space).
	 */
	assert(kr == KERN_SUCCESS);
	src_end = src_start + copy_map->size;

	/* map "new_object" in the new VM map */
	vm_object_reference(new_object);
	dst_start = 0;
	kr = vm_map_enter(new_map,
	    &dst_start,
	    size,
	    0,               /* mask */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_OSFMK,
	    new_object,
	    0,               /* offset */
	    FALSE,               /* needs copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_DEFAULT,
	    VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	dst_end = dst_start + size;

	/* get a kernel buffer */
	kbuf = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_NOFAIL);

	/* physically copy "copy_map" mappings to new VM object */
	for (src_cur = src_start, dst_cur = dst_start;
	    src_cur < src_end;
	    src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
		vm_size_t bytes;

		bytes = PAGE_SIZE;
		if (src_cur + PAGE_SIZE > src_end) {
			/* partial copy for last page */
			bytes = src_end - src_cur;
			assert(bytes > 0 && bytes < PAGE_SIZE);
			/* rest of dst page should be zero-filled */
		}
		/* get bytes from src mapping */
		kr = copyinmap(new_map, src_cur, kbuf, bytes);
		if (kr != KERN_SUCCESS) {
			/* best-effort: failure is logged but the copy proceeds */
			DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
		}
		/* put bytes in dst mapping */
		assert(dst_cur < dst_end);
		assert(dst_cur + bytes <= dst_end);
		kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
		}
	}

	/* free kernel buffer */
	kfree_data(kbuf, PAGE_SIZE);

	/* destroy new map (also drops the pmap and the extra object ref) */
	vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
	new_map = VM_MAP_NULL;

	/* dispose of the old map entries in "copy_map" */
	while (vm_map_copy_first_entry(copy_map) !=
	    vm_map_copy_to_entry(copy_map)) {
		entry = vm_map_copy_first_entry(copy_map);
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(copy_map, entry);
	}

	/* change "copy_map"'s page_size to match "target_map" */
	copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
	copy_map->offset = 0;
	copy_map->size = size;

	/* insert new map entry in "copy_map" */
	assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
	vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);

	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
	return KERN_SUCCESS;
}
17459 
void
vm_map_copy_adjust_get_target_copy_map(
	vm_map_copy_t   copy_map,
	vm_map_copy_t   *target_copy_map_p);
/*
 * Ensure the caller has a mutable "target_copy_map".
 * If *target_copy_map_p is already set, it is used as-is and nothing is
 * done.  Otherwise, a deep copy of "copy_map"'s entry list is created:
 * each entry is duplicated with vm_map_entry_copy_full() and an extra
 * reference is taken on its backing submap or object, so the new copy map
 * can be modified and disposed of independently of the original.
 * The new copy map is returned via *target_copy_map_p (caller owns it).
 */
void
vm_map_copy_adjust_get_target_copy_map(
	vm_map_copy_t   copy_map,
	vm_map_copy_t   *target_copy_map_p)
{
	vm_map_copy_t   target_copy_map;
	vm_map_entry_t  entry, target_entry;

	if (*target_copy_map_p != VM_MAP_COPY_NULL) {
		/* the caller already has a "target_copy_map": use it */
		return;
	}

	/* the caller wants us to create a new copy of "copy_map" */
	target_copy_map = vm_map_copy_allocate();
	target_copy_map->type = copy_map->type;
	assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
	target_copy_map->offset = copy_map->offset;
	target_copy_map->size = copy_map->size;
	target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
	vm_map_store_init(&target_copy_map->cpy_hdr);
	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = entry->vme_next) {
		target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
		vm_map_entry_copy_full(target_entry, entry);
		/* the duplicated entry needs its own reference on its backing store */
		if (target_entry->is_sub_map) {
			vm_map_reference(VME_SUBMAP(target_entry));
		} else {
			vm_object_reference(VME_OBJECT(target_entry));
		}
		/* append, preserving the original entry order */
		vm_map_copy_entry_link(
			target_copy_map,
			vm_map_copy_last_entry(target_copy_map),
			target_entry);
	}
	entry = VM_MAP_ENTRY_NULL;
	*target_copy_map_p = target_copy_map;
}
17503 
void
vm_map_copy_trim(
	vm_map_copy_t   copy_map,
	int             new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end);
/*
 * Remove the range [trim_start, trim_end) from "copy_map".
 * "trim_start"/"trim_end" are offsets relative to the start of the first
 * entry (they are rebased onto vme_start below).  Entries overlapping the
 * range are clipped at its boundaries using "new_page_shift" for the
 * clipping granularity; fully-covered (post-clip) entries are unlinked,
 * their backing submap/object reference dropped, and disposed of.
 * "copy_map"'s size is reduced accordingly; its original page_shift is
 * restored before returning (it is only swapped temporarily so the
 * clip macros operate at the new granularity).
 */
void
vm_map_copy_trim(
	vm_map_copy_t   copy_map,
	int             new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end)
{
	int             copy_page_shift;
	vm_map_entry_t  entry, next_entry;

	assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
	assert(copy_map->cpy_hdr.nentries > 0);

	/* rebase the trim range onto the first entry's start address */
	trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
	trim_end += vm_map_copy_first_entry(copy_map)->vme_start;

	/* use the new page_shift to do the clipping */
	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_map->cpy_hdr.page_shift = new_page_shift;

	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = next_entry) {
		/* capture the successor first: "entry" may be disposed below */
		next_entry = entry->vme_next;
		if (entry->vme_end <= trim_start) {
			/* entry fully before trim range: skip */
			continue;
		}
		if (entry->vme_start >= trim_end) {
			/* entry fully after trim range: done (entries are sorted) */
			break;
		}
		/* clip entry if needed */
		vm_map_copy_clip_start(copy_map, entry, trim_start);
		vm_map_copy_clip_end(copy_map, entry, trim_end);
		/* dispose of entry (now fully inside the trim range) */
		copy_map->size -= entry->vme_end - entry->vme_start;
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(copy_map, entry);
		entry = VM_MAP_ENTRY_NULL;
	}

	/* restore copy_map's original page_shift */
	copy_map->cpy_hdr.page_shift = copy_page_shift;
}
17560 
17561 /*
17562  * Make any necessary adjustments to "copy_map" to allow it to be
17563  * mapped into "target_map".
17564  * If no changes were necessary, "target_copy_map" points to the
17565  * untouched "copy_map".
17566  * If changes are necessary, changes will be made to "target_copy_map".
17567  * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17568  * copy the original "copy_map" to it before applying the changes.
17569  * The caller should discard "target_copy_map" if it's not the same as
17570  * the original "copy_map".
17571  */
17572 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
/*
 * (See the block comment above for the overall contract.)
 *
 * Parameters:
 *  src_copy_map      [in]     original copy map (VM_MAP_COPY_ENTRY_LIST).
 *  offset, size      [in]     sub-range of interest within "src_copy_map".
 *  target_map        [in]     map this copy is destined for; only its
 *                             page size/shift is consulted here.
 *  copy              [in]     TRUE if the caller intends a physical/COW
 *                             copy rather than sharing; determines how
 *                             mis-alignments may be resolved.
 *  target_copy_map_p [in/out] on input, an optional pre-made mutable copy;
 *                             on output, the adjusted copy map (may be
 *                             "src_copy_map" itself if nothing changed).
 *  overmap_start_p   [out]    extra amount over-mapped at the start to
 *                             re-align to the target page size.
 *  overmap_end_p     [out]    extra amount over-mapped at the end.
 *  trimmed_start_p   [out]    amount trimmed from the start of the range.
 *
 * Returns KERN_SUCCESS, KERN_INVALID_ARGUMENT (range out of bounds), or
 * KERN_NOT_SUPPORTED (interior mis-alignment while sharing).
 */
kern_return_t
vm_map_copy_adjust_to_target(
	vm_map_copy_t           src_copy_map,
	vm_map_offset_t         offset,
	vm_map_size_t           size,
	vm_map_t                target_map,
	boolean_t               copy,
	vm_map_copy_t           *target_copy_map_p,
	vm_map_offset_t         *overmap_start_p,
	vm_map_offset_t         *overmap_end_p,
	vm_map_offset_t         *trimmed_start_p)
{
	vm_map_copy_t           copy_map, target_copy_map;
	vm_map_size_t           target_size;
	vm_map_size_t           src_copy_map_size;
	vm_map_size_t           overmap_start, overmap_end;
	int                     misalignments;
	vm_map_entry_t          entry, target_entry;
	vm_map_offset_t         addr_adjustment;
	vm_map_offset_t         new_start, new_end;
	int                     copy_page_mask, target_page_mask;
	int                     copy_page_shift, target_page_shift;
	vm_map_offset_t         trimmed_end;

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(src_copy_map);
	assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);

	/*
	 * Start working with "src_copy_map" but we'll switch
	 * to "target_copy_map" as soon as we start making adjustments.
	 */
	copy_map = src_copy_map;
	src_copy_map_size = src_copy_map->size;

	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
	target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
	target_page_mask = VM_MAP_PAGE_MASK(target_map);

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);

	target_copy_map = *target_copy_map_p;
	if (target_copy_map != VM_MAP_COPY_NULL) {
		vm_map_copy_require(target_copy_map);
	}

	/* reject a sub-range that extends past the end of the copy map */
	if (offset + size > copy_map->size) {
		DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
		return KERN_INVALID_ARGUMENT;
	}

	/* trim the end (rounded up to the target page size) */
	trimmed_end = 0;
	new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
	if (new_end < copy_map->size) {
		trimmed_end = src_copy_map_size - new_end;
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    new_end, copy_map->size);
	}

	/* trim the start (truncated down to the target page size) */
	new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
	if (new_start != 0) {
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    0, new_start);
	}
	*trimmed_start_p = new_start;

	/* target_size starts with what's left after trimming */
	target_size = copy_map->size;
	assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
	    "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
	    (uint64_t)target_size, (uint64_t)src_copy_map_size,
	    (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);

	/* check for misalignments but don't adjust yet */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	if (copy_page_shift < target_page_shift) {
		/*
		 * Remapping from 4K to 16K: check the VM object alignments
		 * throughout the range.
		 * If the start and end of the range are mis-aligned, we can
		 * over-map to re-align, and adjust the "overmap" start/end
		 * and "target_size" of the range accordingly.
		 * If there is any mis-alignment within the range:
		 *     if "copy":
		 *         we can do immediate-copy instead of copy-on-write,
		 *     else:
		 *         no way to remap and share; fail.
		 */
		for (entry = vm_map_copy_first_entry(copy_map);
		    entry != vm_map_copy_to_entry(copy_map);
		    entry = entry->vme_next) {
			vm_object_offset_t object_offset_start, object_offset_end;

			object_offset_start = VME_OFFSET(entry);
			object_offset_end = object_offset_start;
			object_offset_end += entry->vme_end - entry->vme_start;
			if (object_offset_start & target_page_mask) {
				/* only the first entry may over-map, and only when sharing */
				if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
					overmap_start++;
				} else {
					misalignments++;
				}
			}
			if (object_offset_end & target_page_mask) {
				/* only the last entry may over-map, and only when sharing */
				if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
					overmap_end++;
				} else {
					misalignments++;
				}
			}
		}
	}
	entry = VM_MAP_ENTRY_NULL;

	/* decide how to deal with misalignments */
	assert(overmap_start <= 1);
	assert(overmap_end <= 1);
	if (!overmap_start && !overmap_end && !misalignments) {
		/* copy_map is properly aligned for target_map ... */
		if (*trimmed_start_p) {
			/* ... but we trimmed it, so still need to adjust */
		} else {
			/* ... and we didn't trim anything: we're done */
			if (target_copy_map == VM_MAP_COPY_NULL) {
				target_copy_map = copy_map;
			}
			*target_copy_map_p = target_copy_map;
			*overmap_start_p = 0;
			*overmap_end_p = 0;
			DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
			return KERN_SUCCESS;
		}
	} else if (misalignments && !copy) {
		/* can't "share" if misaligned */
		DEBUG4K_ADJUST("unsupported sharing\n");
#if MACH_ASSERT
		if (debug4k_panic_on_misaligned_sharing) {
			panic("DEBUG4k %s:%d unsupported sharing", __FUNCTION__, __LINE__);
		}
#endif /* MACH_ASSERT */
		DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
		return KERN_NOT_SUPPORTED;
	} else {
		/* can't virtual-copy if misaligned (but can physical-copy) */
		DEBUG4K_ADJUST("mis-aligned copying\n");
	}

	/* get a "target_copy_map" if needed and switch to it */
	vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
	copy_map = target_copy_map;

	if (misalignments && copy) {
		vm_map_size_t target_copy_map_size;

		/*
		 * Can't do copy-on-write with misaligned mappings.
		 * Replace the mappings with a physical copy of the original
		 * mappings' contents.
		 */
		target_copy_map_size = target_copy_map->size;
		kern_return_t kr = vm_map_copy_to_physcopy(target_copy_map, target_map);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		*target_copy_map_p = target_copy_map;
		*overmap_start_p = 0;
		/* physcopy rounded the size up: report the growth as end overmap */
		*overmap_end_p = target_copy_map->size - target_copy_map_size;
		DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
		return KERN_SUCCESS;
	}

	/* apply the adjustments */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	/* remove copy_map->offset, so that everything starts at offset 0 */
	addr_adjustment = copy_map->offset;
	/* also remove whatever we trimmed from the start */
	addr_adjustment += *trimmed_start_p;
	for (target_entry = vm_map_copy_first_entry(target_copy_map);
	    target_entry != vm_map_copy_to_entry(target_copy_map);
	    target_entry = target_entry->vme_next) {
		vm_object_offset_t object_offset_start, object_offset_end;

		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
		object_offset_start = VME_OFFSET(target_entry);
		if (object_offset_start & target_page_mask) {
			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
			if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
				/*
				 * start of 1st entry is mis-aligned:
				 * re-adjust by over-mapping.
				 */
				overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
				VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
			} else {
				misalignments++;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
				/* interior misalignment only tolerated in the "copy" case */
				assert(copy);
			}
		}

		/* shift every entry after the first by the start over-mapping */
		if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
			target_size += overmap_start;
		} else {
			target_entry->vme_start += overmap_start;
		}
		target_entry->vme_end += overmap_start;

		object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
		if (object_offset_end & target_page_mask) {
			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
			if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
				/*
				 * end of last entry is mis-aligned: re-adjust by over-mapping.
				 */
				overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
				target_entry->vme_end += overmap_end;
				target_size += overmap_end;
			} else {
				misalignments++;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
				assert(copy);
			}
		}
		/* rebase the entry so the copy map starts at address 0 */
		target_entry->vme_start -= addr_adjustment;
		target_entry->vme_end -= addr_adjustment;
		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
	}

	target_copy_map->size = target_size;
	target_copy_map->offset += overmap_start;
	target_copy_map->offset -= addr_adjustment;
	target_copy_map->cpy_hdr.page_shift = target_page_shift;

//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
	assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
	assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));

	*target_copy_map_p = target_copy_map;
	*overmap_start_p = overmap_start;
	*overmap_end_p = overmap_end;

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
	return KERN_SUCCESS;
}
17840 
/*
 * Compute the "physical" size that the range [start, start+size) of "map"
 * would occupy once expressed in the kernel's native page size.
 *
 * For a map whose page size already matches PAGE_SIZE, this is simply the
 * range rounded to the map's page boundaries.  For a map with a smaller
 * page size (VM_MAP_PAGE_SIZE(map) != PAGE_SIZE), the range is extracted
 * and adjusted against "kernel_map" via vm_map_copy_adjust_to_target(),
 * which accounts for the extra coverage ("overmap") caused by page-size
 * rounding; the adjusted copy's size is returned in *phys_size.
 *
 * Returns KERN_SUCCESS (with *phys_size set, possibly to 0) or the error
 * from vm_map_copy_extract() / vm_map_copy_adjust_to_target(), in which
 * case *phys_size is set to 0.
 */
kern_return_t
vm_map_range_physical_size(
	vm_map_t         map,
	vm_map_address_t start,
	mach_vm_size_t   size,
	mach_vm_size_t * phys_size)
{
	kern_return_t   kr;
	vm_map_copy_t   copy_map, target_copy_map;
	vm_map_offset_t adjusted_start, adjusted_end;
	vm_map_size_t   adjusted_size;
	vm_prot_t       cur_prot, max_prot;
	vm_map_offset_t overmap_start, overmap_end, trimmed_start;
	vm_map_kernel_flags_t vmk_flags;

	/* Round the requested range to the map's own page boundaries first. */
	adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
	adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
	adjusted_size = adjusted_end - adjusted_start;
	*phys_size = adjusted_size;
	if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
		/* map uses the kernel page size: no further adjustment needed */
		return KERN_SUCCESS;
	}
	if (start == 0) {
		/*
		 * Special-case a NULL start address: just re-round with the
		 * kernel page mask instead of extracting a copy of the range.
		 */
		adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
		adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
		adjusted_size = adjusted_end - adjusted_start;
		*phys_size = adjusted_size;
		return KERN_SUCCESS;
	}
	if (adjusted_size == 0) {
		/* empty range: nothing would be mapped */
		DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
		*phys_size = 0;
		return KERN_SUCCESS;
	}

	/*
	 * Extract a pageable, same-map copy of the range so it can be
	 * measured against the kernel's page size without modifying "map".
	 */
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_copy_pageable = TRUE;
	vmk_flags.vmkf_copy_same_map = TRUE;
	assert(adjusted_size != 0);
	cur_prot = VM_PROT_NONE; /* legacy mode */
	max_prot = VM_PROT_NONE; /* legacy mode */
	kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
	    FALSE /* copy */,
	    &copy_map,
	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
	    vmk_flags);
	if (kr != KERN_SUCCESS) {
		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
		//assert(0);
		*phys_size = 0;
		return kr;
	}
	assert(copy_map != VM_MAP_COPY_NULL);
	target_copy_map = copy_map;
	DEBUG4K_ADJUST("adjusting...\n");
	/*
	 * Adjust the copy to "kernel_map"'s page size; the adjusted copy's
	 * size is the physical footprint we are looking for.
	 */
	kr = vm_map_copy_adjust_to_target(
		copy_map,
		start - adjusted_start, /* offset */
		size, /* size */
		kernel_map,
		FALSE,                          /* copy */
		&target_copy_map,
		&overmap_start,
		&overmap_end,
		&trimmed_start);
	if (kr == KERN_SUCCESS) {
		if (target_copy_map->size != *phys_size) {
			DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
		}
		*phys_size = target_copy_map->size;
	} else {
		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
		//assert(0);
		*phys_size = 0;
	}
	/* the extracted copy (possibly adjusted in place) is no longer needed */
	vm_map_copy_discard(copy_map);
	copy_map = VM_MAP_COPY_NULL;

	return kr;
}
17921 
17922 
/*
 * Check whether the VM map copy backing a named-entry port would need to
 * be adjusted before being mapped into "src_map".
 *
 * When "src_map" uses a smaller page size than the kernel
 * (VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT), the backing copy is run
 * through vm_map_copy_adjust_to_target(), which reports in
 * *overmap_start / *overmap_end how much extra coverage the mapping
 * would gain at either end due to page-size rounding.
 *
 * "port" must be a valid IKOT_NAMED_ENTRY port (asserted).
 * Returns KERN_SUCCESS, or the error from vm_map_copy_adjust_to_target().
 */
kern_return_t
memory_entry_check_for_adjustment(
	vm_map_t                        src_map,
	ipc_port_t                      port,
	vm_map_offset_t         *overmap_start,
	vm_map_offset_t         *overmap_end)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;

	assert(port);
	assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));

	vm_named_entry_t        named_entry;

	named_entry = mach_memory_entry_from_port(port);
	/* hold the named-entry lock while inspecting its backing copy */
	named_entry_lock(named_entry);
	copy_map = named_entry->backing.copy;
	target_copy_map = copy_map;

	if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
		vm_map_offset_t trimmed_start;

		trimmed_start = 0;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy_map,
			0, /* offset */
			copy_map->size, /* size */
			src_map,
			FALSE, /* copy */
			&target_copy_map,
			overmap_start,
			overmap_end,
			&trimmed_start);
		/* adjusting from offset 0 should never trim the start */
		assert(trimmed_start == 0);
	}
	named_entry_unlock(named_entry);

	return kr;
}
17964 
17965 
17966 /*
17967  *	Routine:	vm_remap
17968  *
17969  *			Map portion of a task's address space.
17970  *			Mapped region must not overlap more than
17971  *			one vm memory object. Protections and
17972  *			inheritance attributes remain the same
17973  *			as in the original task and are	out parameters.
17974  *			Source and Target task can be identical
17975  *			Other attributes are identical as for vm_map()
17976  */
17977 kern_return_t
vm_map_remap(vm_map_t target_map,vm_map_address_t * address,vm_map_size_t size,vm_map_offset_t mask,int flags,vm_map_kernel_flags_t vmk_flags,vm_tag_t tag,vm_map_t src_map,vm_map_offset_t memory_address,boolean_t copy,vm_prot_t * cur_protection,vm_prot_t * max_protection,vm_inherit_t inheritance)17978 vm_map_remap(
17979 	vm_map_t                target_map,
17980 	vm_map_address_t        *address,
17981 	vm_map_size_t           size,
17982 	vm_map_offset_t         mask,
17983 	int                     flags,
17984 	vm_map_kernel_flags_t   vmk_flags,
17985 	vm_tag_t                tag,
17986 	vm_map_t                src_map,
17987 	vm_map_offset_t         memory_address,
17988 	boolean_t               copy,
17989 	vm_prot_t               *cur_protection, /* IN/OUT */
17990 	vm_prot_t               *max_protection, /* IN/OUT */
17991 	vm_inherit_t            inheritance)
17992 {
17993 	kern_return_t           result;
17994 	vm_map_entry_t          entry;
17995 	vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
17996 	vm_map_entry_t          new_entry;
17997 	vm_map_copy_t           copy_map;
17998 	vm_map_offset_t         offset_in_mapping;
17999 	vm_map_size_t           target_size = 0;
18000 	vm_map_size_t           src_page_mask, target_page_mask;
18001 	vm_map_offset_t         overmap_start, overmap_end, trimmed_start;
18002 	vm_map_offset_t         initial_memory_address;
18003 	vm_map_size_t           initial_size;
18004 
18005 	if (target_map == VM_MAP_NULL) {
18006 		return KERN_INVALID_ARGUMENT;
18007 	}
18008 
18009 	initial_memory_address = memory_address;
18010 	initial_size = size;
18011 	src_page_mask = VM_MAP_PAGE_MASK(src_map);
18012 	target_page_mask = VM_MAP_PAGE_MASK(target_map);
18013 
18014 	switch (inheritance) {
18015 	case VM_INHERIT_NONE:
18016 	case VM_INHERIT_COPY:
18017 	case VM_INHERIT_SHARE:
18018 		if (size != 0 && src_map != VM_MAP_NULL) {
18019 			break;
18020 		}
18021 		OS_FALLTHROUGH;
18022 	default:
18023 		return KERN_INVALID_ARGUMENT;
18024 	}
18025 
18026 	if (src_page_mask != target_page_mask) {
18027 		if (copy) {
18028 			DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18029 		} else {
18030 			DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
18031 		}
18032 	}
18033 
18034 	/*
18035 	 * If the user is requesting that we return the address of the
18036 	 * first byte of the data (rather than the base of the page),
18037 	 * then we use different rounding semantics: specifically,
18038 	 * we assume that (memory_address, size) describes a region
18039 	 * all of whose pages we must cover, rather than a base to be truncated
18040 	 * down and a size to be added to that base.  So we figure out
18041 	 * the highest page that the requested region includes and make
18042 	 * sure that the size will cover it.
18043 	 *
18044 	 * The key example we're worried about it is of the form:
18045 	 *
18046 	 *              memory_address = 0x1ff0, size = 0x20
18047 	 *
18048 	 * With the old semantics, we round down the memory_address to 0x1000
18049 	 * and round up the size to 0x1000, resulting in our covering *only*
18050 	 * page 0x1000.  With the new semantics, we'd realize that the region covers
18051 	 * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
18052 	 * 0x1000 and page 0x2000 in the region we remap.
18053 	 */
18054 	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18055 		vm_map_offset_t range_start, range_end;
18056 
18057 		range_start = vm_map_trunc_page(memory_address, src_page_mask);
18058 		range_end = vm_map_round_page(memory_address + size, src_page_mask);
18059 		memory_address = range_start;
18060 		size = range_end - range_start;
18061 		offset_in_mapping = initial_memory_address - memory_address;
18062 	} else {
18063 		/*
18064 		 * IMPORTANT:
18065 		 * This legacy code path is broken: for the range mentioned
18066 		 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
18067 		 * two 4k pages, it yields [ memory_address = 0x1000,
18068 		 * size = 0x1000 ], which covers only the first 4k page.
18069 		 * BUT some code unfortunately depends on this bug, so we
18070 		 * can't fix it without breaking something.
18071 		 * New code should get automatically opted in the new
18072 		 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
18073 		 */
18074 		offset_in_mapping = 0;
18075 		memory_address = vm_map_trunc_page(memory_address, src_page_mask);
18076 		size = vm_map_round_page(size, src_page_mask);
18077 		initial_memory_address = memory_address;
18078 		initial_size = size;
18079 	}
18080 
18081 
18082 	if (size == 0) {
18083 		return KERN_INVALID_ARGUMENT;
18084 	}
18085 
18086 	if (flags & VM_FLAGS_RESILIENT_MEDIA) {
18087 		/* must be copy-on-write to be "media resilient" */
18088 		if (!copy) {
18089 			return KERN_INVALID_ARGUMENT;
18090 		}
18091 	}
18092 
18093 	vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
18094 	vmk_flags.vmkf_copy_same_map = (src_map == target_map);
18095 
18096 	assert(size != 0);
18097 	result = vm_map_copy_extract(src_map,
18098 	    memory_address,
18099 	    size,
18100 	    copy, &copy_map,
18101 	    cur_protection, /* IN/OUT */
18102 	    max_protection, /* IN/OUT */
18103 	    inheritance,
18104 	    vmk_flags);
18105 	if (result != KERN_SUCCESS) {
18106 		return result;
18107 	}
18108 	assert(copy_map != VM_MAP_COPY_NULL);
18109 
18110 	overmap_start = 0;
18111 	overmap_end = 0;
18112 	trimmed_start = 0;
18113 	target_size = size;
18114 	if (src_page_mask != target_page_mask) {
18115 		vm_map_copy_t target_copy_map;
18116 
18117 		target_copy_map = copy_map; /* can modify "copy_map" itself */
18118 		DEBUG4K_ADJUST("adjusting...\n");
18119 		result = vm_map_copy_adjust_to_target(
18120 			copy_map,
18121 			offset_in_mapping, /* offset */
18122 			initial_size,
18123 			target_map,
18124 			copy,
18125 			&target_copy_map,
18126 			&overmap_start,
18127 			&overmap_end,
18128 			&trimmed_start);
18129 		if (result != KERN_SUCCESS) {
18130 			DEBUG4K_COPY("failed to adjust 0x%x\n", result);
18131 			vm_map_copy_discard(copy_map);
18132 			return result;
18133 		}
18134 		if (trimmed_start == 0) {
18135 			/* nothing trimmed: no adjustment needed */
18136 		} else if (trimmed_start >= offset_in_mapping) {
18137 			/* trimmed more than offset_in_mapping: nothing left */
18138 			assert(overmap_start == 0);
18139 			assert(overmap_end == 0);
18140 			offset_in_mapping = 0;
18141 		} else {
18142 			/* trimmed some of offset_in_mapping: adjust */
18143 			assert(overmap_start == 0);
18144 			assert(overmap_end == 0);
18145 			offset_in_mapping -= trimmed_start;
18146 		}
18147 		offset_in_mapping += overmap_start;
18148 		target_size = target_copy_map->size;
18149 	}
18150 
18151 	/*
18152 	 * Allocate/check a range of free virtual address
18153 	 * space for the target
18154 	 */
18155 	*address = vm_map_trunc_page(*address, target_page_mask);
18156 	vm_map_lock(target_map);
18157 	target_size = vm_map_round_page(target_size, target_page_mask);
18158 	result = vm_map_remap_range_allocate(target_map, address,
18159 	    target_size,
18160 	    mask, flags, vmk_flags, tag,
18161 	    &insp_entry);
18162 
18163 	for (entry = vm_map_copy_first_entry(copy_map);
18164 	    entry != vm_map_copy_to_entry(copy_map);
18165 	    entry = new_entry) {
18166 		new_entry = entry->vme_next;
18167 		vm_map_copy_entry_unlink(copy_map, entry);
18168 		if (result == KERN_SUCCESS) {
18169 			if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18170 				/* no codesigning -> read-only access */
18171 				entry->max_protection = VM_PROT_READ;
18172 				entry->protection = VM_PROT_READ;
18173 				entry->vme_resilient_codesign = TRUE;
18174 			}
18175 			entry->vme_start += *address;
18176 			entry->vme_end += *address;
18177 			assert(!entry->map_aligned);
18178 			if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
18179 			    !entry->is_sub_map &&
18180 			    (VME_OBJECT(entry) == VM_OBJECT_NULL ||
18181 			    VME_OBJECT(entry)->internal)) {
18182 				entry->vme_resilient_media = TRUE;
18183 			}
18184 			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
18185 			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
18186 			assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
18187 			vm_map_store_entry_link(target_map, insp_entry, entry,
18188 			    vmk_flags);
18189 			insp_entry = entry;
18190 		} else {
18191 			if (!entry->is_sub_map) {
18192 				vm_object_deallocate(VME_OBJECT(entry));
18193 			} else {
18194 				vm_map_deallocate(VME_SUBMAP(entry));
18195 			}
18196 			vm_map_copy_entry_dispose(copy_map, entry);
18197 		}
18198 	}
18199 
18200 	if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
18201 		*cur_protection = VM_PROT_READ;
18202 		*max_protection = VM_PROT_READ;
18203 	}
18204 
18205 	if (target_map->disable_vmentry_reuse == TRUE) {
18206 		assert(!target_map->is_nested_map);
18207 		if (target_map->highest_entry_end < insp_entry->vme_end) {
18208 			target_map->highest_entry_end = insp_entry->vme_end;
18209 		}
18210 	}
18211 
18212 	if (result == KERN_SUCCESS) {
18213 		target_map->size += target_size;
18214 		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18215 
18216 	}
18217 	vm_map_unlock(target_map);
18218 
18219 	if (result == KERN_SUCCESS && target_map->wiring_required) {
18220 		result = vm_map_wire_kernel(target_map, *address,
18221 		    *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18222 		    TRUE);
18223 	}
18224 
18225 	/*
18226 	 * If requested, return the address of the data pointed to by the
18227 	 * request, rather than the base of the resulting page.
18228 	 */
18229 	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18230 		*address += offset_in_mapping;
18231 	}
18232 
18233 	if (src_page_mask != target_page_mask) {
18234 		DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx  result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18235 	}
18236 	vm_map_copy_discard(copy_map);
18237 	copy_map = VM_MAP_COPY_NULL;
18238 
18239 	return result;
18240 }
18241 
18242 /*
18243  *	Routine:	vm_map_remap_range_allocate
18244  *
18245  *	Description:
18246  *		Allocate a range in the specified virtual address map.
18247  *		returns the address and the map entry just before the allocated
18248  *		range
18249  *
18250  *	Map must be locked.
18251  */
18252 
static kern_return_t
vm_map_remap_range_allocate(
	vm_map_t                map,
	vm_map_address_t        *address,       /* IN/OUT */
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	__unused vm_tag_t       tag,
	vm_map_entry_t          *map_entry)     /* OUT */
{
	vm_map_entry_t  entry;
	vm_map_offset_t start;
	vm_map_offset_t end;
	vm_map_offset_t desired_empty_end;
	kern_return_t   kr;
	vm_map_entry_t          hole_entry;

StartAgain:;

	start = *address;

	if (flags & VM_FLAGS_ANYWHERE) {
		if (flags & VM_FLAGS_RANDOM_ADDR) {
			/*
			 * Get a random start address.
			 */
			kr = vm_map_random_address_for_size(map, address, size);
			if (kr != KERN_SUCCESS) {
				return kr;
			}
			start = *address;
		}

		/*
		 *	Calculate the first possible address.
		 */

		if (start < map->min_offset) {
			start = map->min_offset;
		}
		if (start > map->max_offset) {
			return KERN_NO_SPACE;
		}

		/*
		 *	Look for the first possible address;
		 *	if there's already something at this
		 *	address, we have to start after it.
		 */

		if (map->disable_vmentry_reuse == TRUE) {
			VM_MAP_HIGHEST_ENTRY(map, entry, start);
		} else {
			if (map->holelistenabled) {
				/* walk the circular hole list for the first usable hole */
				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

				if (hole_entry == NULL) {
					/*
					 * No more space in the map?
					 */
					return KERN_NO_SPACE;
				} else {
					boolean_t found_hole = FALSE;

					do {
						if (hole_entry->vme_start >= start) {
							/* hole begins at or after "start": use its start */
							start = hole_entry->vme_start;
							found_hole = TRUE;
							break;
						}

						if (hole_entry->vme_end > start) {
							/* "start" falls inside this hole: keep "start" */
							found_hole = TRUE;
							break;
						}
						hole_entry = hole_entry->vme_next;
					} while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));

					if (found_hole == FALSE) {
						return KERN_NO_SPACE;
					}

					entry = hole_entry;
				}
			} else {
				/* no hole list: use the first-free hint / entry lookup */
				assert(first_free_is_valid(map));
				if (start == map->min_offset) {
					if ((entry = map->first_free) != vm_map_to_entry(map)) {
						start = entry->vme_end;
					}
				} else {
					vm_map_entry_t  tmp_entry;
					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
						start = tmp_entry->vme_end;
					}
					entry = tmp_entry;
				}
			}
			start = vm_map_round_page(start,
			    VM_MAP_PAGE_MASK(map));
		}

		/*
		 *	In any case, the "entry" always precedes
		 *	the proposed new region throughout the
		 *	loop:
		 */

		while (TRUE) {
			vm_map_entry_t  next;

			/*
			 *	Find the end of the proposed new region.
			 *	Be sure we didn't go beyond the end, or
			 *	wrap around the address.
			 */

			/* apply the caller's alignment mask, then page-align */
			end = ((start + mask) & ~mask);
			end = vm_map_round_page(end,
			    VM_MAP_PAGE_MASK(map));
			if (end < start) {
				return KERN_NO_SPACE;
			}
			start = end;
			end += size;

			/* We want an entire page of empty space, but don't increase the allocation size. */
			desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));

			if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
				if (map->wait_for_space) {
					if (size <= (map->max_offset -
					    map->min_offset)) {
						/* block until space may be freed, then retry from scratch */
						assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
						vm_map_unlock(map);
						thread_block(THREAD_CONTINUE_NULL);
						vm_map_lock(map);
						goto StartAgain;
					}
				}

				return KERN_NO_SPACE;
			}

			next = entry->vme_next;

			if (map->holelistenabled) {
				/* "entry" is a hole: it must contain the whole region */
				if (entry->vme_end >= desired_empty_end) {
					break;
				}
			} else {
				/*
				 *	If there are no more entries, we must win.
				 *
				 *	OR
				 *
				 *	If there is another entry, it must be
				 *	after the end of the potential new region.
				 */

				if (next == vm_map_to_entry(map)) {
					break;
				}

				if (next->vme_start >= desired_empty_end) {
					break;
				}
			}

			/*
			 *	Didn't fit -- move to the next entry.
			 */

			entry = next;

			if (map->holelistenabled) {
				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
					/*
					 * Wrapped around
					 */
					return KERN_NO_SPACE;
				}
				start = entry->vme_start;
			} else {
				start = entry->vme_end;
			}
		}

		if (map->holelistenabled) {
			/* sanity check: the chosen address must still be a hole */
			if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
				panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.", entry, (unsigned long long)entry->vme_start);
			}
		}

		*address = start;
	} else {
		vm_map_entry_t          temp_entry;

		/*
		 *	Verify that:
		 *		the address doesn't itself violate
		 *		the mask requirement.
		 */

		if ((start & mask) != 0) {
			return KERN_NO_SPACE;
		}


		/*
		 *	...	the address is within bounds
		 */

		end = start + size;

		if ((start < map->min_offset) ||
		    (end > map->max_offset) ||
		    (start >= end)) {
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * If we're asked to overwrite whatever was mapped in that
		 * range, first deallocate that range.
		 */
		if (flags & VM_FLAGS_OVERWRITE) {
			vm_map_t zap_map;
			int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;

			/*
			 * We use a "zap_map" to avoid having to unlock
			 * the "map" in vm_map_delete(), which would compromise
			 * the atomicity of the "deallocate" and then "remap"
			 * combination.
			 */
			zap_map = vm_map_create(PMAP_NULL,
			    start,
			    end,
			    map->hdr.entries_pageable);
			if (zap_map == VM_MAP_NULL) {
				return KERN_RESOURCE_SHORTAGE;
			}
			vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
			vm_map_disable_hole_optimization(zap_map);

			if (vmk_flags.vmkf_overwrite_immutable) {
				remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
			}
			kr = vm_map_delete(map, start, end,
			    remove_flags,
			    zap_map);
			if (kr == KERN_SUCCESS) {
				/* removed entries are now owned by zap_map: destroy it */
				vm_map_destroy(zap_map,
				    VM_MAP_REMOVE_NO_PMAP_CLEANUP);
				zap_map = VM_MAP_NULL;
			}
		}

		/*
		 *	...	the starting address isn't allocated
		 */

		if (vm_map_lookup_entry(map, start, &temp_entry)) {
			return KERN_NO_SPACE;
		}

		entry = temp_entry;

		/*
		 *	...	the next region doesn't overlap the
		 *		end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end)) {
			return KERN_NO_SPACE;
		}
	}
	*map_entry = entry;
	return KERN_SUCCESS;
}
18535 
18536 /*
18537  *	vm_map_switch:
18538  *
18539  *	Set the address map for the current thread to the specified map
18540  */
18541 
18542 vm_map_t
vm_map_switch(vm_map_t map)18543 vm_map_switch(
18544 	vm_map_t        map)
18545 {
18546 	int             mycpu;
18547 	thread_t        thread = current_thread();
18548 	vm_map_t        oldmap = thread->map;
18549 
18550 	mp_disable_preemption();
18551 	mycpu = cpu_number();
18552 
18553 	/*
18554 	 *	Deactivate the current map and activate the requested map
18555 	 */
18556 	PMAP_SWITCH_USER(thread, map, mycpu);
18557 
18558 	mp_enable_preemption();
18559 	return oldmap;
18560 }
18561 
18562 
18563 /*
18564  *	Routine:	vm_map_write_user
18565  *
18566  *	Description:
18567  *		Copy out data from a kernel space into space in the
18568  *		destination map. The space must already exist in the
18569  *		destination map.
18570  *		NOTE:  This routine should only be called by threads
18571  *		which can block on a page fault. i.e. kernel mode user
18572  *		threads.
18573  *
18574  */
18575 kern_return_t
vm_map_write_user(vm_map_t map,void * src_p,vm_map_address_t dst_addr,vm_size_t size)18576 vm_map_write_user(
18577 	vm_map_t                map,
18578 	void                    *src_p,
18579 	vm_map_address_t        dst_addr,
18580 	vm_size_t               size)
18581 {
18582 	kern_return_t   kr = KERN_SUCCESS;
18583 
18584 	if (current_map() == map) {
18585 		if (copyout(src_p, dst_addr, size)) {
18586 			kr = KERN_INVALID_ADDRESS;
18587 		}
18588 	} else {
18589 		vm_map_t        oldmap;
18590 
18591 		/* take on the identity of the target map while doing */
18592 		/* the transfer */
18593 
18594 		vm_map_reference(map);
18595 		oldmap = vm_map_switch(map);
18596 		if (copyout(src_p, dst_addr, size)) {
18597 			kr = KERN_INVALID_ADDRESS;
18598 		}
18599 		vm_map_switch(oldmap);
18600 		vm_map_deallocate(map);
18601 	}
18602 	return kr;
18603 }
18604 
18605 /*
18606  *	Routine:	vm_map_read_user
18607  *
18608  *	Description:
18609  *		Copy in data from a user space source map into the
18610  *		kernel map. The space must already exist in the
18611  *		kernel map.
18612  *		NOTE:  This routine should only be called by threads
18613  *		which can block on a page fault. i.e. kernel mode user
18614  *		threads.
18615  *
18616  */
18617 kern_return_t
vm_map_read_user(vm_map_t map,vm_map_address_t src_addr,void * dst_p,vm_size_t size)18618 vm_map_read_user(
18619 	vm_map_t                map,
18620 	vm_map_address_t        src_addr,
18621 	void                    *dst_p,
18622 	vm_size_t               size)
18623 {
18624 	kern_return_t   kr = KERN_SUCCESS;
18625 
18626 	if (current_map() == map) {
18627 		if (copyin(src_addr, dst_p, size)) {
18628 			kr = KERN_INVALID_ADDRESS;
18629 		}
18630 	} else {
18631 		vm_map_t        oldmap;
18632 
18633 		/* take on the identity of the target map while doing */
18634 		/* the transfer */
18635 
18636 		vm_map_reference(map);
18637 		oldmap = vm_map_switch(map);
18638 		if (copyin(src_addr, dst_p, size)) {
18639 			kr = KERN_INVALID_ADDRESS;
18640 		}
18641 		vm_map_switch(oldmap);
18642 		vm_map_deallocate(map);
18643 	}
18644 	return kr;
18645 }
18646 
18647 
18648 /*
18649  *	vm_map_check_protection:
18650  *
18651  *	Assert that the target map allows the specified
18652  *	privilege on the entire address region given.
18653  *	The entire region must be allocated.
18654  */
18655 boolean_t
vm_map_check_protection(vm_map_t map,vm_map_offset_t start,vm_map_offset_t end,vm_prot_t protection)18656 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18657     vm_map_offset_t end, vm_prot_t protection)
18658 {
18659 	vm_map_entry_t entry;
18660 	vm_map_entry_t tmp_entry;
18661 
18662 	vm_map_lock(map);
18663 
18664 	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18665 		vm_map_unlock(map);
18666 		return FALSE;
18667 	}
18668 
18669 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18670 		vm_map_unlock(map);
18671 		return FALSE;
18672 	}
18673 
18674 	entry = tmp_entry;
18675 
18676 	while (start < end) {
18677 		if (entry == vm_map_to_entry(map)) {
18678 			vm_map_unlock(map);
18679 			return FALSE;
18680 		}
18681 
18682 		/*
18683 		 *	No holes allowed!
18684 		 */
18685 
18686 		if (start < entry->vme_start) {
18687 			vm_map_unlock(map);
18688 			return FALSE;
18689 		}
18690 
18691 		/*
18692 		 * Check protection associated with entry.
18693 		 */
18694 
18695 		if ((entry->protection & protection) != protection) {
18696 			vm_map_unlock(map);
18697 			return FALSE;
18698 		}
18699 
18700 		/* go to next entry */
18701 
18702 		start = entry->vme_end;
18703 		entry = entry->vme_next;
18704 	}
18705 	vm_map_unlock(map);
18706 	return TRUE;
18707 }
18708 
/*
 * Get, set, or purge the purgeability state of the VM object mapped at
 * "address" in "map".
 *
 * "control" selects the operation (VM_PURGABLE_GET_STATE /
 * SET_STATE / SET_STATE_FROM_KERNEL / PURGE_ALL); "*state" is IN for the
 * SET operations and OUT for GET.  The entry at "address" must map a
 * purgeable object directly (not a submap), and SET operations require
 * write permission on the mapping.
 *
 * Locking: takes the map read lock to find the entry, then the object
 * lock; the map lock is dropped before calling
 * vm_object_purgable_control() (the object lock keeps the object alive).
 */
kern_return_t
vm_map_purgable_control(
	vm_map_t                map,
	vm_map_offset_t         address,
	vm_purgable_t           control,
	int                     *state)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	kern_return_t           kr;
	boolean_t               was_nonvolatile;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_GET_STATE &&
	    control != VM_PURGABLE_PURGE_ALL &&
	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control == VM_PURGABLE_PURGE_ALL) {
		/* global operation: no map/entry lookup needed */
		vm_purgeable_object_purge_all();
		return KERN_SUCCESS;
	}

	/* for SET operations, reject state values outside the valid masks */
	if ((control == VM_PURGABLE_SET_STATE ||
	    control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
	    ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->protection & VM_PROT_WRITE) == 0 &&
	    control != VM_PURGABLE_GET_STATE) {
		/*
		 * Can't apply purgable controls to something you can't write.
		 */
		vm_map_unlock_read(map);
		return KERN_PROTECTION_FAILURE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL ||
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Object must already be present and be purgeable.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

#if 00
	if (VME_OFFSET(entry) != 0 ||
	    entry->vme_end - entry->vme_start != object->vo_size) {
		/*
		 * Can only apply purgable controls to the whole (existing)
		 * object at once.
		 */
		vm_map_unlock_read(map);
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}
#endif

	assert(!entry->is_sub_map);
	assert(!entry->use_pmap); /* purgeable has its own accounting */

	/* the object lock now keeps the object alive; map lock can go */
	vm_map_unlock_read(map);

	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);

	kr = vm_object_purgable_control(object, control, state);

	if (was_nonvolatile &&
	    object->purgable != VM_PURGABLE_NONVOLATILE &&
	    map->pmap == kernel_pmap) {
#if DEBUG
		/* record who volatilized a kernel-owned object (debug only) */
		object->vo_purgeable_volatilizer = kernel_task;
#endif /* DEBUG */
	}

	vm_object_unlock(object);

	return kr;
}
18813 
18814 void
vm_map_footprint_query_page_info(vm_map_t map,vm_map_entry_t map_entry,vm_map_offset_t curr_s_offset,int * disposition_p)18815 vm_map_footprint_query_page_info(
18816 	vm_map_t        map,
18817 	vm_map_entry_t  map_entry,
18818 	vm_map_offset_t curr_s_offset,
18819 	int             *disposition_p)
18820 {
18821 	int             pmap_disp;
18822 	vm_object_t     object;
18823 	int             disposition;
18824 	int             effective_page_size;
18825 
18826 	vm_map_lock_assert_held(map);
18827 	assert(!map->has_corpse_footprint);
18828 	assert(curr_s_offset >= map_entry->vme_start);
18829 	assert(curr_s_offset < map_entry->vme_end);
18830 
18831 	object = VME_OBJECT(map_entry);
18832 	if (object == VM_OBJECT_NULL) {
18833 		*disposition_p = 0;
18834 		return;
18835 	}
18836 
18837 	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18838 
18839 	pmap_disp = 0;
18840 	if (object == VM_OBJECT_NULL) {
18841 		/* nothing mapped here: no need to ask */
18842 		*disposition_p = 0;
18843 		return;
18844 	} else if (map_entry->is_sub_map &&
18845 	    !map_entry->use_pmap) {
18846 		/* nested pmap: no footprint */
18847 		*disposition_p = 0;
18848 		return;
18849 	}
18850 
18851 	/*
18852 	 * Query the pmap.
18853 	 */
18854 	pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18855 
18856 	/*
18857 	 * Compute this page's disposition.
18858 	 */
18859 	disposition = 0;
18860 
18861 	/* deal with "alternate accounting" first */
18862 	if (!map_entry->is_sub_map &&
18863 	    object->vo_no_footprint) {
18864 		/* does not count in footprint */
18865 		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18866 	} else if (!map_entry->is_sub_map &&
18867 	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
18868 	    (object->purgable == VM_PURGABLE_DENY &&
18869 	    object->vo_ledger_tag)) &&
18870 	    VM_OBJECT_OWNER(object) != NULL &&
18871 	    VM_OBJECT_OWNER(object)->map == map) {
18872 		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18873 		if ((((curr_s_offset
18874 		    - map_entry->vme_start
18875 		    + VME_OFFSET(map_entry))
18876 		    / effective_page_size) <
18877 		    (object->resident_page_count +
18878 		    vm_compressor_pager_get_count(object->pager)))) {
18879 			/*
18880 			 * Non-volatile purgeable object owned
18881 			 * by this task: report the first
18882 			 * "#resident + #compressed" pages as
18883 			 * "resident" (to show that they
18884 			 * contribute to the footprint) but not
18885 			 * "dirty" (to avoid double-counting
18886 			 * with the fake "non-volatile" region
18887 			 * we'll report at the end of the
18888 			 * address space to account for all
18889 			 * (mapped or not) non-volatile memory
18890 			 * owned by this task.
18891 			 */
18892 			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18893 		}
18894 	} else if (!map_entry->is_sub_map &&
18895 	    (object->purgable == VM_PURGABLE_VOLATILE ||
18896 	    object->purgable == VM_PURGABLE_EMPTY) &&
18897 	    VM_OBJECT_OWNER(object) != NULL &&
18898 	    VM_OBJECT_OWNER(object)->map == map) {
18899 		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18900 		if ((((curr_s_offset
18901 		    - map_entry->vme_start
18902 		    + VME_OFFSET(map_entry))
18903 		    / effective_page_size) <
18904 		    object->wired_page_count)) {
18905 			/*
18906 			 * Volatile|empty purgeable object owned
18907 			 * by this task: report the first
18908 			 * "#wired" pages as "resident" (to
18909 			 * show that they contribute to the
18910 			 * footprint) but not "dirty" (to avoid
18911 			 * double-counting with the fake
18912 			 * "non-volatile" region we'll report
18913 			 * at the end of the address space to
18914 			 * account for all (mapped or not)
18915 			 * non-volatile memory owned by this
18916 			 * task.
18917 			 */
18918 			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18919 		}
18920 	} else if (!map_entry->is_sub_map &&
18921 	    map_entry->iokit_acct &&
18922 	    object->internal &&
18923 	    object->purgable == VM_PURGABLE_DENY) {
18924 		/*
18925 		 * Non-purgeable IOKit memory: phys_footprint
18926 		 * includes the entire virtual mapping.
18927 		 */
18928 		assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18929 		disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18930 		disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18931 	} else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18932 	    PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18933 		/* alternate accounting */
18934 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18935 		if (map->pmap->footprint_was_suspended) {
18936 			/*
18937 			 * The assertion below can fail if dyld
18938 			 * suspended footprint accounting
18939 			 * while doing some adjustments to
18940 			 * this page;  the mapping would say
18941 			 * "use pmap accounting" but the page
18942 			 * would be marked "alternate
18943 			 * accounting".
18944 			 */
18945 		} else
18946 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18947 		{
18948 			assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18949 		}
18950 		disposition = 0;
18951 	} else {
18952 		if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18953 			assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18954 			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18955 			disposition |= VM_PAGE_QUERY_PAGE_REF;
18956 			if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18957 				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18958 			} else {
18959 				disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18960 			}
18961 			if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18962 				disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18963 			}
18964 		} else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18965 			assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18966 			disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18967 		}
18968 	}
18969 
18970 	*disposition_p = disposition;
18971 }
18972 
18973 kern_return_t
vm_map_page_query_internal(vm_map_t target_map,vm_map_offset_t offset,int * disposition,int * ref_count)18974 vm_map_page_query_internal(
18975 	vm_map_t        target_map,
18976 	vm_map_offset_t offset,
18977 	int             *disposition,
18978 	int             *ref_count)
18979 {
18980 	kern_return_t                   kr;
18981 	vm_page_info_basic_data_t       info;
18982 	mach_msg_type_number_t          count;
18983 
18984 	count = VM_PAGE_INFO_BASIC_COUNT;
18985 	kr = vm_map_page_info(target_map,
18986 	    offset,
18987 	    VM_PAGE_INFO_BASIC,
18988 	    (vm_page_info_t) &info,
18989 	    &count);
18990 	if (kr == KERN_SUCCESS) {
18991 		*disposition = info.disposition;
18992 		*ref_count = info.ref_count;
18993 	} else {
18994 		*disposition = 0;
18995 		*ref_count = 0;
18996 	}
18997 
18998 	return kr;
18999 }
19000 
19001 kern_return_t
vm_map_page_info(vm_map_t map,vm_map_offset_t offset,vm_page_info_flavor_t flavor,vm_page_info_t info,mach_msg_type_number_t * count)19002 vm_map_page_info(
19003 	vm_map_t                map,
19004 	vm_map_offset_t         offset,
19005 	vm_page_info_flavor_t   flavor,
19006 	vm_page_info_t          info,
19007 	mach_msg_type_number_t  *count)
19008 {
19009 	return vm_map_page_range_info_internal(map,
19010 	           offset, /* start of range */
19011 	           (offset + 1), /* this will get rounded in the call to the page boundary */
19012 	           (int)-1, /* effective_page_shift: unspecified */
19013 	           flavor,
19014 	           info,
19015 	           count);
19016 }
19017 
/*
 * vm_map_page_range_info_internal:
 *	Fill "info" with one vm_page_info record per effective page in the
 *	range [start_offset, end_offset) of "map", recursing into submaps and
 *	walking VM object shadow chains to locate each page.  When the caller
 *	has opted into "region footprint" reporting, pages are instead
 *	described by their footprint disposition (live pmap or corpse data).
 *
 *	"effective_page_shift" may be -1 to let the routine pick a safe
 *	page shift for the map; only VM_PAGE_INFO_BASIC is supported.
 *	Returns KERN_SUCCESS or KERN_INVALID_ARGUMENT; takes the map lock
 *	for reading (dropping it around submap recursion and object walks).
 */
kern_return_t
vm_map_page_range_info_internal(
	vm_map_t                map,
	vm_map_offset_t         start_offset,
	vm_map_offset_t         end_offset,
	int                     effective_page_shift,
	vm_page_info_flavor_t   flavor,
	vm_page_info_t          info,
	mach_msg_type_number_t  *count)
{
	vm_map_entry_t          map_entry = VM_MAP_ENTRY_NULL;
	vm_object_t             object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
	vm_page_t               m = VM_PAGE_NULL;
	kern_return_t           retval = KERN_SUCCESS;
	int                     disposition = 0;
	int                     ref_count = 0;
	int                     depth = 0, info_idx = 0;
	vm_page_info_basic_t    basic_info = 0;
	vm_map_offset_t         offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
	vm_map_offset_t         start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
	boolean_t               do_region_footprint;
	ledger_amount_t         ledger_resident, ledger_compressed;
	int                     effective_page_size;
	vm_map_offset_t         effective_page_mask;

	switch (flavor) {
	case VM_PAGE_INFO_BASIC:
		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
			/*
			 * The "vm_page_info_basic_data" structure was not
			 * properly padded, so allow the size to be off by
			 * one to maintain backwards binary compatibility...
			 */
			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
				return KERN_INVALID_ARGUMENT;
			}
		}
		break;
	default:
		return KERN_INVALID_ARGUMENT;
	}

	/* pick a page shift the caller didn't specify one */
	if (effective_page_shift == -1) {
		effective_page_shift = vm_self_region_page_shift_safely(map);
		if (effective_page_shift == -1) {
			return KERN_INVALID_ARGUMENT;
		}
	}
	effective_page_size = (1 << effective_page_shift);
	effective_page_mask = effective_page_size - 1;

	do_region_footprint = task_self_region_footprint();
	disposition = 0;
	ref_count = 0;
	depth = 0;
	info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
	retval = KERN_SUCCESS;

	offset_in_page = start_offset & effective_page_mask;
	start = vm_map_trunc_page(start_offset, effective_page_mask);
	end = vm_map_round_page(end_offset, effective_page_mask);

	if (end < start) {
		return KERN_INVALID_ARGUMENT;
	}

	assert((end - start) <= MAX_PAGE_RANGE_QUERY);

	vm_map_lock_read(map);

	task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);

	for (curr_s_offset = start; curr_s_offset < end;) {
		/*
		 * New lookup needs reset of these variables.
		 */
		curr_object = object = VM_OBJECT_NULL;
		offset_in_object = 0;
		ref_count = 0;
		depth = 0;

		if (do_region_footprint &&
		    curr_s_offset >= vm_map_last_entry(map)->vme_end) {
			/*
			 * Request for "footprint" info about a page beyond
			 * the end of address space: this must be for
			 * the fake region vm_map_region_recurse_64()
			 * reported to account for non-volatile purgeable
			 * memory owned by this task.
			 */
			disposition = 0;

			/*
			 * NOTE(review): the "(unsigned)" cast truncates a
			 * 64-bit ledger amount to 32 bits — confirm the
			 * compressed footprint can never exceed 4GB here.
			 */
			if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
			    (unsigned) ledger_compressed) {
				/*
				 * We haven't reported all the "non-volatile
				 * compressed" pages yet, so report this fake
				 * page as "compressed".
				 */
				disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
			} else {
				/*
				 * We've reported all the non-volatile
				 * compressed pages but not all the
				 * non-volatile pages, so report this fake
				 * page as "resident dirty".
				 */
				disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
				disposition |= VM_PAGE_QUERY_PAGE_REF;
			}
			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = 1;
				basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				basic_info->offset = 0;
				basic_info->depth = 0;

				info_idx++;
				break;
			}
			curr_s_offset += effective_page_size;
			continue;
		}

		/*
		 * First, find the map entry covering "curr_s_offset", going down
		 * submaps if necessary.
		 */
		if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
			/* no entry -> no object -> no page */

			if (curr_s_offset < vm_map_min(map)) {
				/*
				 * Illegal address that falls below map min.
				 */
				curr_e_offset = MIN(end, vm_map_min(map));
			} else if (curr_s_offset >= vm_map_max(map)) {
				/*
				 * Illegal address that falls on/after map max.
				 */
				curr_e_offset = end;
			} else if (map_entry == vm_map_to_entry(map)) {
				/*
				 * Hit a hole.
				 */
				if (map_entry->vme_next == vm_map_to_entry(map)) {
					/*
					 * Empty map.
					 */
					curr_e_offset = MIN(map->max_offset, end);
				} else {
					/*
					 * Hole at start of the map.
					 */
					curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
				}
			} else {
				if (map_entry->vme_next == vm_map_to_entry(map)) {
					/*
					 * Hole at the end of the map.
					 */
					curr_e_offset = MIN(map->max_offset, end);
				} else {
					curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
				}
			}

			assert(curr_e_offset >= curr_s_offset);

			uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;

			void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			/* zero-filled records mean "no page" for the whole hole */
			bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));

			curr_s_offset = curr_e_offset;

			info_idx += num_pages;

			continue;
		}

		/* compute offset from this map entry's start */
		offset_in_object = curr_s_offset - map_entry->vme_start;

		/* compute offset into this map entry's object (or submap) */
		offset_in_object += VME_OFFSET(map_entry);

		if (map_entry->is_sub_map) {
			vm_map_t sub_map = VM_MAP_NULL;
			vm_page_info_t submap_info = 0;
			vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;

			range_len = MIN(map_entry->vme_end, end) - curr_s_offset;

			submap_s_offset = offset_in_object;
			submap_e_offset = submap_s_offset + range_len;

			sub_map = VME_SUBMAP(map_entry);

			/* keep the submap alive while we drop our map lock */
			vm_map_reference(sub_map);
			vm_map_unlock_read(map);

			submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
			    "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));

			/* recurse into the submap for this sub-range */
			retval = vm_map_page_range_info_internal(sub_map,
			    submap_s_offset,
			    submap_e_offset,
			    effective_page_shift,
			    VM_PAGE_INFO_BASIC,
			    (vm_page_info_t) submap_info,
			    count);

			assert(retval == KERN_SUCCESS);

			vm_map_lock_read(map);
			vm_map_deallocate(sub_map);

			/* Move the "info" index by the number of pages we inspected.*/
			info_idx += range_len >> effective_page_shift;

			/* Move our current offset by the size of the range we inspected.*/
			curr_s_offset += range_len;

			continue;
		}

		object = VME_OBJECT(map_entry);

		if (object == VM_OBJECT_NULL) {
			/*
			 * We don't have an object here and, hence,
			 * no pages to inspect. We'll fill up the
			 * info structure appropriately.
			 */

			curr_e_offset = MIN(map_entry->vme_end, end);

			uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;

			void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));

			curr_s_offset = curr_e_offset;

			info_idx += num_pages;

			continue;
		}

		if (do_region_footprint) {
			disposition = 0;
			if (map->has_corpse_footprint) {
				/*
				 * Query the page info data we saved
				 * while forking the corpse.
				 */
				vm_map_corpse_footprint_query_page_info(
					map,
					curr_s_offset,
					&disposition);
			} else {
				/*
				 * Query the live pmap for footprint info
				 * about this page.
				 */
				vm_map_footprint_query_page_info(
					map,
					map_entry,
					curr_s_offset,
					&disposition);
			}
			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = 1;
				basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				basic_info->offset = 0;
				basic_info->depth = 0;

				info_idx++;
				break;
			}
			curr_s_offset += effective_page_size;
			continue;
		}

		vm_object_reference(object);
		/*
		 * Shared mode -- so we can allow other readers
		 * to grab the lock too.
		 */
		vm_object_lock_shared(object);

		curr_e_offset = MIN(map_entry->vme_end, end);

		vm_map_unlock_read(map);

		map_entry = NULL; /* map is unlocked, the entry is no longer valid. */

		curr_object = object;

		/* inspect every page this entry covers within the range */
		for (; curr_s_offset < curr_e_offset;) {
			if (object == curr_object) {
				ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
			} else {
				ref_count = curr_object->ref_count;
			}

			curr_offset_in_object = offset_in_object;

			for (;;) {
				m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));

				if (m != VM_PAGE_NULL) {
					disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
					break;
				} else {
					if (curr_object->internal &&
					    curr_object->alive &&
					    !curr_object->terminating &&
					    curr_object->pager_ready) {
						if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
						    == VM_EXTERNAL_STATE_EXISTS) {
							/* the pager has that page */
							disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
							break;
						}
					}

					/*
					 * Go down the VM object shadow chain until we find the page
					 * we're looking for.
					 */

					if (curr_object->shadow != VM_OBJECT_NULL) {
						vm_object_t shadow = VM_OBJECT_NULL;

						curr_offset_in_object += curr_object->vo_shadow_offset;
						shadow = curr_object->shadow;

						/* lock the shadow before dropping its shadower */
						vm_object_lock_shared(shadow);
						vm_object_unlock(curr_object);

						curr_object = shadow;
						depth++;
						continue;
					} else {
						break;
					}
				}
			}

			/* The ref_count is not strictly accurate, it measures the number   */
			/* of entities holding a ref on the object, they may not be mapping */
			/* the object or may not be mapping the section holding the         */
			/* target page but its still a ball park number and though an over- */
			/* count, it picks up the copy-on-write cases                       */

			/* We could also get a picture of page sharing from pmap_attributes */
			/* but this would under count as only faulted-in mappings would     */
			/* show up.							    */

			if ((curr_object == object) && curr_object->shadow) {
				disposition |= VM_PAGE_QUERY_PAGE_COPIED;
			}

			if (!curr_object->internal) {
				disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
			}

			if (m != VM_PAGE_NULL) {
				if (m->vmp_fictitious) {
					disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
				} else {
					if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
						disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
					}

					if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
						disposition |= VM_PAGE_QUERY_PAGE_REF;
					}

					if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
						disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
					}

					/*
					 * XXX TODO4K:
					 * when this routine deals with 4k
					 * pages, check the appropriate CS bit
					 * here.
					 */
					if (m->vmp_cs_validated) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
					}
					if (m->vmp_cs_tainted) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
					}
					if (m->vmp_cs_nx) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
					}
					if (m->vmp_reusable || curr_object->all_reusable) {
						disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
					}
				}
			}

			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = ref_count;
				basic_info->object_id = (vm_object_id_t) (uintptr_t)
				    VM_KERNEL_ADDRPERM(curr_object);
				basic_info->offset =
				    (memory_object_offset_t) curr_offset_in_object + offset_in_page;
				basic_info->depth = depth;

				info_idx++;
				break;
			}

			disposition = 0;
			offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.

			/*
			 * Move to next offset in the range and in our object.
			 */
			curr_s_offset += effective_page_size;
			offset_in_object += effective_page_size;
			curr_offset_in_object = offset_in_object;

			if (curr_object != object) {
				vm_object_unlock(curr_object);

				curr_object = object;

				vm_object_lock_shared(curr_object);
			} else {
				vm_object_lock_yield_shared(curr_object);
			}
		}

		vm_object_unlock(curr_object);
		vm_object_deallocate(curr_object);

		vm_map_lock_read(map);
	}

	vm_map_unlock_read(map);
	return retval;
}
19479 
19480 /*
19481  *	vm_map_msync
19482  *
19483  *	Synchronises the memory range specified with its backing store
19484  *	image by either flushing or cleaning the contents to the appropriate
19485  *	memory manager engaging in a memory object synchronize dialog with
19486  *	the manager.  The client doesn't return until the manager issues
19487  *	m_o_s_completed message.  MIG Magically converts user task parameter
19488  *	to the task's address map.
19489  *
19490  *	interpretation of sync_flags
19491  *	VM_SYNC_INVALIDATE	- discard pages, only return precious
19492  *				  pages to manager.
19493  *
19494  *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19495  *				- discard pages, write dirty or precious
19496  *				  pages back to memory manager.
19497  *
19498  *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19499  *				- write dirty or precious pages back to
19500  *				  the memory manager.
19501  *
19502  *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
19503  *				  is a hole in the region, and we would
19504  *				  have returned KERN_SUCCESS, return
19505  *				  KERN_INVALID_ADDRESS instead.
19506  *
19507  *	NOTE
19508  *	The memory object attributes have not yet been implemented, this
19509  *	function will have to deal with the invalidate attribute
19510  *
19511  *	RETURNS
19512  *	KERN_INVALID_TASK		Bad task parameter
19513  *	KERN_INVALID_ARGUMENT		both sync and async were specified.
19514  *	KERN_SUCCESS			The usual.
19515  *	KERN_INVALID_ADDRESS		There was a hole in the region.
19516  */
19517 
19518 kern_return_t
vm_map_msync(vm_map_t map,vm_map_address_t address,vm_map_size_t size,vm_sync_t sync_flags)19519 vm_map_msync(
19520 	vm_map_t                map,
19521 	vm_map_address_t        address,
19522 	vm_map_size_t           size,
19523 	vm_sync_t               sync_flags)
19524 {
19525 	vm_map_entry_t          entry;
19526 	vm_map_size_t           amount_left;
19527 	vm_object_offset_t      offset;
19528 	vm_object_offset_t      start_offset, end_offset;
19529 	boolean_t               do_sync_req;
19530 	boolean_t               had_hole = FALSE;
19531 	vm_map_offset_t         pmap_offset;
19532 
19533 	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19534 	    (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19535 		return KERN_INVALID_ARGUMENT;
19536 	}
19537 
19538 	if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19539 		DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19540 	}
19541 
19542 	/*
19543 	 * align address and size on page boundaries
19544 	 */
19545 	size = (vm_map_round_page(address + size,
19546 	    VM_MAP_PAGE_MASK(map)) -
19547 	    vm_map_trunc_page(address,
19548 	    VM_MAP_PAGE_MASK(map)));
19549 	address = vm_map_trunc_page(address,
19550 	    VM_MAP_PAGE_MASK(map));
19551 
19552 	if (map == VM_MAP_NULL) {
19553 		return KERN_INVALID_TASK;
19554 	}
19555 
19556 	if (size == 0) {
19557 		return KERN_SUCCESS;
19558 	}
19559 
19560 	amount_left = size;
19561 
19562 	while (amount_left > 0) {
19563 		vm_object_size_t        flush_size;
19564 		vm_object_t             object;
19565 
19566 		vm_map_lock(map);
19567 		if (!vm_map_lookup_entry(map,
19568 		    address,
19569 		    &entry)) {
19570 			vm_map_size_t   skip;
19571 
19572 			/*
19573 			 * hole in the address map.
19574 			 */
19575 			had_hole = TRUE;
19576 
19577 			if (sync_flags & VM_SYNC_KILLPAGES) {
19578 				/*
19579 				 * For VM_SYNC_KILLPAGES, there should be
19580 				 * no holes in the range, since we couldn't
19581 				 * prevent someone else from allocating in
19582 				 * that hole and we wouldn't want to "kill"
19583 				 * their pages.
19584 				 */
19585 				vm_map_unlock(map);
19586 				break;
19587 			}
19588 
19589 			/*
19590 			 * Check for empty map.
19591 			 */
19592 			if (entry == vm_map_to_entry(map) &&
19593 			    entry->vme_next == entry) {
19594 				vm_map_unlock(map);
19595 				break;
19596 			}
19597 			/*
19598 			 * Check that we don't wrap and that
19599 			 * we have at least one real map entry.
19600 			 */
19601 			if ((map->hdr.nentries == 0) ||
19602 			    (entry->vme_next->vme_start < address)) {
19603 				vm_map_unlock(map);
19604 				break;
19605 			}
19606 			/*
19607 			 * Move up to the next entry if needed
19608 			 */
19609 			skip = (entry->vme_next->vme_start - address);
19610 			if (skip >= amount_left) {
19611 				amount_left = 0;
19612 			} else {
19613 				amount_left -= skip;
19614 			}
19615 			address = entry->vme_next->vme_start;
19616 			vm_map_unlock(map);
19617 			continue;
19618 		}
19619 
19620 		offset = address - entry->vme_start;
19621 		pmap_offset = address;
19622 
19623 		/*
19624 		 * do we have more to flush than is contained in this
19625 		 * entry ?
19626 		 */
19627 		if (amount_left + entry->vme_start + offset > entry->vme_end) {
19628 			flush_size = entry->vme_end -
19629 			    (entry->vme_start + offset);
19630 		} else {
19631 			flush_size = amount_left;
19632 		}
19633 		amount_left -= flush_size;
19634 		address += flush_size;
19635 
19636 		if (entry->is_sub_map == TRUE) {
19637 			vm_map_t        local_map;
19638 			vm_map_offset_t local_offset;
19639 
19640 			local_map = VME_SUBMAP(entry);
19641 			local_offset = VME_OFFSET(entry);
19642 			vm_map_reference(local_map);
19643 			vm_map_unlock(map);
19644 			if (vm_map_msync(
19645 				    local_map,
19646 				    local_offset,
19647 				    flush_size,
19648 				    sync_flags) == KERN_INVALID_ADDRESS) {
19649 				had_hole = TRUE;
19650 			}
19651 			vm_map_deallocate(local_map);
19652 			continue;
19653 		}
19654 		object = VME_OBJECT(entry);
19655 
19656 		/*
19657 		 * We can't sync this object if the object has not been
19658 		 * created yet
19659 		 */
19660 		if (object == VM_OBJECT_NULL) {
19661 			vm_map_unlock(map);
19662 			continue;
19663 		}
19664 		offset += VME_OFFSET(entry);
19665 
19666 		vm_object_lock(object);
19667 
19668 		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19669 			int kill_pages = 0;
19670 			boolean_t reusable_pages = FALSE;
19671 
19672 			if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19673 				/*
19674 				 * This is a destructive operation and so we
19675 				 * err on the side of limiting the range of
19676 				 * the operation.
19677 				 */
19678 				start_offset = vm_object_round_page(offset);
19679 				end_offset = vm_object_trunc_page(offset + flush_size);
19680 
19681 				if (end_offset <= start_offset) {
19682 					vm_object_unlock(object);
19683 					vm_map_unlock(map);
19684 					continue;
19685 				}
19686 
19687 				pmap_offset += start_offset - offset;
19688 			} else {
19689 				start_offset = offset;
19690 				end_offset = offset + flush_size;
19691 			}
19692 
19693 			if (sync_flags & VM_SYNC_KILLPAGES) {
19694 				if (((object->ref_count == 1) ||
19695 				    ((object->copy_strategy !=
19696 				    MEMORY_OBJECT_COPY_SYMMETRIC) &&
19697 				    (object->copy == VM_OBJECT_NULL))) &&
19698 				    (object->shadow == VM_OBJECT_NULL)) {
19699 					if (object->ref_count != 1) {
19700 						vm_page_stats_reusable.free_shared++;
19701 					}
19702 					kill_pages = 1;
19703 				} else {
19704 					kill_pages = -1;
19705 				}
19706 			}
19707 			if (kill_pages != -1) {
19708 				vm_object_deactivate_pages(
19709 					object,
19710 					start_offset,
19711 					(vm_object_size_t) (end_offset - start_offset),
19712 					kill_pages,
19713 					reusable_pages,
19714 					map->pmap,
19715 					pmap_offset);
19716 			}
19717 			vm_object_unlock(object);
19718 			vm_map_unlock(map);
19719 			continue;
19720 		}
19721 		/*
19722 		 * We can't sync this object if there isn't a pager.
19723 		 * Don't bother to sync internal objects, since there can't
19724 		 * be any "permanent" storage for these objects anyway.
19725 		 */
19726 		if ((object->pager == MEMORY_OBJECT_NULL) ||
19727 		    (object->internal) || (object->private)) {
19728 			vm_object_unlock(object);
19729 			vm_map_unlock(map);
19730 			continue;
19731 		}
19732 		/*
19733 		 * keep reference on the object until syncing is done
19734 		 */
19735 		vm_object_reference_locked(object);
19736 		vm_object_unlock(object);
19737 
19738 		vm_map_unlock(map);
19739 
19740 		if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19741 			start_offset = vm_object_trunc_page(offset);
19742 			end_offset = vm_object_round_page(offset + flush_size);
19743 		} else {
19744 			start_offset = offset;
19745 			end_offset = offset + flush_size;
19746 		}
19747 
19748 		do_sync_req = vm_object_sync(object,
19749 		    start_offset,
19750 		    (end_offset - start_offset),
19751 		    sync_flags & VM_SYNC_INVALIDATE,
19752 		    ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19753 		    (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19754 		    sync_flags & VM_SYNC_SYNCHRONOUS);
19755 
19756 		if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19757 			/*
19758 			 * clear out the clustering and read-ahead hints
19759 			 */
19760 			vm_object_lock(object);
19761 
19762 			object->pages_created = 0;
19763 			object->pages_used = 0;
19764 			object->sequential = 0;
19765 			object->last_alloc = 0;
19766 
19767 			vm_object_unlock(object);
19768 		}
19769 		vm_object_deallocate(object);
19770 	} /* while */
19771 
19772 	/* for proper msync() behaviour */
19773 	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19774 		return KERN_INVALID_ADDRESS;
19775 	}
19776 
19777 	return KERN_SUCCESS;
19778 }/* vm_msync */
19779 
/*
 *	vm_named_entry_from_vm_object:
 *
 *	Back "named_entry" with "object": wrap the object in a
 *	single-entry VM map copy describing [offset, offset + size)
 *	with the given protection, and attach that copy to the named
 *	entry.  The named entry is marked "internal" if the object is.
 *	Always returns KERN_SUCCESS.
 */
kern_return_t
vm_named_entry_from_vm_object(
	vm_named_entry_t        named_entry,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	vm_prot_t               prot)
{
	vm_map_copy_t copy;
	vm_map_entry_t copy_entry;

	/* caller must pass a fresh named entry with no backing store yet */
	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(!named_entry->is_object);
	assert(!named_entry->internal);
	assert(named_entry->backing.copy == VM_MAP_COPY_NULL);

	/* build a one-entry ENTRY_LIST copy that covers the whole range */
	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->offset = offset;
	copy->size = size;
	copy->cpy_hdr.page_shift = PAGE_SHIFT;
	vm_map_store_init(&copy->cpy_hdr);

	copy_entry = vm_map_copy_entry_create(copy, FALSE);
	copy_entry->protection = prot;
	copy_entry->max_protection = prot;
	copy_entry->use_pmap = TRUE;
	/* entry bounds are page-aligned; the sub-page offset is kept in "copy->offset" */
	copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
	copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
	VME_OBJECT_SET(copy_entry, object);
	VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
	vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);

	named_entry->backing.copy = copy;
	named_entry->is_object = TRUE;
	if (object->internal) {
		named_entry->internal = TRUE;
	}

	DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);

	return KERN_SUCCESS;
}
19824 
/*
 *	vm_named_entry_to_vm_object:
 *
 *	Return the VM object backing an object-backed named entry
 *	(the inverse of vm_named_entry_from_vm_object).  The named
 *	entry must be backed by a single-entry map copy; asserts
 *	enforce that invariant.  Does not take a reference on the
 *	returned object.
 */
vm_object_t
vm_named_entry_to_vm_object(
	vm_named_entry_t named_entry)
{
	vm_map_copy_t   copy;
	vm_map_entry_t  copy_entry;
	vm_object_t     object;

	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(named_entry->is_object);
	copy = named_entry->backing.copy;
	assert(copy != VM_MAP_COPY_NULL);
	/* object-backed named entries always wrap exactly one entry */
	assert(copy->cpy_hdr.nentries == 1);
	copy_entry = vm_map_copy_first_entry(copy);
	assert(!copy_entry->is_sub_map);
	object = VME_OBJECT(copy_entry);

	DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);

	return object;
}
19847 
19848 /*
19849  *	Routine:	convert_port_entry_to_map
19850  *	Purpose:
19851  *		Convert from a port specifying an entry or a task
19852  *		to a map. Doesn't consume the port ref; produces a map ref,
19853  *		which may be null.  Unlike convert_port_to_map, the
19854  *		port may be task or a named entry backed.
19855  *	Conditions:
19856  *		Nothing locked.
19857  */
19858 
19859 
vm_map_t
convert_port_entry_to_map(
	ipc_port_t      port)
{
	vm_map_t map = VM_MAP_NULL;
	vm_named_entry_t        named_entry;
	uint32_t        try_failed_count = 0;

	if (!IP_VALID(port)) {
		return VM_MAP_NULL;
	}

	/* not a named entry: treat the port as a task port */
	if (ip_kotype(port) != IKOT_NAMED_ENTRY) {
		return convert_port_to_map(port);
	}

	ip_mq_lock(port);

	while (TRUE) {
		named_entry = mach_memory_entry_from_port(port);
		if (named_entry == NULL) {
			ip_mq_unlock(port);
			return VM_MAP_NULL;
		}

		if (lck_mtx_try_lock(&(named_entry)->Lock)) {
			break;
		}

		/*
		 * Could not take the named entry's lock while holding
		 * the port lock: drop the port lock, back off (longer
		 * each time), and retry to avoid a lock-order deadlock.
		 */
		ip_mq_unlock(port);

		try_failed_count++;
		mutex_pause(try_failed_count);
		ip_mq_lock(port);
	}

	/* hold a ref on the named entry across the checks below */
	named_entry->ref_count++;
	lck_mtx_unlock(&(named_entry)->Lock);
	ip_mq_unlock(port);
	/* only a writable sub-map-backed entry yields a map */
	if ((named_entry->is_sub_map) &&
	    (named_entry->protection & VM_PROT_WRITE)) {
		map = named_entry->backing.map;
		if (map->pmap != PMAP_NULL) {
			if (map->pmap == kernel_pmap) {
				panic("userspace has access "
				    "to a kernel map %p", map);
			}
			pmap_require(map->pmap);
		}
		vm_map_reference(map);
	}
	/* drop the named-entry reference taken above */
	mach_destroy_memory_entry(port);
	return map;
}
19914 
19915 /*
19916  * Export routines to other components for the things we access locally through
19917  * macros.
19918  */
19919 #undef current_map
/* Out-of-line export of the current_map() macro (see #undef above). */
vm_map_t
current_map(void)
{
	return current_map_fast();
}
19925 
19926 /*
19927  *	vm_map_reference:
19928  *
19929  *	Takes a reference on the specified map.
19930  */
19931 void
vm_map_reference(vm_map_t map)19932 vm_map_reference(
19933 	vm_map_t        map)
19934 {
19935 	if (__probable(map != VM_MAP_NULL)) {
19936 		vm_map_require(map);
19937 		os_ref_retain(&map->map_refcnt);
19938 	}
19939 }
19940 
19941 /*
19942  *	vm_map_deallocate:
19943  *
19944  *	Removes a reference from the specified map,
19945  *	destroying it if no references remain.
19946  *	The map should not be locked.
19947  */
19948 void
vm_map_deallocate(vm_map_t map)19949 vm_map_deallocate(
19950 	vm_map_t        map)
19951 {
19952 	if (__probable(map != VM_MAP_NULL)) {
19953 		vm_map_require(map);
19954 		if (os_ref_release(&map->map_refcnt) == 0) {
19955 			vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19956 		}
19957 	}
19958 }
19959 
/* Release a reference held through an inspect-only map right. */
void
vm_map_inspect_deallocate(
	vm_map_inspect_t      map)
{
	vm_map_deallocate((vm_map_t)map);
}
19966 
/* Release a reference held through a read-only map right. */
void
vm_map_read_deallocate(
	vm_map_read_t      map)
{
	vm_map_deallocate((vm_map_t)map);
}
19973 
19974 
19975 void
vm_map_disable_NX(vm_map_t map)19976 vm_map_disable_NX(vm_map_t map)
19977 {
19978 	if (map == NULL) {
19979 		return;
19980 	}
19981 	if (map->pmap == NULL) {
19982 		return;
19983 	}
19984 
19985 	pmap_disable_NX(map->pmap);
19986 }
19987 
19988 void
vm_map_disallow_data_exec(vm_map_t map)19989 vm_map_disallow_data_exec(vm_map_t map)
19990 {
19991 	if (map == NULL) {
19992 		return;
19993 	}
19994 
19995 	map->map_disallow_data_exec = TRUE;
19996 }
19997 
19998 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19999  * more descriptive.
20000  */
/* Shrink the map's addressable range to the 32-bit user maximum. */
void
vm_map_set_32bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	/* ARM: the pmap layer decides the device's 32-bit ceiling */
	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}
20010 
20011 
/* Extend the map's addressable range to the 64-bit user maximum. */
void
vm_map_set_64bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	/* ARM: the pmap layer decides the device's 64-bit ceiling */
	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
}
20021 
20022 /*
20023  * Expand the maximum size of an existing map to the maximum supported.
20024  */
/*
 * Expand the maximum size of an existing map to the maximum supported.
 */
void
vm_map_set_jumbo(vm_map_t map)
{
#if defined (__arm64__) && !defined(CONFIG_ARROW)
	/* ~0 asks for the largest offset; vm_map_set_max_addr clamps it */
	vm_map_set_max_addr(map, ~0);
#else /* arm64 */
	(void) map;
#endif
}
20034 
20035 /*
20036  * This map has a JIT entitlement
20037  */
/*
 * This map has a JIT entitlement; no-op on non-arm64 configs.
 */
void
vm_map_set_jit_entitled(vm_map_t map)
{
#if defined (__arm64__)
	pmap_set_jit_entitled(map->pmap);
#else /* arm64 */
	(void) map;
#endif
}
20047 
20048 /*
20049  * Expand the maximum size of an existing map.
20050  */
/*
 * Expand the maximum size of an existing map (arm64 only; no-op elsewhere).
 * The new maximum is clamped to the pmap's jumbo ceiling; the map cannot
 * be shrunk through this routine.  The hole list is extended to cover the
 * newly exposed address range.
 */
void
vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
{
#if defined(__arm64__)
	vm_map_offset_t max_supported_offset = 0;
	vm_map_offset_t old_max_offset = map->max_offset;
	max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);

	new_max_offset = trunc_page(new_max_offset);

	/* The address space cannot be shrunk using this routine. */
	if (old_max_offset >= new_max_offset) {
		return;
	}

	/* clamp to what the pmap can actually support */
	if (max_supported_offset < new_max_offset) {
		new_max_offset = max_supported_offset;
	}

	map->max_offset = new_max_offset;

	if (map->holes_list->prev->vme_end == old_max_offset) {
		/*
		 * There is already a hole at the end of the map; simply make it bigger.
		 */
		map->holes_list->prev->vme_end = map->max_offset;
	} else {
		/*
		 * There is no hole at the end, so we need to create a new hole
		 * for the new empty space we're creating.
		 */
		struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
		new_hole->start = old_max_offset;
		new_hole->end = map->max_offset;
		/* link the new hole in at the tail of the circular hole list */
		new_hole->prev = map->holes_list->prev;
		new_hole->next = (struct vm_map_entry *)map->holes_list;
		map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
		map->holes_list->prev = (struct vm_map_entry *)new_hole;
	}
#else
	(void)map;
	(void)new_max_offset;
#endif
}
20095 
/* Return the default maximum user VA for a 32- or 64-bit address space. */
vm_map_offset_t
vm_compute_max_offset(boolean_t is64)
{
#if defined(__arm__) || defined(__arm64__)
	return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}
20105 
/*
 * Report how many independently slidable sections ASLR may use for this
 * map and the size of each section (0 size means "no sectioning").
 */
void
vm_map_get_max_aslr_slide_section(
	vm_map_t                map __unused,
	int64_t                 *max_sections,
	int64_t                 *section_size)
{
#if defined(__arm64__)
	*max_sections = 3;
	*section_size = ARM_TT_TWIG_SIZE;
#else
	*max_sections = 1;
	*section_size = 0;
#endif
}
20120 
/* Maximum ASLR slide for this map, expressed in map pages. */
uint64_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
	 * limited embedded address space; this is also meant to minimize pmap
	 * memory usage on 16KB page systems.
	 */
	return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
20134 
/* Maximum ASLR slide for the dynamic loader, expressed in map pages. */
uint64_t
vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
	 * of independent entropy on 16KB page systems.
	 */
	return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
20147 
20148 #ifndef __arm__
/* A map is 64-bit if its ceiling exceeds the 32-bit user maximum. */
boolean_t
vm_map_is_64bit(
	vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}
20155 #endif
20156 
/*
 * Return TRUE if the map's minimum offset reserves at least
 * "pagezero_size" bytes of inaccessible space at address zero.
 */
boolean_t
vm_map_has_hard_pagezero(
	vm_map_t        map,
	vm_map_offset_t pagezero_size)
{
	/*
	 * XXX FBDP
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return map->min_offset >= pagezero_size;
}
20174 
20175 /*
 * Raise a VM map's maximum offset.
20177  */
20178 kern_return_t
vm_map_raise_max_offset(vm_map_t map,vm_map_offset_t new_max_offset)20179 vm_map_raise_max_offset(
20180 	vm_map_t        map,
20181 	vm_map_offset_t new_max_offset)
20182 {
20183 	kern_return_t   ret;
20184 
20185 	vm_map_lock(map);
20186 	ret = KERN_INVALID_ADDRESS;
20187 
20188 	if (new_max_offset >= map->max_offset) {
20189 		if (!vm_map_is_64bit(map)) {
20190 			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20191 				map->max_offset = new_max_offset;
20192 				ret = KERN_SUCCESS;
20193 			}
20194 		} else {
20195 			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20196 				map->max_offset = new_max_offset;
20197 				ret = KERN_SUCCESS;
20198 			}
20199 		}
20200 	}
20201 
20202 	vm_map_unlock(map);
20203 	return ret;
20204 }
20205 
20206 
20207 /*
20208  * Raise a VM map's minimum offset.
20209  * To strictly enforce "page zero" reservation.
20210  */
/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 * Fails with KERN_INVALID_ADDRESS if the new minimum would shrink the
 * reservation or exceed the map's maximum, and with KERN_NO_SPACE if
 * memory is already mapped below the new minimum.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t        map,
	vm_map_offset_t new_min_offset)
{
	vm_map_entry_t  first_entry;

	new_min_offset = vm_map_round_page(new_min_offset,
	    VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	if (new_min_offset >= map->max_offset) {
		/* can't go beyond the end of the address space */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	/* the first hole starts at the (new) bottom of the address space */
	assert(map->holes_list);
	map->holes_list->start = new_min_offset;
	assert(new_min_offset < map->holes_list->end);

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
20259 
20260 /*
20261  * Set the limit on the maximum amount of address space and user wired memory allowed for this map.
20262  * This is basically a copy of the RLIMIT_AS and RLIMIT_MEMLOCK rlimit value maintained by the BSD
20263  * side of the kernel. The limits are checked in the mach VM side, so we keep a copy so we don't
20264  * have to reach over to the BSD data structures.
20265  */
20266 
20267 uint64_t vm_map_set_size_limit_count = 0;
/*
 * Mirror the BSD RLIMIT_AS value into the map so the Mach VM side can
 * check it without reaching into BSD data structures.  Fails if the new
 * limit is below the map's current size.
 */
kern_return_t
vm_map_set_size_limit(vm_map_t map, uint64_t new_size_limit)
{
	kern_return_t kr;

	vm_map_lock(map);
	if (new_size_limit < map->size) {
		/* new limit should not be lower than its current size */
		DTRACE_VM2(vm_map_set_size_limit_fail,
		    vm_map_size_t, map->size,
		    uint64_t, new_size_limit);
		kr = KERN_FAILURE;
	} else if (new_size_limit == map->size_limit) {
		/* no change */
		kr = KERN_SUCCESS;
	} else {
		/* set new limit */
		DTRACE_VM2(vm_map_set_size_limit,
		    vm_map_size_t, map->size,
		    uint64_t, new_size_limit);
		if (new_size_limit != RLIM_INFINITY) {
			/* count how many maps carry a finite limit */
			vm_map_set_size_limit_count++;
		}
		map->size_limit = new_size_limit;
		kr = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	return kr;
}
20297 
20298 uint64_t vm_map_set_data_limit_count = 0;
/*
 * Mirror the BSD RLIMIT_DATA value into the map (companion to
 * vm_map_set_size_limit).  Fails if the new limit is below the map's
 * current size.
 */
kern_return_t
vm_map_set_data_limit(vm_map_t map, uint64_t new_data_limit)
{
	kern_return_t kr;

	vm_map_lock(map);
	if (new_data_limit < map->size) {
		/* new limit should not be lower than its current size */
		DTRACE_VM2(vm_map_set_data_limit_fail,
		    vm_map_size_t, map->size,
		    uint64_t, new_data_limit);
		kr = KERN_FAILURE;
	} else if (new_data_limit == map->data_limit) {
		/* no change */
		kr = KERN_SUCCESS;
	} else {
		/* set new limit */
		DTRACE_VM2(vm_map_set_data_limit,
		    vm_map_size_t, map->size,
		    uint64_t, new_data_limit);
		if (new_data_limit != RLIM_INFINITY) {
			/* count how many maps carry a finite limit */
			vm_map_set_data_limit_count++;
		}
		map->data_limit = new_data_limit;
		kr = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	return kr;
}
20328 
/* Mirror the BSD RLIMIT_MEMLOCK value into the map. */
void
vm_map_set_user_wire_limit(vm_map_t     map,
    vm_size_t    limit)
{
	vm_map_lock(map);
	map->user_wire_limit = limit;
	vm_map_unlock(map);
}
20337 
20338 
/* Set or clear the map's switch-protect flag (under the map lock). */
void
vm_map_switch_protect(vm_map_t     map,
    boolean_t    val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}
20347 
20348 extern int cs_process_enforcement_enable;
20349 boolean_t
vm_map_cs_enforcement(vm_map_t map)20350 vm_map_cs_enforcement(
20351 	vm_map_t map)
20352 {
20353 	if (cs_process_enforcement_enable) {
20354 		return TRUE;
20355 	}
20356 	return map->cs_enforcement;
20357 }
20358 
/* Allow writable+executable (invalid CS) mappings in this map's pmap. */
kern_return_t
vm_map_cs_wx_enable(
	vm_map_t map)
{
	return pmap_cs_allow_invalid(vm_map_pmap(map));
}
20365 
/* Record whether this map's task has been debugged (under the map lock). */
void
vm_map_cs_debugged_set(
	vm_map_t map,
	boolean_t val)
{
	vm_map_lock(map);
	map->cs_debugged = val;
	vm_map_unlock(map);
}
20375 
/*
 * Set the per-map code-signing enforcement flag, keeping the
 * pmap's copy of the flag in sync (both under the map lock).
 */
void
vm_map_cs_enforcement_set(
	vm_map_t map,
	boolean_t val)
{
	vm_map_lock(map);
	map->cs_enforcement = val;
	pmap_set_vm_map_cs_enforced(map->pmap, val);
	vm_map_unlock(map);
}
20386 
20387 /*
20388  * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20389  * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20390  * bump both counters.
20391  */
/*
 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
 * bump both counters.
 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
20400 
/* Inverse of vm_map_iokit_mapped_region: debit both ledgers on unmap. */
void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
20409 
20410 /* Add (generate) code signature for memory range */
20411 #if CONFIG_DYNAMIC_CODE_SIGNING
/*
 * vm_map_sign:
 *
 * Mark every resident page in [start, end) of the single map entry
 * covering the range as code-signing validated, disconnecting each
 * page from the pmap so future modifications are noticed.
 * The range must lie entirely within one non-submap entry whose
 * object already exists; every page must be resident and in a
 * normal state or KERN_FAILURE is returned.
 */
kern_return_t
vm_map_sign(vm_map_t map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Take the object lock before dropping the map lock.
	 * NOTE(review): "entry" is still dereferenced in the loop below
	 * after the map lock is dropped — presumably safe because the
	 * object lock pins what we need; confirm against callers.
	 */
	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object,
		    start - entry->vme_start + VME_OFFSET(entry));
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->vmp_busy ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		/* XXX TODO4K: deal with 4k subpages individually? */
		m->vmp_cs_validated = VMP_CS_ALL_TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->vmp_wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
20504 #endif
20505 
20506 kern_return_t
vm_map_partial_reap(vm_map_t map,unsigned int * reclaimed_resident,unsigned int * reclaimed_compressed)20507 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20508 {
20509 	vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
20510 	vm_map_entry_t next_entry;
20511 	kern_return_t   kr = KERN_SUCCESS;
20512 	vm_map_t        zap_map;
20513 
20514 	vm_map_lock(map);
20515 
20516 	/*
20517 	 * We use a "zap_map" to avoid having to unlock
20518 	 * the "map" in vm_map_delete().
20519 	 */
20520 	zap_map = vm_map_create(PMAP_NULL,
20521 	    map->min_offset,
20522 	    map->max_offset,
20523 	    map->hdr.entries_pageable);
20524 
20525 	if (zap_map == VM_MAP_NULL) {
20526 		return KERN_RESOURCE_SHORTAGE;
20527 	}
20528 
20529 	vm_map_set_page_shift(zap_map,
20530 	    VM_MAP_PAGE_SHIFT(map));
20531 	vm_map_disable_hole_optimization(zap_map);
20532 
20533 	for (entry = vm_map_first_entry(map);
20534 	    entry != vm_map_to_entry(map);
20535 	    entry = next_entry) {
20536 		next_entry = entry->vme_next;
20537 
20538 		if (VME_OBJECT(entry) &&
20539 		    !entry->is_sub_map &&
20540 		    (VME_OBJECT(entry)->internal == TRUE) &&
20541 		    (VME_OBJECT(entry)->ref_count == 1)) {
20542 			*reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20543 			*reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20544 
20545 			(void)vm_map_delete(map,
20546 			    entry->vme_start,
20547 			    entry->vme_end,
20548 			    VM_MAP_REMOVE_SAVE_ENTRIES,
20549 			    zap_map);
20550 		}
20551 	}
20552 
20553 	vm_map_unlock(map);
20554 
20555 	/*
20556 	 * Get rid of the "zap_maps" and all the map entries that
20557 	 * they may still contain.
20558 	 */
20559 	if (zap_map != VM_MAP_NULL) {
20560 		vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20561 		zap_map = VM_MAP_NULL;
20562 	}
20563 
20564 	return kr;
20565 }
20566 
20567 
20568 #if DEVELOPMENT || DEBUG
20569 
/*
 * vm_map_disconnect_page_mappings (DEVELOPMENT/DEBUG only):
 *
 * Remove all pmap mappings for the map's entries (optionally unnesting
 * shared sub-map regions first so only this task's pmap is touched).
 * Returns the map's physical-memory ledger balance expressed in map
 * pages, sampled before the disconnect.
 */
int
vm_map_disconnect_page_mappings(
	vm_map_t map,
	boolean_t do_unnest)
{
	vm_map_entry_t entry;
	ledger_amount_t byte_count = 0;

	if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
		vm_map_lock(map);

		for (entry = vm_map_first_entry(map);
		    entry != vm_map_to_entry(map);
		    entry = entry->vme_next) {
			if (entry->is_sub_map && entry->use_pmap) {
				/*
				 * Make sure the range between the start of this entry and
				 * the end of this entry is no longer nested, so that
				 * we will only remove mappings from the pmap in use by
				 * this task
				 */
				vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
			}
		}
		vm_map_unlock(map);
#endif
	}
	vm_map_lock_read(map);

	/* sample resident footprint before tearing the mappings down */
	ledger_get_balance(map->pmap->ledger, task_ledgers.phys_mem, &byte_count);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		/* skip entries with no object and physically contiguous ones */
		if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
		    (VME_OBJECT(entry)->phys_contiguous))) {
			continue;
		}
		if (entry->is_sub_map) {
			/* any nested sub-maps should have been unnested above */
			assert(!entry->use_pmap);
		}

		pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
	}
	vm_map_unlock_read(map);

	return (int) (byte_count / VM_MAP_PAGE_SIZE(map));
}
20619 
/*
 * vm_map_inject_error (DEVELOPMENT/DEBUG only):
 *
 * Inject a decompression error for the page at "vaddr" so a later
 * fault on it fails.  Returns KERN_MEMORY_ERROR if no object backs
 * the address, KERN_MEMORY_PRESENT if the page is resident (no pager
 * copy to corrupt), otherwise the compressor pager's result.
 */
kern_return_t
vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_object_t object = NULL;
	vm_object_offset_t offset;
	vm_prot_t prot;
	boolean_t wired;
	vm_map_version_t version;
	vm_map_t real_map;
	int result = KERN_FAILURE;

	vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
	vm_map_lock(map);

	/*
	 * NOTE(review): if the lookup fails, "real_map" is assumed to be
	 * set to "map" by vm_map_lookup_locked before it returns — the
	 * comparison below reads it unconditionally; confirm.
	 */
	result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
	    NULL, &real_map, NULL);
	if (object == NULL) {
		result = KERN_MEMORY_ERROR;
	} else if (object->pager) {
		result = vm_compressor_pager_inject_error(object->pager,
		    offset);
	} else {
		result = KERN_MEMORY_PRESENT;
	}

	if (object != NULL) {
		vm_object_unlock(object);
	}

	if (real_map != map) {
		vm_map_unlock(real_map);
	}
	vm_map_unlock(map);

	return result;
}
20657 
20658 #endif
20659 
20660 
20661 #if CONFIG_FREEZE
20662 
20663 
20664 extern struct freezer_context freezer_context_global;
20665 AbsoluteTime c_freezer_last_yield_ts = 0;
20666 
20667 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20668 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20669 
20670 kern_return_t
vm_map_freeze(
	task_t       task,
	unsigned int *purgeable_count,
	unsigned int *wired_count,
	unsigned int *clean_count,
	unsigned int *dirty_count,
	unsigned int dirty_budget,
	unsigned int *shared_count,
	int          *freezer_error_code,
	boolean_t    eval_only)
{
	/*
	 * Freeze "task": push its compressible private anonymous pages into
	 * the compressor, compressing at most "dirty_budget" pages.
	 *
	 * When the freezer is backed by swap (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE)
	 * the map is walked twice: an "evaluation" pass that only sizes up the
	 * private vs. shared dirty footprint (and can veto the freeze via
	 * FREEZER_ERROR_EXCESS_SHARED_MEMORY or
	 * FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO), then a second pass that
	 * actually compresses pages.  With "eval_only" == TRUE we stop after
	 * the evaluation pass (unsupported in the non-swap config).
	 *
	 * On failure, *freezer_error_code is set to a FREEZER_ERROR_* value.
	 * *shared_count is reported in MB; *wired_count in pages.
	 * NOTE(review): *purgeable_count, *clean_count and *dirty_count are
	 * zeroed here but never incremented in this function.
	 */
	vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
	kern_return_t   kr = KERN_SUCCESS;
	boolean_t       evaluation_phase = TRUE;
	vm_object_t     cur_shared_object = NULL;
	int             cur_shared_obj_ref_cnt = 0;
	unsigned int    dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;

	*purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;

	/*
	 * We need the exclusive lock here so that we can
	 * block any page faults or lookups while we are
	 * in the middle of freezing this vm map.
	 */
	vm_map_t map = task->map;

	vm_map_lock(map);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	/* bail out early if there is no room to compress or swap anything */
	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
		if (vm_compressor_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
		}

		if (vm_swap_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
		}

		kr = KERN_NO_SPACE;
		goto done;
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
		/*
		 * In-memory compressor backing the freezer. No disk.
		 * So no need to do the evaluation phase.
		 */
		evaluation_phase = FALSE;

		if (eval_only == TRUE) {
			/*
			 * We don't support 'eval_only' mode
			 * in this non-swap config.
			 */
			*freezer_error_code = FREEZER_ERROR_GENERIC;
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);
	}
again:

	for (entry2 = vm_map_first_entry(map);
	    entry2 != vm_map_to_entry(map);
	    entry2 = entry2->vme_next) {
		vm_object_t     src_object = VME_OBJECT(entry2);

		if (src_object &&
		    !entry2->is_sub_map &&
		    !src_object->phys_contiguous) {
			/* If eligible, scan the entry, moving eligible pages over to our parent object */

			if (src_object->internal == TRUE) {
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					/*
					 * We skip purgeable objects during evaluation phase only.
					 * If we decide to freeze this process, we'll explicitly
					 * purge these objects before we go around again with
					 * 'evaluation_phase' set to FALSE.
					 */

					if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
						/*
						 * We want to purge objects that may not belong to this task but are mapped
						 * in this task alone. Since we already purged this task's purgeable memory
						 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
						 * on this task's purgeable objects. Hence the check for only volatile objects.
						 */
						if (evaluation_phase == FALSE &&
						    (src_object->purgable == VM_PURGABLE_VOLATILE) &&
						    (src_object->ref_count == 1)) {
							vm_object_lock(src_object);
							vm_object_purge(src_object, 0);
							vm_object_unlock(src_object);
						}
						continue;
					}

					/*
					 * Pages belonging to this object could be swapped to disk.
					 * Make sure it's not a shared object because we could end
					 * up just bringing it back in again.
					 *
					 * We try to optimize somewhat by checking for objects that are mapped
					 * more than once within our own map. But we don't do full searches,
					 * we just look at the entries following our current entry.
					 */

					if (src_object->ref_count > 1) {
						if (src_object != cur_shared_object) {
							/* first sighting of this shared object: count it as shared */
							obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
							dirty_shared_count += obj_pages_snapshot;

							cur_shared_object = src_object;
							cur_shared_obj_ref_cnt = 1;
							continue;
						} else {
							cur_shared_obj_ref_cnt++;
							if (src_object->ref_count == cur_shared_obj_ref_cnt) {
								/*
								 * Fall through to below and treat this object as private.
								 * So deduct its pages from our shared total and add it to the
								 * private total.
								 */

								dirty_shared_count -= obj_pages_snapshot;
								dirty_private_count += obj_pages_snapshot;
							} else {
								continue;
							}
						}
					}


					if (src_object->ref_count == 1) {
						dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
					}

					if (evaluation_phase == TRUE) {
						/* evaluation pass only gathers counts; don't compress */
						continue;
					}
				}

				/* freeze pass: push this object's pages into the compressor */
				uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
				*wired_count += src_object->wired_page_count;

				if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
					if (vm_compressor_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
					}

					if (vm_swap_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
					}

					kr = KERN_NO_SPACE;
					break;
				}
				if (paged_out_count >= dirty_budget) {
					/* budget exhausted: stop walking the map */
					break;
				}
				dirty_budget -= paged_out_count;
			}
		}
	}

	/* convert the shared page count to MB for the caller */
	*shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
	if (evaluation_phase) {
		unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;

		if (dirty_shared_count > shared_pages_threshold) {
			*freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
			kr = KERN_FAILURE;
			goto done;
		}

		if (dirty_shared_count &&
		    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
			*freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
			kr = KERN_FAILURE;
			goto done;
		}

		/* evaluation succeeded: reset counters and do the real freeze pass */
		evaluation_phase = FALSE;
		dirty_shared_count = dirty_private_count = 0;

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);

		if (eval_only) {
			kr = KERN_SUCCESS;
			goto done;
		}

		/* purge this task's purgeable memory before the freeze pass */
		vm_purgeable_purge_task_owned(task);

		goto again;
	} else {
		kr = KERN_SUCCESS;
	}

done:
	vm_map_unlock(map);

	if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
		vm_object_compressed_freezer_done();
	}
	return kr;
}
20884 
20885 #endif
20886 
20887 /*
20888  * vm_map_entry_should_cow_for_true_share:
20889  *
20890  * Determines if the map entry should be clipped and setup for copy-on-write
20891  * to avoid applying "true_share" to a large VM object when only a subset is
20892  * targeted.
20893  *
20894  * For now, we target only the map entries created for the Objective C
20895  * Garbage Collector, which initially have the following properties:
20896  *	- alias == VM_MEMORY_MALLOC
20897  *      - wired_count == 0
20898  *      - !needs_copy
20899  * and a VM object with:
20900  *      - internal
20901  *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20902  *      - !true_share
20903  *      - vo_size == ANON_CHUNK_SIZE
20904  *
20905  * Only non-kernel map entries.
20906  */
20907 boolean_t
vm_map_entry_should_cow_for_true_share(vm_map_entry_t entry)20908 vm_map_entry_should_cow_for_true_share(
20909 	vm_map_entry_t  entry)
20910 {
20911 	vm_object_t     object;
20912 
20913 	if (entry->is_sub_map) {
20914 		/* entry does not point at a VM object */
20915 		return FALSE;
20916 	}
20917 
20918 	if (entry->needs_copy) {
20919 		/* already set for copy_on_write: done! */
20920 		return FALSE;
20921 	}
20922 
20923 	if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20924 	    VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
20925 		/* not a malloc heap or Obj-C Garbage Collector heap */
20926 		return FALSE;
20927 	}
20928 
20929 	if (entry->wired_count) {
20930 		/* wired: can't change the map entry... */
20931 		vm_counters.should_cow_but_wired++;
20932 		return FALSE;
20933 	}
20934 
20935 	object = VME_OBJECT(entry);
20936 
20937 	if (object == VM_OBJECT_NULL) {
20938 		/* no object yet... */
20939 		return FALSE;
20940 	}
20941 
20942 	if (!object->internal) {
20943 		/* not an internal object */
20944 		return FALSE;
20945 	}
20946 
20947 	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20948 		/* not the default copy strategy */
20949 		return FALSE;
20950 	}
20951 
20952 	if (object->true_share) {
20953 		/* already true_share: too late to avoid it */
20954 		return FALSE;
20955 	}
20956 
20957 	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
20958 	    object->vo_size != ANON_CHUNK_SIZE) {
20959 		/* ... not an object created for the ObjC Garbage Collector */
20960 		return FALSE;
20961 	}
20962 
20963 	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
20964 	    object->vo_size != 2048 * 4096) {
20965 		/* ... not a "MALLOC_SMALL" heap */
20966 		return FALSE;
20967 	}
20968 
20969 	/*
20970 	 * All the criteria match: we have a large object being targeted for "true_share".
20971 	 * To limit the adverse side-effects linked with "true_share", tell the caller to
20972 	 * try and avoid setting up the entire object for "true_share" by clipping the
20973 	 * targeted range and setting it up for copy-on-write.
20974 	 */
20975 	return TRUE;
20976 }
20977 
20978 vm_map_offset_t
vm_map_round_page_mask(vm_map_offset_t offset,vm_map_offset_t mask)20979 vm_map_round_page_mask(
20980 	vm_map_offset_t offset,
20981 	vm_map_offset_t mask)
20982 {
20983 	return VM_MAP_ROUND_PAGE(offset, mask);
20984 }
20985 
20986 vm_map_offset_t
vm_map_trunc_page_mask(vm_map_offset_t offset,vm_map_offset_t mask)20987 vm_map_trunc_page_mask(
20988 	vm_map_offset_t offset,
20989 	vm_map_offset_t mask)
20990 {
20991 	return VM_MAP_TRUNC_PAGE(offset, mask);
20992 }
20993 
20994 boolean_t
vm_map_page_aligned(vm_map_offset_t offset,vm_map_offset_t mask)20995 vm_map_page_aligned(
20996 	vm_map_offset_t offset,
20997 	vm_map_offset_t mask)
20998 {
20999 	return ((offset) & mask) == 0;
21000 }
21001 
21002 int
vm_map_page_shift(vm_map_t map)21003 vm_map_page_shift(
21004 	vm_map_t map)
21005 {
21006 	return VM_MAP_PAGE_SHIFT(map);
21007 }
21008 
21009 int
vm_map_page_size(vm_map_t map)21010 vm_map_page_size(
21011 	vm_map_t map)
21012 {
21013 	return VM_MAP_PAGE_SIZE(map);
21014 }
21015 
21016 vm_map_offset_t
vm_map_page_mask(vm_map_t map)21017 vm_map_page_mask(
21018 	vm_map_t map)
21019 {
21020 	return VM_MAP_PAGE_MASK(map);
21021 }
21022 
21023 kern_return_t
vm_map_set_page_shift(vm_map_t map,int pageshift)21024 vm_map_set_page_shift(
21025 	vm_map_t        map,
21026 	int             pageshift)
21027 {
21028 	if (map->hdr.nentries != 0) {
21029 		/* too late to change page size */
21030 		return KERN_FAILURE;
21031 	}
21032 
21033 	map->hdr.page_shift = pageshift;
21034 
21035 	return KERN_SUCCESS;
21036 }
21037 
/*
 * vm_map_query_volatile:
 *	Computes the virtual, resident, compressed and pmap-accounted
 *	footprint of the writable volatile (or empty) purgeable objects
 *	mapped in "map".  All sizes are returned in bytes.
 *	The caller must hold the map lock; it is still held on return.
 */
kern_return_t
vm_map_query_volatile(
	vm_map_t        map,
	mach_vm_size_t  *volatile_virtual_size_p,
	mach_vm_size_t  *volatile_resident_size_p,
	mach_vm_size_t  *volatile_compressed_size_p,
	mach_vm_size_t  *volatile_pmap_size_p,
	mach_vm_size_t  *volatile_compressed_pmap_size_p)
{
	mach_vm_size_t  volatile_virtual_size;
	mach_vm_size_t  volatile_resident_count;
	mach_vm_size_t  volatile_compressed_count;
	mach_vm_size_t  volatile_pmap_count;
	mach_vm_size_t  volatile_compressed_pmap_count;
	mach_vm_size_t  resident_count;
	vm_map_entry_t  entry;
	vm_object_t     object;

	/* map should be locked by caller */

	volatile_virtual_size = 0;
	volatile_resident_count = 0;
	volatile_compressed_count = 0;
	volatile_pmap_count = 0;
	volatile_compressed_pmap_count = 0;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;

		if (entry->is_sub_map) {
			/* sub-maps have no directly-owned purgeable objects */
			continue;
		}
		if (!(entry->protection & VM_PROT_WRITE)) {
			/* only writable mappings count toward volatile footprint */
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}
		if (object->purgable != VM_PURGABLE_VOLATILE &&
		    object->purgable != VM_PURGABLE_EMPTY) {
			continue;
		}
		if (VME_OFFSET(entry)) {
			/*
			 * If the map entry has been split and the object now
			 * appears several times in the VM map, we don't want
			 * to count the object's resident_page_count more than
			 * once.  We count it only for the first one, starting
			 * at offset 0 and ignore the other VM map entries.
			 */
			continue;
		}
		resident_count = object->resident_page_count;
		/*
		 * NOTE(review): VME_OFFSET(entry) is always 0 at this point
		 * (non-zero offsets were skipped just above), so this
		 * adjustment is currently a no-op retained for safety.
		 */
		if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
			resident_count = 0;
		} else {
			resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
		}

		volatile_virtual_size += entry->vme_end - entry->vme_start;
		volatile_resident_count += resident_count;
		if (object->pager) {
			volatile_compressed_count +=
			    vm_compressor_pager_get_count(object->pager);
		}
		pmap_compressed_bytes = 0;
		/* ask the pmap how much of this range is actually mapped/compressed */
		pmap_resident_bytes =
		    pmap_query_resident(map->pmap,
		    entry->vme_start,
		    entry->vme_end,
		    &pmap_compressed_bytes);
		volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
		volatile_compressed_pmap_count += (pmap_compressed_bytes
		    / PAGE_SIZE);
	}

	/* map is still locked on return */

	*volatile_virtual_size_p = volatile_virtual_size;
	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
	*volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
	*volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;

	return KERN_SUCCESS;
}
21127 
21128 void
vm_map_sizes(vm_map_t map,vm_map_size_t * psize,vm_map_size_t * pfree,vm_map_size_t * plargest_free)21129 vm_map_sizes(vm_map_t map,
21130     vm_map_size_t * psize,
21131     vm_map_size_t * pfree,
21132     vm_map_size_t * plargest_free)
21133 {
21134 	vm_map_entry_t  entry;
21135 	vm_map_offset_t prev;
21136 	vm_map_size_t   free, total_free, largest_free;
21137 	boolean_t       end;
21138 
21139 	if (!map) {
21140 		*psize = *pfree = *plargest_free = 0;
21141 		return;
21142 	}
21143 	total_free = largest_free = 0;
21144 
21145 	vm_map_lock_read(map);
21146 	if (psize) {
21147 		*psize = map->max_offset - map->min_offset;
21148 	}
21149 
21150 	prev = map->min_offset;
21151 	for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
21152 		end = (entry == vm_map_to_entry(map));
21153 
21154 		if (end) {
21155 			free = entry->vme_end   - prev;
21156 		} else {
21157 			free = entry->vme_start - prev;
21158 		}
21159 
21160 		total_free += free;
21161 		if (free > largest_free) {
21162 			largest_free = free;
21163 		}
21164 
21165 		if (end) {
21166 			break;
21167 		}
21168 		prev = entry->vme_end;
21169 	}
21170 	vm_map_unlock_read(map);
21171 	if (pfree) {
21172 		*pfree = total_free;
21173 	}
21174 	if (plargest_free) {
21175 		*plargest_free = largest_free;
21176 	}
21177 }
21178 
21179 #if VM_SCAN_FOR_SHADOW_CHAIN
21180 int vm_map_shadow_max(vm_map_t map);
int
vm_map_shadow_max(
	vm_map_t map)
{
	/*
	 * Returns the length of the longest shadow chain among all VM
	 * objects mapped in "map" (0 for a NULL map, or if no mapped
	 * object has a shadow).  Diagnostic helper, only built under
	 * VM_SCAN_FOR_SHADOW_CHAIN.
	 */
	int             shadows, shadows_max;
	vm_map_entry_t  entry;
	vm_object_t     object, next_object;

	if (map == NULL) {
		return 0;
	}

	shadows_max = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			/* sub-map entries have no object chain to measure */
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}
		/*
		 * Walk down the shadow chain hand-over-hand: lock the next
		 * object before unlocking the current one, so the chain can't
		 * be torn down underneath us mid-walk.
		 */
		vm_object_lock_shared(object);
		for (shadows = 0;
		    object->shadow != NULL;
		    shadows++, object = next_object) {
			next_object = object->shadow;
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
		}
		vm_object_unlock(object);
		if (shadows > shadows_max) {
			shadows_max = shadows;
		}
	}

	vm_map_unlock_read(map);

	return shadows_max;
}
21225 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21226 
21227 void
vm_commit_pagezero_status(vm_map_t lmap)21228 vm_commit_pagezero_status(vm_map_t lmap)
21229 {
21230 	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
21231 }
21232 
21233 #if XNU_TARGET_OS_OSX
void
vm_map_set_high_start(
	vm_map_t        map,
	vm_map_offset_t high_start)
{
	/*
	 * Records "high_start" in the map.  Presumably used as a floor when
	 * searching for free address space — TODO(review): confirm against
	 * the users of vmmap_high_start.  macOS only.
	 */
	map->vmmap_high_start = high_start;
}
21241 #endif /* XNU_TARGET_OS_OSX */
21242 
21243 
21244 /*
21245  * FORKED CORPSE FOOTPRINT
21246  *
21247  * A forked corpse gets a copy of the original VM map but its pmap is mostly
21248  * empty since it never ran and never got to fault in any pages.
21249  * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21250  * a forked corpse would therefore return very little information.
21251  *
21252  * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21253  * to vm_map_fork() to collect footprint information from the original VM map
21254  * and its pmap, and store it in the forked corpse's VM map.  That information
21255  * is stored in place of the VM map's "hole list" since we'll never need to
21256  * lookup for holes in the corpse's map.
21257  *
21258  * The corpse's footprint info looks like this:
21259  *
21260  * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21261  * as follows:
21262  *                     +---------------------------------------+
21263  *            header-> | cf_size                               |
21264  *                     +-------------------+-------------------+
21265  *                     | cf_last_region    | cf_last_zeroes    |
21266  *                     +-------------------+-------------------+
21267  *           region1-> | cfr_vaddr                             |
21268  *                     +-------------------+-------------------+
21269  *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
21270  *                     +---------------------------------------+
21271  *                     | d4 | d5 | ...                         |
21272  *                     +---------------------------------------+
21273  *                     | ...                                   |
21274  *                     +-------------------+-------------------+
21275  *                     | dy | dz | na | na | cfr_vaddr...      | <-region2
21276  *                     +-------------------+-------------------+
21277  *                     | cfr_vaddr (ctd)   | cfr_num_pages     |
21278  *                     +---------------------------------------+
21279  *                     | d0 | d1 ...                           |
21280  *                     +---------------------------------------+
21281  *                       ...
21282  *                     +---------------------------------------+
21283  *       last region-> | cfr_vaddr                             |
21284  *                     +---------------------------------------+
21285  *                     + cfr_num_pages     | d0 | d1 | d2 | d3 |
21286  *                     +---------------------------------------+
21287  *                       ...
21288  *                     +---------------------------------------+
21289  *                     | dx | dy | dz | na | na | na | na | na |
21290  *                     +---------------------------------------+
21291  *
21292  * where:
21293  *      cf_size:	total size of the buffer (rounded to page size)
21294  *      cf_last_region:	offset in the buffer of the last "region" sub-header
21295  *	cf_last_zeroes: number of trailing "zero" dispositions at the end
21296  *			of last region
21297  *	cfr_vaddr:	virtual address of the start of the covered "region"
21298  *	cfr_num_pages:	number of pages in the covered "region"
21299  *	d*:		disposition of the page at that virtual address
21300  * Regions in the buffer are word-aligned.
21301  *
21302  * We estimate the size of the buffer based on the number of memory regions
21303  * and the virtual size of the address space.  While copying each memory region
21304  * during vm_map_fork(), we also collect the footprint info for that region
21305  * and store it in the buffer, packing it as much as possible (coalescing
21306  * contiguous memory regions to avoid having too many region headers and
21307  * avoiding long streaks of "zero" page dispositions by splitting footprint
 * "regions"), so the number of regions in the footprint buffer might not match
 * the number of memory regions in the address space.
21310  *
21311  * We also have to copy the original task's "nonvolatile" ledgers since that's
21312  * part of the footprint and will need to be reported to any tool asking for
21313  * the footprint information of the forked corpse.
21314  */
21315 
/* statistics on corpse footprint collection — TODO(review): updated elsewhere; verify against users */
uint64_t vm_map_corpse_footprint_count = 0;     /* footprints collected */
uint64_t vm_map_corpse_footprint_size_avg = 0;  /* presumably average buffer size */
uint64_t vm_map_corpse_footprint_size_max = 0;  /* presumably largest buffer seen */
uint64_t vm_map_corpse_footprint_full = 0;      /* presumably buffer-full events */
uint64_t vm_map_corpse_footprint_no_buf = 0;    /* buffer allocation failures (see _collect) */

/*
 * Header at the start of the pageable corpse-footprint buffer; the layout
 * of the whole buffer is described in the large comment above.
 */
struct vm_map_corpse_footprint_header {
	vm_size_t       cf_size;        /* allocated buffer size */
	uint32_t        cf_last_region; /* offset of last region in buffer */
	union {
		uint32_t cfu_last_zeroes; /* during creation:
		                           * number of "zero" dispositions at
		                           * end of last region */
		uint32_t cfu_hint_region; /* during lookup:
		                           * offset of last looked up region */
#define cf_last_zeroes cfu.cfu_last_zeroes
#define cf_hint_region cfu.cfu_hint_region
	} cfu;
};
/* one-byte page disposition (see vm_page_disposition_to_cf_disp) */
typedef uint8_t cf_disp_t;
/*
 * One "region" record inside the footprint buffer: a contiguous VA range
 * followed by one cf_disp_t per page.  Packed: records are only
 * word-aligned within the buffer.
 */
struct vm_map_corpse_footprint_region {
	vm_map_offset_t cfr_vaddr;      /* region start virtual address */
	uint32_t        cfr_num_pages;  /* number of pages in this "region" */
	cf_disp_t   cfr_disposition[0]; /* disposition of each page */
} __attribute__((packed));
21341 
21342 static cf_disp_t
vm_page_disposition_to_cf_disp(int disposition)21343 vm_page_disposition_to_cf_disp(
21344 	int disposition)
21345 {
21346 	assert(sizeof(cf_disp_t) == 1);
21347 	/* relocate bits that don't fit in a "uint8_t" */
21348 	if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21349 		disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21350 	}
21351 	/* cast gets rid of extra bits */
21352 	return (cf_disp_t) disposition;
21353 }
21354 
21355 static int
vm_page_cf_disp_to_disposition(cf_disp_t cf_disp)21356 vm_page_cf_disp_to_disposition(
21357 	cf_disp_t cf_disp)
21358 {
21359 	int disposition;
21360 
21361 	assert(sizeof(cf_disp_t) == 1);
21362 	disposition = (int) cf_disp;
21363 	/* move relocated bits back in place */
21364 	if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21365 		disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21366 		disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21367 	}
21368 	return disposition;
21369 }
21370 
21371 /*
21372  * vm_map_corpse_footprint_new_region:
21373  *      closes the current footprint "region" and creates a new one
21374  *
21375  * Returns NULL if there's not enough space in the buffer for a new region.
21376  */
static struct vm_map_corpse_footprint_region *
vm_map_corpse_footprint_new_region(
	struct vm_map_corpse_footprint_header *footprint_header)
{
	/*
	 * Closes the current (last) footprint region in the buffer and
	 * returns a pointer to a fresh, zero-length region for the caller
	 * to initialize (cfr_vaddr / cfr_num_pages).  If the closed region
	 * turns out to be empty after trimming its trailing "zero"
	 * dispositions, it is reused in place.  Returns NULL when the
	 * buffer has no room for another region header.
	 */
	uintptr_t       footprint_edge;
	uint32_t        new_region_offset;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;

	/* first byte past the end of the buffer */
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);
	footprint_region = ((struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region));
	assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
	    footprint_edge);

	/* get rid of trailing zeroes in the last region */
	assert(footprint_region->cfr_num_pages >=
	    footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -=
	    footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* reuse this region if it's now empty */
	if (footprint_region->cfr_num_pages == 0) {
		return footprint_region;
	}

	/* compute offset of new region: header + dispositions, word-aligned */
	new_region_offset = footprint_header->cf_last_region;
	new_region_offset += sizeof(*footprint_region);
	new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
	new_region_offset = roundup(new_region_offset, sizeof(int));

	/* check if we're going over the edge */
	if (((uintptr_t)footprint_header +
	    new_region_offset +
	    sizeof(*footprint_region)) >=
	    footprint_edge) {
		/* over the edge: no new region */
		return NULL;
	}

	/* adjust offset of last region in header */
	footprint_header->cf_last_region = new_region_offset;

	new_footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);
	new_footprint_region->cfr_vaddr = 0;
	new_footprint_region->cfr_num_pages = 0;
	/* caller needs to initialize new region */

	return new_footprint_region;
}
21433 
21434 /*
21435  * vm_map_corpse_footprint_collect:
21436  *	collect footprint information for "old_entry" in "old_map" and
21437  *	stores it in "new_map"'s vmmap_footprint_info.
21438  */
21439 kern_return_t
vm_map_corpse_footprint_collect(vm_map_t old_map,vm_map_entry_t old_entry,vm_map_t new_map)21440 vm_map_corpse_footprint_collect(
21441 	vm_map_t        old_map,
21442 	vm_map_entry_t  old_entry,
21443 	vm_map_t        new_map)
21444 {
21445 	vm_map_offset_t va;
21446 	kern_return_t   kr;
21447 	struct vm_map_corpse_footprint_header *footprint_header;
21448 	struct vm_map_corpse_footprint_region *footprint_region;
21449 	struct vm_map_corpse_footprint_region *new_footprint_region;
21450 	cf_disp_t       *next_disp_p;
21451 	uintptr_t       footprint_edge;
21452 	uint32_t        num_pages_tmp;
21453 	int             effective_page_size;
21454 
21455 	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
21456 
21457 	va = old_entry->vme_start;
21458 
21459 	vm_map_lock_assert_exclusive(old_map);
21460 	vm_map_lock_assert_exclusive(new_map);
21461 
21462 	assert(new_map->has_corpse_footprint);
21463 	assert(!old_map->has_corpse_footprint);
21464 	if (!new_map->has_corpse_footprint ||
21465 	    old_map->has_corpse_footprint) {
21466 		/*
21467 		 * This can only transfer footprint info from a
21468 		 * map with a live pmap to a map with a corpse footprint.
21469 		 */
21470 		return KERN_NOT_SUPPORTED;
21471 	}
21472 
21473 	if (new_map->vmmap_corpse_footprint == NULL) {
21474 		vm_offset_t     buf;
21475 		vm_size_t       buf_size;
21476 
21477 		buf = 0;
21478 		buf_size = (sizeof(*footprint_header) +
21479 		    (old_map->hdr.nentries
21480 		    *
21481 		    (sizeof(*footprint_region) +
21482 		    +3))            /* potential alignment for each region */
21483 		    +
21484 		    ((old_map->size / effective_page_size)
21485 		    *
21486 		    sizeof(cf_disp_t)));      /* disposition for each page */
21487 //		printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21488 		buf_size = round_page(buf_size);
21489 
21490 		/* limit buffer to 1 page to validate overflow detection */
21491 //		buf_size = PAGE_SIZE;
21492 
21493 		/* limit size to a somewhat sane amount */
21494 #if XNU_TARGET_OS_OSX
21495 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
21496 #else /* XNU_TARGET_OS_OSX */
21497 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
21498 #endif /* XNU_TARGET_OS_OSX */
21499 		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
21500 			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
21501 		}
21502 
21503 		/*
21504 		 * Allocate the pageable buffer (with a trailing guard page).
21505 		 * It will be zero-filled on demand.
21506 		 */
21507 		kr = kernel_memory_allocate(kernel_map,
21508 		    &buf,
21509 		    (buf_size
21510 		    + PAGE_SIZE),                          /* trailing guard page */
21511 		    0,                         /* mask */
21512 		    KMA_PAGEABLE | KMA_GUARD_LAST,
21513 		    VM_KERN_MEMORY_DIAG);
21514 		if (kr != KERN_SUCCESS) {
21515 			vm_map_corpse_footprint_no_buf++;
21516 			return kr;
21517 		}
21518 
21519 		/* initialize header and 1st region */
21520 		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
21521 		new_map->vmmap_corpse_footprint = footprint_header;
21522 
21523 		footprint_header->cf_size = buf_size;
21524 		footprint_header->cf_last_region =
21525 		    sizeof(*footprint_header);
21526 		footprint_header->cf_last_zeroes = 0;
21527 
21528 		footprint_region = (struct vm_map_corpse_footprint_region *)
21529 		    ((char *)footprint_header +
21530 		    footprint_header->cf_last_region);
21531 		footprint_region->cfr_vaddr = 0;
21532 		footprint_region->cfr_num_pages = 0;
21533 	} else {
21534 		/* retrieve header and last region */
21535 		footprint_header = (struct vm_map_corpse_footprint_header *)
21536 		    new_map->vmmap_corpse_footprint;
21537 		footprint_region = (struct vm_map_corpse_footprint_region *)
21538 		    ((char *)footprint_header +
21539 		    footprint_header->cf_last_region);
21540 	}
21541 	footprint_edge = ((uintptr_t)footprint_header +
21542 	    footprint_header->cf_size);
21543 
21544 	if ((footprint_region->cfr_vaddr +
21545 	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
21546 	    effective_page_size))
21547 	    != old_entry->vme_start) {
21548 		uint64_t num_pages_delta, num_pages_delta_size;
21549 		uint32_t region_offset_delta_size;
21550 
21551 		/*
21552 		 * Not the next contiguous virtual address:
21553 		 * start a new region or store "zero" dispositions for
21554 		 * the missing pages?
21555 		 */
21556 		/* size of gap in actual page dispositions */
21557 		num_pages_delta = ((old_entry->vme_start -
21558 		    footprint_region->cfr_vaddr) / effective_page_size)
21559 		    - footprint_region->cfr_num_pages;
21560 		num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
21561 		/* size of gap as a new footprint region header */
21562 		region_offset_delta_size =
21563 		    (sizeof(*footprint_region) +
21564 		    roundup(((footprint_region->cfr_num_pages -
21565 		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
21566 		    sizeof(int)) -
21567 		    ((footprint_region->cfr_num_pages -
21568 		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
21569 //		printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
21570 		if (region_offset_delta_size < num_pages_delta_size ||
21571 		    os_add3_overflow(footprint_region->cfr_num_pages,
21572 		    (uint32_t) num_pages_delta,
21573 		    1,
21574 		    &num_pages_tmp)) {
21575 			/*
21576 			 * Storing data for this gap would take more space
21577 			 * than inserting a new footprint region header:
21578 			 * let's start a new region and save space. If it's a
21579 			 * tie, let's avoid using a new region, since that
21580 			 * would require more region hops to find the right
21581 			 * range during lookups.
21582 			 *
21583 			 * If the current region's cfr_num_pages would overflow
21584 			 * if we added "zero" page dispositions for the gap,
21585 			 * no choice but to start a new region.
21586 			 */
21587 //			printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21588 			new_footprint_region =
21589 			    vm_map_corpse_footprint_new_region(footprint_header);
21590 			/* check that we're not going over the edge */
21591 			if (new_footprint_region == NULL) {
21592 				goto over_the_edge;
21593 			}
21594 			footprint_region = new_footprint_region;
21595 			/* initialize new region as empty */
21596 			footprint_region->cfr_vaddr = old_entry->vme_start;
21597 			footprint_region->cfr_num_pages = 0;
21598 		} else {
21599 			/*
21600 			 * Store "zero" page dispositions for the missing
21601 			 * pages.
21602 			 */
21603 //			printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21604 			for (; num_pages_delta > 0; num_pages_delta--) {
21605 				next_disp_p = (cf_disp_t *)
21606 				    ((uintptr_t) footprint_region +
21607 				    sizeof(*footprint_region));
21608 				next_disp_p += footprint_region->cfr_num_pages;
21609 				/* check that we're not going over the edge */
21610 				if ((uintptr_t)next_disp_p >= footprint_edge) {
21611 					goto over_the_edge;
21612 				}
21613 				/* store "zero" disposition for this gap page */
21614 				footprint_region->cfr_num_pages++;
21615 				*next_disp_p = (cf_disp_t) 0;
21616 				footprint_header->cf_last_zeroes++;
21617 			}
21618 		}
21619 	}
21620 
21621 	for (va = old_entry->vme_start;
21622 	    va < old_entry->vme_end;
21623 	    va += effective_page_size) {
21624 		int             disposition;
21625 		cf_disp_t       cf_disp;
21626 
21627 		vm_map_footprint_query_page_info(old_map,
21628 		    old_entry,
21629 		    va,
21630 		    &disposition);
21631 		cf_disp = vm_page_disposition_to_cf_disp(disposition);
21632 
21633 //		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21634 
21635 		if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
21636 			/*
21637 			 * Ignore "zero" dispositions at start of
21638 			 * region: just move start of region.
21639 			 */
21640 			footprint_region->cfr_vaddr += effective_page_size;
21641 			continue;
21642 		}
21643 
21644 		/* would region's cfr_num_pages overflow? */
21645 		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
21646 		    &num_pages_tmp)) {
21647 			/* overflow: create a new region */
21648 			new_footprint_region =
21649 			    vm_map_corpse_footprint_new_region(
21650 				footprint_header);
21651 			if (new_footprint_region == NULL) {
21652 				goto over_the_edge;
21653 			}
21654 			footprint_region = new_footprint_region;
21655 			footprint_region->cfr_vaddr = va;
21656 			footprint_region->cfr_num_pages = 0;
21657 		}
21658 
21659 		next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
21660 		    sizeof(*footprint_region));
21661 		next_disp_p += footprint_region->cfr_num_pages;
21662 		/* check that we're not going over the edge */
21663 		if ((uintptr_t)next_disp_p >= footprint_edge) {
21664 			goto over_the_edge;
21665 		}
21666 		/* store this dispostion */
21667 		*next_disp_p = cf_disp;
21668 		footprint_region->cfr_num_pages++;
21669 
21670 		if (cf_disp != 0) {
21671 			/* non-zero disp: break the current zero streak */
21672 			footprint_header->cf_last_zeroes = 0;
21673 			/* done */
21674 			continue;
21675 		}
21676 
21677 		/* zero disp: add to the current streak of zeroes */
21678 		footprint_header->cf_last_zeroes++;
21679 		if ((footprint_header->cf_last_zeroes +
21680 		    roundup(((footprint_region->cfr_num_pages -
21681 		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
21682 		    (sizeof(int) - 1),
21683 		    sizeof(int))) <
21684 		    (sizeof(*footprint_header))) {
21685 			/*
21686 			 * There are not enough trailing "zero" dispositions
21687 			 * (+ the extra padding we would need for the previous
21688 			 * region); creating a new region would not save space
21689 			 * at this point, so let's keep this "zero" disposition
21690 			 * in this region and reconsider later.
21691 			 */
21692 			continue;
21693 		}
21694 		/*
21695 		 * Create a new region to avoid having too many consecutive
21696 		 * "zero" dispositions.
21697 		 */
21698 		new_footprint_region =
21699 		    vm_map_corpse_footprint_new_region(footprint_header);
21700 		if (new_footprint_region == NULL) {
21701 			goto over_the_edge;
21702 		}
21703 		footprint_region = new_footprint_region;
21704 		/* initialize the new region as empty ... */
21705 		footprint_region->cfr_num_pages = 0;
21706 		/* ... and skip this "zero" disp */
21707 		footprint_region->cfr_vaddr = va + effective_page_size;
21708 	}
21709 
21710 	return KERN_SUCCESS;
21711 
21712 over_the_edge:
21713 //	printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21714 	vm_map_corpse_footprint_full++;
21715 	return KERN_RESOURCE_SHORTAGE;
21716 }
21717 
21718 /*
21719  * vm_map_corpse_footprint_collect_done:
21720  *	completes the footprint collection by getting rid of any remaining
21721  *	trailing "zero" dispositions and trimming the unused part of the
21722  *	kernel buffer
21723  */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t        new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t       buf_size, actual_size;
	kern_return_t   kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		/* no footprint buffer was collected: nothing to finalize */
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	/* locate the last (most recently started) footprint region */
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/*
	 * Bytes actually in use: everything up to the last region's header,
	 * plus that header and its remaining page dispositions.
	 */
	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));

//	printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	/* update global footprint-buffer statistics: running average, count, max */
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		/*
		 * Trim the unused tail of the kernel buffer: deallocate
		 * everything past "actual_size" plus one page, then make the
		 * page right after "actual_size" inaccessible so it serves as
		 * the new trailing guard page.
		 */
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE),                 /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE,             /* set_maximum */
		    VM_PROT_NONE);
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	/* record the trimmed size of the footprint buffer */
	footprint_header->cf_size = actual_size;
}
21796 
21797 /*
21798  * vm_map_corpse_footprint_query_page_info:
21799  *	retrieves the disposition of the page at virtual address "vaddr"
21800  *	in the forked corpse's VM map
21801  *
21802  * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
21803  */
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t        map,
	vm_map_offset_t va,
	int             *disposition_p)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t        footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int             disp_idx;
	kern_return_t   kr;
	int             effective_page_size;
	cf_disp_t       cf_disp;

	if (!map->has_corpse_footprint) {
		/* this map carries no corpse footprint data to query */
		*disposition_p = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		/* footprint was expected but the buffer is gone */
		*disposition_p = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

lookup_again:
	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    effective_page_size));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		goto lookup_again;
	}

	/* walk the regions forward until one covers "va" or we run out */
	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    effective_page_size));
	}
	if (va < region_start || va >= region_end) {
		/* page not found: report a "zero" disposition (not an error) */
		*disposition_p = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
	cf_disp = footprint_region->cfr_disposition[disp_idx];
	/* translate from compact footprint encoding to VM page disposition */
	*disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
	kr = KERN_SUCCESS;
done:
//	if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disposition_p,
	    kern_return_t, kr);

	return kr;
}
21910 
21911 void
vm_map_corpse_footprint_destroy(vm_map_t map)21912 vm_map_corpse_footprint_destroy(
21913 	vm_map_t        map)
21914 {
21915 	if (map->has_corpse_footprint &&
21916 	    map->vmmap_corpse_footprint != 0) {
21917 		struct vm_map_corpse_footprint_header *footprint_header;
21918 		vm_size_t buf_size;
21919 		kern_return_t kr;
21920 
21921 		footprint_header = map->vmmap_corpse_footprint;
21922 		buf_size = footprint_header->cf_size;
21923 		kr = vm_deallocate(kernel_map,
21924 		    (vm_offset_t) map->vmmap_corpse_footprint,
21925 		    ((vm_size_t) buf_size
21926 		    + PAGE_SIZE));                 /* trailing guard page */
21927 		assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
21928 		map->vmmap_corpse_footprint = 0;
21929 		map->has_corpse_footprint = FALSE;
21930 	}
21931 }
21932 
21933 /*
21934  * vm_map_copy_footprint_ledgers:
21935  *	copies any ledger that's relevant to the memory footprint of "old_task"
21936  *	into the forked corpse's task ("new_task")
21937  */
21938 void
vm_map_copy_footprint_ledgers(task_t old_task,task_t new_task)21939 vm_map_copy_footprint_ledgers(
21940 	task_t  old_task,
21941 	task_t  new_task)
21942 {
21943 	vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
21944 	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
21945 	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
21946 	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
21947 	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
21948 	vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
21949 	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
21950 	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
21951 	vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
21952 	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
21953 	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
21954 	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
21955 	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
21956 	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
21957 	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
21958 	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
21959 	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
21960 	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
21961 	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
21962 	vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
21963 }
21964 
21965 /*
21966  * vm_map_copy_ledger:
21967  *	copy a single ledger from "old_task" to "new_task"
21968  */
21969 void
vm_map_copy_ledger(task_t old_task,task_t new_task,int ledger_entry)21970 vm_map_copy_ledger(
21971 	task_t  old_task,
21972 	task_t  new_task,
21973 	int     ledger_entry)
21974 {
21975 	ledger_amount_t old_balance, new_balance, delta;
21976 
21977 	assert(new_task->map->has_corpse_footprint);
21978 	if (!new_task->map->has_corpse_footprint) {
21979 		return;
21980 	}
21981 
21982 	/* turn off sanity checks for the ledger we're about to mess with */
21983 	ledger_disable_panic_on_negative(new_task->ledger,
21984 	    ledger_entry);
21985 
21986 	/* adjust "new_task" to match "old_task" */
21987 	ledger_get_balance(old_task->ledger,
21988 	    ledger_entry,
21989 	    &old_balance);
21990 	ledger_get_balance(new_task->ledger,
21991 	    ledger_entry,
21992 	    &new_balance);
21993 	if (new_balance == old_balance) {
21994 		/* new == old: done */
21995 	} else if (new_balance > old_balance) {
21996 		/* new > old ==> new -= new - old */
21997 		delta = new_balance - old_balance;
21998 		ledger_debit(new_task->ledger,
21999 		    ledger_entry,
22000 		    delta);
22001 	} else {
22002 		/* new < old ==> new += old - new */
22003 		delta = old_balance - new_balance;
22004 		ledger_credit(new_task->ledger,
22005 		    ledger_entry,
22006 		    delta);
22007 	}
22008 }
22009 
22010 /*
22011  * vm_map_get_pmap:
22012  * returns the pmap associated with the vm_map
22013  */
pmap_t
vm_map_get_pmap(vm_map_t map)
{
	/*
	 * Simple accessor: expose the map's underlying physical map to
	 * callers outside this file (vm_map_pmap() is only visible here).
	 */
	return vm_map_pmap(map);
}
22019 
22020 #if MACH_ASSERT
22021 
22022 extern int pmap_ledgers_panic;
22023 extern int pmap_ledgers_panic_leeway;
22024 
/*
 * LEDGER_DRIFT(x) declares the drift counters tracked for ledger "x":
 * how many pmaps were over / under balance at destruction time, the
 * cumulative over/under amounts, and the largest single-pmap drift
 * observed in each direction.
 */
#define LEDGER_DRIFT(__LEDGER)                    \
	int             __LEDGER##_over;          \
	ledger_amount_t __LEDGER##_over_total;    \
	ledger_amount_t __LEDGER##_over_max;      \
	int             __LEDGER##_under;         \
	ledger_amount_t __LEDGER##_under_total;   \
	ledger_amount_t __LEDGER##_under_max

/*
 * Global (all-pmaps) ledger drift statistics, updated by
 * vm_map_pmap_check_ledgers() each time a pmap is checked.
 */
struct {
	uint64_t        num_pmaps_checked;      /* total pmaps examined */

	LEDGER_DRIFT(phys_footprint);
	LEDGER_DRIFT(internal);
	LEDGER_DRIFT(internal_compressed);
	LEDGER_DRIFT(external);
	LEDGER_DRIFT(reusable);
	LEDGER_DRIFT(iokit_mapped);
	LEDGER_DRIFT(alternate_accounting);
	LEDGER_DRIFT(alternate_accounting_compressed);
	LEDGER_DRIFT(page_table);
	LEDGER_DRIFT(purgeable_volatile);
	LEDGER_DRIFT(purgeable_nonvolatile);
	LEDGER_DRIFT(purgeable_volatile_compressed);
	LEDGER_DRIFT(purgeable_nonvolatile_compressed);
	LEDGER_DRIFT(tagged_nofootprint);
	LEDGER_DRIFT(tagged_footprint);
	LEDGER_DRIFT(tagged_nofootprint_compressed);
	LEDGER_DRIFT(tagged_footprint_compressed);
	LEDGER_DRIFT(network_volatile);
	LEDGER_DRIFT(network_nonvolatile);
	LEDGER_DRIFT(network_volatile_compressed);
	LEDGER_DRIFT(network_nonvolatile_compressed);
	LEDGER_DRIFT(media_nofootprint);
	LEDGER_DRIFT(media_footprint);
	LEDGER_DRIFT(media_nofootprint_compressed);
	LEDGER_DRIFT(media_footprint_compressed);
	LEDGER_DRIFT(graphics_nofootprint);
	LEDGER_DRIFT(graphics_footprint);
	LEDGER_DRIFT(graphics_nofootprint_compressed);
	LEDGER_DRIFT(graphics_footprint_compressed);
	LEDGER_DRIFT(neural_nofootprint);
	LEDGER_DRIFT(neural_footprint);
	LEDGER_DRIFT(neural_nofootprint_compressed);
	LEDGER_DRIFT(neural_footprint_compressed);
} pmap_ledgers_drift;
22070 
/*
 * vm_map_pmap_check_ledgers:
 *	at pmap destruction time, checks that each footprint-related ledger
 *	has a zero balance; records any drift in "pmap_ledgers_drift" and
 *	panics (or just logs) depending on the pmap_ledgers_panic settings.
 */
void
vm_map_pmap_check_ledgers(
	pmap_t          pmap,
	ledger_t        ledger,
	int             pid,
	char            *procname)
{
	ledger_amount_t bal;
	boolean_t       do_panic;

	do_panic = FALSE;

	pmap_ledgers_drift.num_pmaps_checked++;

	/*
	 * For one ledger entry: a non-zero balance at this point means the
	 * ledger drifted.  Log it, accumulate the drift statistics, and
	 * request a panic if the entry is marked panic-on-negative or the
	 * drift exceeds the configured leeway (in pages).
	 */
#define LEDGER_CHECK_BALANCE(__LEDGER)                                  \
MACRO_BEGIN                                                             \
	int panic_on_negative = TRUE;                                   \
	ledger_get_balance(ledger,                                      \
	                   task_ledgers.__LEDGER,                       \
	                   &bal);                                       \
	ledger_get_panic_on_negative(ledger,                            \
	                             task_ledgers.__LEDGER,             \
	                             &panic_on_negative);               \
	if (bal != 0) {                                                 \
	        if (panic_on_negative ||                                \
	            (pmap_ledgers_panic &&                              \
	             pmap_ledgers_panic_leeway > 0 &&                   \
	             (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||  \
	              bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
	                do_panic = TRUE;                                \
	        }                                                       \
	        printf("LEDGER BALANCE proc %d (%s) "                   \
	               "\"%s\" = %lld\n",                               \
	               pid, procname, #__LEDGER, bal);                  \
	        if (bal > 0) {                                          \
	                pmap_ledgers_drift.__LEDGER##_over++;           \
	                pmap_ledgers_drift.__LEDGER##_over_total += bal; \
	                if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
	                        pmap_ledgers_drift.__LEDGER##_over_max = bal; \
	                }                                               \
	        } else if (bal < 0) {                                   \
	                pmap_ledgers_drift.__LEDGER##_under++;          \
	                pmap_ledgers_drift.__LEDGER##_under_total += bal; \
	                if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
	                        pmap_ledgers_drift.__LEDGER##_under_max = bal; \
	                }                                               \
	        }                                                       \
	}                                                               \
MACRO_END

	/* check every ledger entry that pmap accounting can drift on */
	LEDGER_CHECK_BALANCE(phys_footprint);
	LEDGER_CHECK_BALANCE(internal);
	LEDGER_CHECK_BALANCE(internal_compressed);
	LEDGER_CHECK_BALANCE(external);
	LEDGER_CHECK_BALANCE(reusable);
	LEDGER_CHECK_BALANCE(iokit_mapped);
	LEDGER_CHECK_BALANCE(alternate_accounting);
	LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
	LEDGER_CHECK_BALANCE(page_table);
	LEDGER_CHECK_BALANCE(purgeable_volatile);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
	LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(tagged_nofootprint);
	LEDGER_CHECK_BALANCE(tagged_footprint);
	LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
	LEDGER_CHECK_BALANCE(network_volatile);
	LEDGER_CHECK_BALANCE(network_nonvolatile);
	LEDGER_CHECK_BALANCE(network_volatile_compressed);
	LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(media_nofootprint);
	LEDGER_CHECK_BALANCE(media_footprint);
	LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(media_footprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_nofootprint);
	LEDGER_CHECK_BALANCE(graphics_footprint);
	LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
	LEDGER_CHECK_BALANCE(neural_nofootprint);
	LEDGER_CHECK_BALANCE(neural_footprint);
	LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(neural_footprint_compressed);

	if (do_panic) {
		/* panic only when pmap_ledgers_panic is enabled; else just log */
		if (pmap_ledgers_panic) {
			panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers",
			    pmap, pid, procname);
		} else {
			printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		}
	}
}
22165 #endif /* MACH_ASSERT */
22166