1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_kern.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Kernel memory management.
64  */
65 
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_compressor.h>
75 #include <vm/vm_pageout.h>
76 #include <kern/misc_protos.h>
77 #include <vm/cpm.h>
78 #include <kern/ledger.h>
79 #include <kern/bits.h>
80 #include <kern/startup.h>
81 
82 #include <string.h>
83 
84 #include <libkern/OSDebug.h>
85 #include <libkern/crypto/sha2.h>
86 #include <libkern/section_keywords.h>
87 #include <sys/kdebug.h>
88 
89 #include <san/kasan.h>
90 
91 /*
92  *	Variables exported by this module.
93  */
94 
95 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
96 vm_map_t         kernel_pageable_map;
97 
98 /*
99  * Forward declarations for internal functions.
100  */
101 extern kern_return_t kmem_alloc_pages(
102 	vm_object_t             object,
103 	vm_object_offset_t      offset,
104 	vm_object_size_t        size);
105 
106 kern_return_t
107 kmem_alloc_contig(
108 	vm_map_t                map,
109 	vm_offset_t             *addrp,
110 	vm_size_t               size,
111 	vm_offset_t             mask,
112 	ppnum_t                 max_pnum,
113 	ppnum_t                 pnum_mask,
114 	kma_flags_t             flags,
115 	vm_tag_t                tag)
116 {
117 	vm_object_t             object;
118 	vm_object_offset_t      offset;
119 	vm_map_offset_t         map_addr;
120 	vm_map_offset_t         map_mask;
121 	vm_map_size_t           map_size, i;
122 	vm_map_entry_t          entry;
123 	vm_page_t               m, pages;
124 	kern_return_t           kr;
125 
126 	assert(VM_KERN_MEMORY_NONE != tag);
127 
128 	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
129 		return KERN_INVALID_ARGUMENT;
130 	}
131 
132 	map_size = vm_map_round_page(size,
133 	    VM_MAP_PAGE_MASK(map));
134 	map_mask = (vm_map_offset_t)mask;
135 
136 	/* Check for zero allocation size (either directly or via overflow) */
137 	if (map_size == 0) {
138 		*addrp = 0;
139 		return KERN_INVALID_ARGUMENT;
140 	}
141 
142 	/*
143 	 *	Allocate a new object (if necessary) and the reference we
144 	 *	will be donating to the map entry.  We must do this before
145 	 *	locking the map, or risk deadlock with the default pager.
146 	 */
147 	if ((flags & KMA_KOBJECT) != 0) {
148 		object = kernel_object;
149 		vm_object_reference(object);
150 	} else {
151 		object = vm_object_allocate(map_size);
152 	}
153 
154 	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0,
155 	    VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
156 	if (KERN_SUCCESS != kr) {
157 		vm_object_deallocate(object);
158 		return kr;
159 	}
160 
161 	if (object == kernel_object) {
162 		offset = map_addr;
163 	} else {
164 		offset = 0;
165 	}
166 	VME_OBJECT_SET(entry, object);
167 	VME_OFFSET_SET(entry, offset);
168 
169 	/* Take an extra object ref in case the map entry gets deleted */
170 	vm_object_reference(object);
171 	vm_map_unlock(map);
172 
173 	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
174 
175 	if (kr != KERN_SUCCESS) {
176 		vm_map_remove(map,
177 		    vm_map_trunc_page(map_addr,
178 		    VM_MAP_PAGE_MASK(map)),
179 		    vm_map_round_page(map_addr + map_size,
180 		    VM_MAP_PAGE_MASK(map)),
181 		    VM_MAP_REMOVE_NO_FLAGS);
182 		vm_object_deallocate(object);
183 		*addrp = 0;
184 		return kr;
185 	}
186 
187 	vm_object_lock(object);
188 	for (i = 0; i < map_size; i += PAGE_SIZE) {
189 		m = pages;
190 		pages = NEXT_PAGE(m);
191 		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
192 		m->vmp_busy = FALSE;
193 		vm_page_insert(m, object, offset + i);
194 	}
195 	vm_object_unlock(object);
196 
197 	kr = vm_map_wire_kernel(map,
198 	    vm_map_trunc_page(map_addr,
199 	    VM_MAP_PAGE_MASK(map)),
200 	    vm_map_round_page(map_addr + map_size,
201 	    VM_MAP_PAGE_MASK(map)),
202 	    VM_PROT_DEFAULT, tag,
203 	    FALSE);
204 
205 	if (kr != KERN_SUCCESS) {
206 		if (object == kernel_object) {
207 			vm_object_lock(object);
208 			vm_object_page_remove(object, offset, offset + map_size);
209 			vm_object_unlock(object);
210 		}
211 		vm_map_remove(map,
212 		    vm_map_trunc_page(map_addr,
213 		    VM_MAP_PAGE_MASK(map)),
214 		    vm_map_round_page(map_addr + map_size,
215 		    VM_MAP_PAGE_MASK(map)),
216 		    VM_MAP_REMOVE_NO_FLAGS);
217 		vm_object_deallocate(object);
218 		return kr;
219 	}
220 	vm_object_deallocate(object);
221 
222 	if (object == kernel_object) {
223 		vm_map_simplify(map, map_addr);
224 		vm_tag_update_size(tag, map_size);
225 	}
226 	*addrp = (vm_offset_t) map_addr;
227 	assert((vm_map_offset_t) *addrp == map_addr);
228 
229 	return KERN_SUCCESS;
230 }
231 
232 /*
233  * Master entry point for allocating kernel memory.
234  * NOTE: this routine is _never_ interrupt safe.
235  *
236  * map		: map to allocate into
237  * addrp	: pointer to start address of new memory
238  * size		: size of memory requested
239  * flags	: options
240  *		  KMA_HERE		*addrp is base address, else "anywhere"
241  *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
242  *		  KMA_KOBJECT		use kernel_object
243  *		  KMA_LOMEM		support for 32 bit devices in a 64 bit world
244  *					if set and a lomemory pool is available
245  *					grab pages from it... this also implies
246  *					KMA_NOPAGEWAIT
247  */
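/*
 * Illustrative usage sketch (not part of the original source; the tag below
 * is a hypothetical choice):
 *
 *	vm_offset_t addr;
 *	kern_return_t kr;
 *
 *	kr = kernel_memory_allocate(kernel_map, &addr, 4 * PAGE_SIZE, 0,
 *	    KMA_KOBJECT, VM_KERN_MEMORY_DIAG);
 *	if (kr == KERN_SUCCESS) {
 *		bzero((void *)addr, 4 * PAGE_SIZE);     returned memory is not zeroed
 *		...
 *		kmem_free(kernel_map, addr, 4 * PAGE_SIZE);
 *	}
 *
 * With KMA_GUARD_FIRST/KMA_GUARD_LAST the requested size must already
 * include the guard pages (see the fill_start/fill_size logic below).
 */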
248 
249 kern_return_t
250 kernel_memory_allocate(
251 	vm_map_t        map,
252 	vm_offset_t     *addrp,
253 	vm_size_t       size,
254 	vm_offset_t     mask,
255 	kma_flags_t     flags,
256 	vm_tag_t        tag)
257 {
258 	return kernel_memory_allocate_prot(map, addrp, size, mask, flags, tag,
259 	           VM_PROT_DEFAULT, VM_PROT_ALL);
260 }
261 
262 kern_return_t
263 kernel_memory_allocate_prot(
264 	vm_map_t        map,
265 	vm_offset_t     *addrp,
266 	vm_size_t       size,
267 	vm_offset_t     mask,
268 	kma_flags_t     flags,
269 	vm_tag_t        tag,
270 	vm_prot_t               protection,
271 	vm_prot_t               max_protection)
272 {
273 	vm_object_t             object;
274 	vm_object_offset_t      offset;
275 	vm_object_offset_t      pg_offset;
276 	vm_map_entry_t          entry = NULL;
277 	vm_map_offset_t         map_addr, fill_start;
278 	vm_map_offset_t         map_mask;
279 	vm_map_size_t           map_size, fill_size;
280 	kern_return_t           kr, pe_result;
281 	vm_page_t               mem;
282 	vm_page_t               guard_page_list = NULL;
283 	vm_page_t               wired_page_list = NULL;
284 	int                     guard_page_count = 0;
285 	int                     wired_page_count = 0;
286 	int                     vm_alloc_flags;
287 	vm_map_kernel_flags_t   vmk_flags;
288 	vm_prot_t               kma_prot;
289 
290 	if (startup_phase < STARTUP_SUB_KMEM) {
291 		panic("kernel_memory_allocate: VM is not ready");
292 	}
293 
294 	map_size = vm_map_round_page(size,
295 	    VM_MAP_PAGE_MASK(map));
296 	map_mask = (vm_map_offset_t) mask;
297 
298 	vm_alloc_flags = 0; //VM_MAKE_TAG(tag);
299 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
300 
301 	/* Check for zero allocation size (either directly or via overflow) */
302 	if (map_size == 0) {
303 		*addrp = 0;
304 		return KERN_INVALID_ARGUMENT;
305 	}
306 
307 	/*
308 	 * limit the size of a single extent of wired memory
309 	 * to try and limit the damage to the system if
310 	 * too many pages get wired down
311 	 * limit raised to 2GB with 128GB max physical limit,
312 	 * but scaled by installed memory above this
313 	 */
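	/*
	 * Worked example of the cap below: with 128GB installed,
	 * sane_size / 64 == 2GB == 1ULL << 31, so the limit stays at 2GB;
	 * with 256GB it scales up to 4GB per single wired allocation.
	 */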
314 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
315 	    map_size > MAX(1ULL << 31, sane_size / 64)) {
316 		return KERN_RESOURCE_SHORTAGE;
317 	}
318 
319 	/*
320 	 * Guard pages:
321 	 *
322  * Guard pages are implemented as fictitious pages.  By placing guard pages
323 	 * on either end of a stack, they can help detect cases where a thread walks
324 	 * off either end of its stack.  They are allocated and set up here and attempts
325 	 * to access those pages are trapped in vm_fault_page().
326 	 *
327 	 * The map_size we were passed may include extra space for
328 	 * guard pages.  If those were requested, then back it out of fill_size
329 	 * since vm_map_find_space() takes just the actual size not including
330 	 * guard pages.  Similarly, fill_start indicates where the actual pages
331 	 * will begin in the range.
332 	 */
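	/*
	 * Layout sketch for a request with both guard flags set:
	 *
	 *	map_addr                                        map_addr + map_size
	 *	| guard page | fill_size bytes of wired pages | guard page |
	 *	              ^ fill_start
	 */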
333 
334 	fill_start = 0;
335 	fill_size = map_size;
336 
337 	if (flags & KMA_GUARD_FIRST) {
338 		vmk_flags.vmkf_guard_before = TRUE;
339 		fill_start += PAGE_SIZE_64;
340 		fill_size -= PAGE_SIZE_64;
341 		if (map_size < fill_start + fill_size) {
342 			/* no space for a guard page */
343 			*addrp = 0;
344 			return KERN_INVALID_ARGUMENT;
345 		}
346 		guard_page_count++;
347 	}
348 	if (flags & KMA_GUARD_LAST) {
349 		vmk_flags.vmkf_guard_after = TRUE;
350 		fill_size -= PAGE_SIZE_64;
351 		if (map_size <= fill_start + fill_size) {
352 			/* no space for a guard page */
353 			*addrp = 0;
354 			return KERN_INVALID_ARGUMENT;
355 		}
356 		guard_page_count++;
357 	}
358 	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
359 	assert(wired_page_count * PAGE_SIZE_64 == fill_size);
360 
361 #if DEBUG || DEVELOPMENT
362 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
363 	    size, 0, 0, 0);
364 #endif
365 
366 	for (int i = 0; i < guard_page_count; i++) {
367 		mem = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
368 		if (mem == VM_PAGE_NULL) {
369 			kr = KERN_RESOURCE_SHORTAGE;
370 			goto out;
371 		}
372 		mem->vmp_snext = guard_page_list;
373 		guard_page_list = mem;
374 	}
375 
376 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
377 		kr = vm_page_alloc_list(wired_page_count, flags,
378 		    &wired_page_list);
379 		if (kr != KERN_SUCCESS) {
380 			goto out;
381 		}
382 	}
383 
384 	/*
385 	 *	Allocate a new object (if necessary).  We must do this before
386 	 *	locking the map, or risk deadlock with the default pager.
387 	 */
388 	if ((flags & KMA_KOBJECT) != 0) {
389 		object = kernel_object;
390 		vm_object_reference(object);
391 	} else if ((flags & KMA_COMPRESSOR) != 0) {
392 		object = compressor_object;
393 		vm_object_reference(object);
394 	} else {
395 		object = vm_object_allocate(map_size);
396 	}
397 
398 	if (flags & KMA_ATOMIC) {
399 		vmk_flags.vmkf_atomic_entry = TRUE;
400 	}
401 
402 	if (flags & KMA_LAST_FREE) {
403 		vm_alloc_flags |= VM_MAP_FIND_LAST_FREE;
404 	}
405 
406 	kr = vm_map_find_space(map, &map_addr,
407 	    fill_size, map_mask,
408 	    vm_alloc_flags, vmk_flags, tag, &entry);
409 
410 	if (KERN_SUCCESS != kr) {
411 		vm_object_deallocate(object);
412 		goto out;
413 	}
414 
415 	entry->protection = protection;
416 	entry->max_protection = max_protection;
417 
418 	if (object == kernel_object || object == compressor_object) {
419 		offset = map_addr;
420 	} else {
421 		offset = 0;
422 	}
423 	VME_OBJECT_SET(entry, object);
424 	VME_OFFSET_SET(entry, offset);
425 
426 	if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
427 		entry->wired_count++;
428 	}
429 
430 	if (flags & KMA_PERMANENT) {
431 		entry->permanent = TRUE;
432 	}
433 
434 	if (object != kernel_object && object != compressor_object) {
435 		vm_object_reference(object);
436 	}
437 
438 	vm_object_lock(object);
439 	vm_map_unlock(map);
440 
441 	pg_offset = 0;
442 
443 	if (fill_start) {
444 		if (guard_page_list == NULL) {
445 			panic("kernel_memory_allocate: guard_page_list == NULL");
446 		}
447 
448 		mem = guard_page_list;
449 		guard_page_list = mem->vmp_snext;
450 		mem->vmp_snext = NULL;
451 
452 		vm_page_insert(mem, object, offset + pg_offset);
453 
454 		mem->vmp_busy = FALSE;
455 		pg_offset += PAGE_SIZE_64;
456 	}
457 
458 	kma_prot = VM_PROT_READ | VM_PROT_WRITE;
459 
460 #if KASAN
461 	if (!(flags & KMA_VAONLY)) {
462 		/* for VAONLY mappings we notify in populate only */
463 		kasan_notify_address(map_addr, size);
464 	}
465 #endif
466 
467 	if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
468 		pg_offset = fill_start + fill_size;
469 	} else {
470 		for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
471 			if (wired_page_list == NULL) {
472 				panic("kernel_memory_allocate: wired_page_list == NULL");
473 			}
474 
475 			mem = wired_page_list;
476 			wired_page_list = mem->vmp_snext;
477 			mem->vmp_snext = NULL;
478 
479 			assert(mem->vmp_wire_count == 0);
480 			assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
481 
482 			mem->vmp_q_state = VM_PAGE_IS_WIRED;
483 			mem->vmp_wire_count++;
484 			if (__improbable(mem->vmp_wire_count == 0)) {
485 				panic("kernel_memory_allocate(%p): wire_count overflow",
486 				    mem);
487 			}
488 
489 			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
490 
491 			mem->vmp_busy = FALSE;
492 			mem->vmp_pmapped = TRUE;
493 			mem->vmp_wpmapped = TRUE;
494 
495 			PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset,
496 			    0, /* fault_phys_offset */
497 			    mem,
498 			    kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
499 			    PMAP_OPTIONS_NOWAIT, pe_result);
500 
501 			if (pe_result == KERN_RESOURCE_SHORTAGE) {
502 				vm_object_unlock(object);
503 
504 				PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
505 				    kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
506 				    pe_result);
507 
508 				vm_object_lock(object);
509 			}
510 
511 			assert(pe_result == KERN_SUCCESS);
512 
513 			if (flags & KMA_NOENCRYPT) {
514 				bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
515 
516 				pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
517 			}
518 		}
519 		if (kernel_object == object) {
520 			vm_tag_update_size(tag, fill_size);
521 		}
522 	}
523 	if ((fill_start + fill_size) < map_size) {
524 		if (guard_page_list == NULL) {
525 			panic("kernel_memory_allocate: guard_page_list == NULL");
526 		}
527 
528 		mem = guard_page_list;
529 		guard_page_list = mem->vmp_snext;
530 		mem->vmp_snext = NULL;
531 
532 		vm_page_insert(mem, object, offset + pg_offset);
533 
534 		mem->vmp_busy = FALSE;
535 	}
536 	if (guard_page_list || wired_page_list) {
537 		panic("kernel_memory_allocate: non empty list");
538 	}
539 
540 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
541 		vm_page_lockspin_queues();
542 		vm_page_wire_count += wired_page_count;
543 		vm_page_unlock_queues();
544 	}
545 
546 	vm_object_unlock(object);
547 
548 	/*
549  * now that the pages are wired, we no longer have to fear coalescing
550 	 */
551 	if (object == kernel_object || object == compressor_object) {
552 		vm_map_simplify(map, map_addr);
553 	} else {
554 		vm_object_deallocate(object);
555 	}
556 
557 #if DEBUG || DEVELOPMENT
558 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
559 	    wired_page_count, 0, 0, 0);
560 #endif
561 	/*
562 	 *	Return the memory, not zeroed.
563 	 */
564 	*addrp = CAST_DOWN(vm_offset_t, map_addr);
565 	return KERN_SUCCESS;
566 
567 out:
568 	if (guard_page_list) {
569 		vm_page_free_list(guard_page_list, FALSE);
570 	}
571 
572 	if (wired_page_list) {
573 		vm_page_free_list(wired_page_list, FALSE);
574 	}
575 
576 #if DEBUG || DEVELOPMENT
577 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
578 	    wired_page_count, 0, 0, 0);
579 #endif
580 	return kr;
581 }
582 
583 void
584 kernel_memory_populate_with_pages(
585 	vm_map_t        map,
586 	vm_offset_t     addr,
587 	vm_size_t       size,
588 	vm_page_t       page_list,
589 	kma_flags_t     flags,
590 	vm_tag_t        tag,
591 	vm_prot_t       prot)
592 {
593 	vm_object_t     object;
594 	kern_return_t   pe_result;
595 	vm_page_t       mem;
596 	int             page_count = atop_64(size);
597 
598 	if (flags & KMA_COMPRESSOR) {
599 		panic("%s(%p,0x%llx,0x%llx,0x%x): KMA_COMPRESSOR", __func__,
600 		    map, (uint64_t) addr, (uint64_t) size, flags);
601 	}
602 
603 	if (flags & KMA_KOBJECT) {
604 		object = kernel_object;
605 
606 		vm_object_lock(object);
607 	} else {
608 		/*
609 		 * If it's not the kernel object, we need to:
610 		 *      lock map;
611 		 *      lookup entry;
612 		 *      lock object;
613 		 *	take reference on object;
614 		 *      unlock map;
615 		 */
616 		panic("%s(%p,0x%llx,0x%llx,0x%x): !KMA_KOBJECT", __func__,
617 		    map, (uint64_t) addr, (uint64_t) size, flags);
618 	}
619 
620 	for (vm_object_offset_t pg_offset = 0;
621 	    pg_offset < size;
622 	    pg_offset += PAGE_SIZE_64) {
623 		if (page_list == NULL) {
624 			panic("%s: page_list too short", __func__);
625 		}
626 
627 		mem = page_list;
628 		page_list = mem->vmp_snext;
629 		mem->vmp_snext = NULL;
630 
631 		assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
632 		mem->vmp_q_state = VM_PAGE_IS_WIRED;
633 		mem->vmp_wire_count++;
634 		if (mem->vmp_wire_count == 0) {
635 			panic("%s(%p): wire_count overflow", __func__, mem);
636 		}
637 
638 		vm_page_insert_wired(mem, object, addr + pg_offset, tag);
639 
640 		mem->vmp_busy = FALSE;
641 		mem->vmp_pmapped = TRUE;
642 		mem->vmp_wpmapped = TRUE;
643 
644 		PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset,
645 		    0, /* fault_phys_offset */
646 		    mem,
647 		    prot, VM_PROT_NONE,
648 		    ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
649 		    PMAP_OPTIONS_NOWAIT, pe_result);
650 
651 		if (pe_result == KERN_RESOURCE_SHORTAGE) {
652 			vm_object_unlock(object);
653 
654 			PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
655 			    prot, VM_PROT_NONE,
656 			    ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
657 			    pe_result);
658 
659 			vm_object_lock(object);
660 		}
661 
662 		assert(pe_result == KERN_SUCCESS);
663 
664 		if (flags & KMA_NOENCRYPT) {
665 			__nosan_bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
666 			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
667 		}
668 	}
669 	if (page_list) {
670 		panic("%s: page_list too long", __func__);
671 	}
672 	vm_object_unlock(object);
673 
674 	vm_page_lockspin_queues();
675 	vm_page_wire_count += page_count;
676 	vm_page_unlock_queues();
677 	vm_tag_update_size(tag, size);
678 
679 #if KASAN
680 	if (map == compressor_map) {
681 		kasan_notify_address_nopoison(addr, size);
682 	} else {
683 		kasan_notify_address(addr, size);
684 	}
685 #endif
686 }
687 
688 kern_return_t
689 kernel_memory_populate(
690 	vm_map_t        map,
691 	vm_offset_t     addr,
692 	vm_size_t       size,
693 	kma_flags_t     flags,
694 	vm_tag_t        tag)
695 {
696 	vm_object_t             object;
697 	vm_object_offset_t      offset, pg_offset;
698 	kern_return_t           kr = KERN_SUCCESS;
699 	vm_page_t               mem;
700 	vm_page_t               page_list = NULL;
701 	int                     page_count = atop_64(size);
702 
703 #if DEBUG || DEVELOPMENT
704 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
705 	    size, 0, 0, 0);
706 #endif
707 
708 	assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
709 
710 	if (flags & KMA_COMPRESSOR) {
711 		pg_offset = page_count * PAGE_SIZE_64;
712 
713 		do {
714 			for (;;) {
715 				mem = vm_page_grab();
716 
717 				if (mem != VM_PAGE_NULL) {
718 					break;
719 				}
720 
721 				VM_PAGE_WAIT();
722 			}
723 			if (KMA_ZERO & flags) {
724 				vm_page_zero_fill(mem);
725 			}
726 			mem->vmp_snext = page_list;
727 			page_list = mem;
728 
729 			pg_offset -= PAGE_SIZE_64;
730 
731 			kr = pmap_enter_options(kernel_pmap,
732 			    addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
733 			    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
734 			    PMAP_OPTIONS_INTERNAL, NULL);
735 			assert(kr == KERN_SUCCESS);
736 		} while (pg_offset);
737 
738 		offset = addr;
739 		object = compressor_object;
740 
741 		vm_object_lock(object);
742 
743 		for (pg_offset = 0;
744 		    pg_offset < size;
745 		    pg_offset += PAGE_SIZE_64) {
746 			mem = page_list;
747 			page_list = mem->vmp_snext;
748 			mem->vmp_snext = NULL;
749 
750 			vm_page_insert(mem, object, offset + pg_offset);
751 			assert(mem->vmp_busy);
752 
753 			mem->vmp_busy = FALSE;
754 			mem->vmp_pmapped = TRUE;
755 			mem->vmp_wpmapped = TRUE;
756 			mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
757 		}
758 		vm_object_unlock(object);
759 
760 #if KASAN
761 		if (map == compressor_map) {
762 			kasan_notify_address_nopoison(addr, size);
763 		} else {
764 			kasan_notify_address(addr, size);
765 		}
766 #endif
767 
768 #if DEBUG || DEVELOPMENT
769 		task_t task = current_task();
770 		if (task != NULL) {
771 			ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_count);
772 		}
773 #endif
774 	} else {
775 		kr = vm_page_alloc_list(page_count, flags, &page_list);
776 		if (kr == KERN_SUCCESS) {
777 			kernel_memory_populate_with_pages(map, addr, size,
778 			    page_list, flags, tag, VM_PROT_READ | VM_PROT_WRITE);
779 		}
780 	}
781 
782 #if DEBUG || DEVELOPMENT
783 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
784 	    page_count, 0, 0, 0);
785 #endif
786 	return kr;
787 }
788 
789 
790 void
791 kernel_memory_depopulate(
792 	vm_map_t           map,
793 	vm_offset_t        addr,
794 	vm_size_t          size,
795 	kma_flags_t        flags,
796 	vm_tag_t           tag)
797 {
798 	vm_object_t        object;
799 	vm_object_offset_t offset, pg_offset;
800 	vm_page_t          mem;
801 	vm_page_t          local_freeq = NULL;
802 	unsigned int       pages_unwired;
803 
804 	assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
805 
806 	if (flags & KMA_COMPRESSOR) {
807 		offset = addr;
808 		object = compressor_object;
809 
810 		vm_object_lock(object);
811 	} else if (flags & KMA_KOBJECT) {
812 		offset = addr;
813 		object = kernel_object;
814 		vm_object_lock(object);
815 	} else {
816 		offset = 0;
817 		object = NULL;
818 		/*
819 		 * If it's not the kernel object, we need to:
820 		 *      lock map;
821 		 *      lookup entry;
822 		 *      lock object;
823 		 *      unlock map;
824 		 */
825 		panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
826 		    "!KMA_KOBJECT",
827 		    map, (uint64_t) addr, (uint64_t) size, flags);
828 	}
829 	pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);
830 
831 	for (pg_offset = 0, pages_unwired = 0;
832 	    pg_offset < size;
833 	    pg_offset += PAGE_SIZE_64) {
834 		mem = vm_page_lookup(object, offset + pg_offset);
835 
836 		assert(mem);
837 
838 		if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
839 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
840 			pages_unwired++;
841 		}
842 
843 		mem->vmp_busy = TRUE;
844 
845 		assert(mem->vmp_tabled);
846 		vm_page_remove(mem, TRUE);
847 		assert(mem->vmp_busy);
848 
849 		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
850 		assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
851 		    (mem->vmp_q_state == VM_PAGE_IS_WIRED));
852 
853 		mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
854 		mem->vmp_snext = local_freeq;
855 		local_freeq = mem;
856 	}
857 	vm_object_unlock(object);
858 
859 
860 	if (local_freeq) {
861 		vm_page_free_list(local_freeq, TRUE);
862 		if (pages_unwired != 0) {
863 			vm_page_lockspin_queues();
864 			vm_page_wire_count -= pages_unwired;
865 			vm_page_unlock_queues();
866 			vm_tag_update_size(tag, -ptoa_64(pages_unwired));
867 		}
868 	}
869 }
870 
871 /*
872  *	kmem_alloc:
873  *
874  *	Allocate wired-down memory in the kernel's address map
875  *	or a submap.  The memory is not zero-filled.
876  */
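/*
 * Sketch of the wrappers that follow (call sites hypothetical): in-kernel
 * callers pass an explicit allocation tag, while the _external variants
 * used by kexts derive one from the caller's backtrace:
 *
 *	kmem_alloc(kernel_map, &addr, size, VM_KERN_MEMORY_DIAG);    explicit tag
 *	kmem_alloc_external(kernel_map, &addr, size);                 tag = vm_tag_bt()
 */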
877 
878 kern_return_t
879 kmem_alloc_external(
880 	vm_map_t        map,
881 	vm_offset_t     *addrp,
882 	vm_size_t       size)
883 {
884 	return kmem_alloc(map, addrp, size, vm_tag_bt());
885 }
886 
887 
888 kern_return_t
889 kmem_alloc(
890 	vm_map_t        map,
891 	vm_offset_t     *addrp,
892 	vm_size_t       size,
893 	vm_tag_t        tag)
894 {
895 	return kmem_alloc_flags(map, addrp, size, tag, 0);
896 }
897 
898 kern_return_t
899 kmem_alloc_flags(
900 	vm_map_t        map,
901 	vm_offset_t     *addrp,
902 	vm_size_t       size,
903 	vm_tag_t        tag,
904 	kma_flags_t     flags)
905 {
906 	kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
907 	if (kr == KERN_SUCCESS) {
908 		TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
909 	}
910 	return kr;
911 }
912 
913 /*
914  *	kmem_realloc:
915  *
916  *	Reallocate wired-down memory in the kernel's address map
917  *	or a submap.  Newly allocated pages are not zeroed.
918  *	This can only be used on regions allocated with kmem_alloc.
919  *
920  *	If successful, the pages in the old region are mapped twice.
921  *	The old region is unchanged.  Use kmem_free to get rid of it.
922  */
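/*
 * Illustrative sketch of growing a kmem_alloc'd region (tag hypothetical):
 *
 *	vm_offset_t old_addr, new_addr;
 *	kmem_alloc(kernel_map, &old_addr, PAGE_SIZE, VM_KERN_MEMORY_DIAG);
 *	...
 *	if (kmem_realloc(kernel_map, old_addr, PAGE_SIZE,
 *	        &new_addr, 2 * PAGE_SIZE, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
 *		kmem_free(kernel_map, old_addr, PAGE_SIZE);   old mapping still exists
 *	}
 */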
923 kern_return_t
924 kmem_realloc(
925 	vm_map_t                map,
926 	vm_offset_t             oldaddr,
927 	vm_size_t               oldsize,
928 	vm_offset_t             *newaddrp,
929 	vm_size_t               newsize,
930 	vm_tag_t                tag)
931 {
932 	vm_object_t             object;
933 	vm_object_offset_t      offset;
934 	vm_map_offset_t         oldmapmin;
935 	vm_map_offset_t         oldmapmax;
936 	vm_map_offset_t         newmapaddr;
937 	vm_map_size_t           oldmapsize;
938 	vm_map_size_t           newmapsize;
939 	vm_map_entry_t          oldentry;
940 	vm_map_entry_t          newentry;
941 	vm_page_t               mem;
942 	kern_return_t           kr;
943 	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
944 
945 	oldmapmin = vm_map_trunc_page(oldaddr,
946 	    VM_MAP_PAGE_MASK(map));
947 	oldmapmax = vm_map_round_page(oldaddr + oldsize,
948 	    VM_MAP_PAGE_MASK(map));
949 	oldmapsize = oldmapmax - oldmapmin;
950 	newmapsize = vm_map_round_page(newsize,
951 	    VM_MAP_PAGE_MASK(map));
952 	if (newmapsize < newsize) {
953 		/* overflow */
954 		*newaddrp = 0;
955 		return KERN_INVALID_ARGUMENT;
956 	}
957 
958 	/*
959 	 *	Find the VM object backing the old region.
960 	 */
961 
962 	vm_map_lock(map);
963 
964 	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
965 		panic("kmem_realloc");
966 	}
967 	if (oldentry->vme_atomic) {
968 		vmk_flags.vmkf_atomic_entry = TRUE;
969 	}
970 	object = VME_OBJECT(oldentry);
971 
972 	/*
973 	 *	Increase the size of the object and
974 	 *	fill in the new region.
975 	 */
976 
977 	vm_object_reference(object);
978 	/* by grabbing the object lock before unlocking the map */
979 	/* we guarantee that we will panic if more than one     */
980 	/* attempt is made to realloc a kmem_alloc'd area       */
981 	vm_object_lock(object);
982 	vm_map_unlock(map);
983 	if (object->vo_size != oldmapsize) {
984 		panic("kmem_realloc");
985 	}
986 	object->vo_size = newmapsize;
987 	vm_object_unlock(object);
988 
989 	/* allocate the new pages while expanded portion of the */
990 	/* object is still not mapped */
991 	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
992 	    vm_object_round_page(newmapsize - oldmapsize));
993 
994 	/*
995 	 *	Find space for the new region.
996 	 */
997 
998 	kr = vm_map_find_space(map, &newmapaddr, newmapsize,
999 	    (vm_map_offset_t) 0, 0,
1000 	    vmk_flags,
1001 	    tag,
1002 	    &newentry);
1003 	if (kr != KERN_SUCCESS) {
1004 		vm_object_lock(object);
1005 		for (offset = oldmapsize;
1006 		    offset < newmapsize; offset += PAGE_SIZE) {
1007 			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
1008 				VM_PAGE_FREE(mem);
1009 			}
1010 		}
1011 		object->vo_size = oldmapsize;
1012 		vm_object_unlock(object);
1013 		vm_object_deallocate(object);
1014 		return kr;
1015 	}
1016 	VME_OBJECT_SET(newentry, object);
1017 	VME_OFFSET_SET(newentry, 0);
1018 	assert(newentry->wired_count == 0);
1019 
1020 
1021 	/* add an extra reference in case we have someone doing an */
1022 	/* unexpected deallocate */
1023 	vm_object_reference(object);
1024 	vm_map_unlock(map);
1025 
1026 	kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
1027 	    VM_PROT_DEFAULT, tag, FALSE);
1028 	if (KERN_SUCCESS != kr) {
1029 		vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
1030 		vm_object_lock(object);
1031 		for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
1032 			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
1033 				VM_PAGE_FREE(mem);
1034 			}
1035 		}
1036 		object->vo_size = oldmapsize;
1037 		vm_object_unlock(object);
1038 		vm_object_deallocate(object);
1039 		return kr;
1040 	}
1041 	vm_object_deallocate(object);
1042 
1043 	if (kernel_object == object) {
1044 		vm_tag_update_size(tag, newmapsize);
1045 	}
1046 
1047 	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
1048 	return KERN_SUCCESS;
1049 }
1050 
1051 /*
1052  *	kmem_alloc_kobject:
1053  *
1054  *	Allocate wired-down memory in the kernel's address map
1055  *	or a submap.  The memory is not zero-filled.
1056  *
1057  *	The memory is allocated in the kernel_object.
1058  *	It may not be copied with vm_map_copy, and
1059  *	it may not be reallocated with kmem_realloc.
1060  */
1061 
1062 kern_return_t
1063 kmem_alloc_kobject_external(
1064 	vm_map_t        map,
1065 	vm_offset_t     *addrp,
1066 	vm_size_t       size)
1067 {
1068 	return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
1069 }
1070 
1071 kern_return_t
1072 kmem_alloc_kobject(
1073 	vm_map_t        map,
1074 	vm_offset_t     *addrp,
1075 	vm_size_t       size,
1076 	vm_tag_t        tag)
1077 {
1078 	return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
1079 }
1080 
1081 /*
1082  *	kmem_alloc_aligned:
1083  *
1084  *	Like kmem_alloc_kobject, except that the memory is aligned.
1085  *	The size should be a power-of-2.
1086  */
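/*
 * For example, a 16KB request returns memory on a 16KB boundary, since the
 * power-of-2 size is also used to form the alignment mask (size - 1) below.
 */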
1087 
1088 kern_return_t
1089 kmem_alloc_aligned(
1090 	vm_map_t        map,
1091 	vm_offset_t     *addrp,
1092 	vm_size_t       size,
1093 	vm_tag_t        tag)
1094 {
1095 	if ((size & (size - 1)) != 0) {
1096 		panic("kmem_alloc_aligned: size not aligned");
1097 	}
1098 	return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
1099 }
1100 
1101 /*
1102  *	kmem_alloc_pageable:
1103  *
1104  *	Allocate pageable memory in the kernel's address map.
1105  */
1106 
1107 kern_return_t
1108 kmem_alloc_pageable_external(
1109 	vm_map_t        map,
1110 	vm_offset_t     *addrp,
1111 	vm_size_t       size)
1112 {
1113 	return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
1114 }
1115 
1116 kern_return_t
1117 kmem_alloc_pageable(
1118 	vm_map_t        map,
1119 	vm_offset_t     *addrp,
1120 	vm_size_t       size,
1121 	vm_tag_t        tag)
1122 {
1123 	vm_map_offset_t map_addr;
1124 	vm_map_size_t   map_size;
1125 	kern_return_t kr;
1126 
1127 #ifndef normal
1128 	map_addr = (vm_map_min(map)) + PAGE_SIZE;
1129 #else
1130 	map_addr = vm_map_min(map);
1131 #endif
1132 	map_size = vm_map_round_page(size,
1133 	    VM_MAP_PAGE_MASK(map));
1134 	if (map_size < size) {
1135 		/* overflow */
1136 		*addrp = 0;
1137 		return KERN_INVALID_ARGUMENT;
1138 	}
1139 
1140 	kr = vm_map_enter(map, &map_addr, map_size,
1141 	    (vm_map_offset_t) 0,
1142 	    VM_FLAGS_ANYWHERE,
1143 	    VM_MAP_KERNEL_FLAGS_NONE,
1144 	    tag,
1145 	    VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
1146 	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1147 
1148 	if (kr != KERN_SUCCESS) {
1149 		return kr;
1150 	}
1151 
1152 #if KASAN
1153 	kasan_notify_address(map_addr, map_size);
1154 #endif
1155 	*addrp = CAST_DOWN(vm_offset_t, map_addr);
1156 	return KERN_SUCCESS;
1157 }
1158 
1159 /*
1160  *	kmem_free:
1161  *
1162  *	Release a region of kernel virtual memory allocated
1163  *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1164  *	and return the physical pages associated with that region.
1165  */
1166 
1167 void
1168 kmem_free(
1169 	vm_map_t        map,
1170 	vm_offset_t     addr,
1171 	vm_size_t       size)
1172 {
1173 	kern_return_t kr;
1174 
1175 	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1176 
1177 	TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);
1178 
1179 	if (size == 0) {
1180 #if MACH_ASSERT
1181 		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
1182 #endif
1183 		return;
1184 	}
1185 
1186 	kr = vm_map_remove(map,
1187 	    vm_map_trunc_page(addr,
1188 	    VM_MAP_PAGE_MASK(map)),
1189 	    vm_map_round_page(addr + size,
1190 	    VM_MAP_PAGE_MASK(map)),
1191 	    VM_MAP_REMOVE_KUNWIRE);
1192 	if (kr != KERN_SUCCESS) {
1193 		panic("kmem_free");
1194 	}
1195 }
1196 
1197 /*
1198  *	Allocate new pages in an object.
1199  */
1200 
1201 kern_return_t
1202 kmem_alloc_pages(
1203 	vm_object_t             object,
1204 	vm_object_offset_t      offset,
1205 	vm_object_size_t        size)
1206 {
1207 	vm_object_size_t                alloc_size;
1208 
1209 	alloc_size = vm_object_round_page(size);
1210 	vm_object_lock(object);
1211 	while (alloc_size) {
1212 		vm_page_t   mem;
1213 
1214 
1215 		/*
1216 		 *	Allocate a page
1217 		 */
1218 		while (VM_PAGE_NULL ==
1219 		    (mem = vm_page_alloc(object, offset))) {
1220 			vm_object_unlock(object);
1221 			VM_PAGE_WAIT();
1222 			vm_object_lock(object);
1223 		}
1224 		mem->vmp_busy = FALSE;
1225 
1226 		alloc_size -= PAGE_SIZE;
1227 		offset += PAGE_SIZE;
1228 	}
1229 	vm_object_unlock(object);
1230 	return KERN_SUCCESS;
1231 }
1232 
1233 /*
1234  *	kmem_suballoc:
1235  *
1236  *	Allocates a map to manage a subrange
1237  *	of the kernel virtual address space.
1238  *
1239  *	Arguments are as follows:
1240  *
1241  *	parent		Map to take range from
1242  *	addr		Address of start of range (IN/OUT)
1243  *	size		Size of range to find
1244  *	pageable	Can region be paged
1245  *	anywhere	Can region be located anywhere in map
1246  *	new_map		Pointer to new submap
1247  */
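/*
 * Illustrative sketch of carving out a pageable submap (size and tag are
 * hypothetical):
 *
 *	vm_offset_t base = 0;
 *	vm_map_t    my_submap;
 *
 *	kr = kmem_suballoc(kernel_map, &base, 64 * 1024 * 1024,
 *	    TRUE,                                   pageable
 *	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_DIAG, &my_submap);
 *
 * On success, base holds the start of the reserved range in the parent map
 * and my_submap manages allocations within it.
 */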
1248 kern_return_t
1249 kmem_suballoc(
1250 	vm_map_t        parent,
1251 	vm_offset_t     *addr,
1252 	vm_size_t       size,
1253 	boolean_t       pageable,
1254 	int             flags,
1255 	vm_map_kernel_flags_t vmk_flags,
1256 	vm_tag_t    tag,
1257 	vm_map_t        *new_map)
1258 {
1259 	vm_map_t        map;
1260 	vm_map_offset_t map_addr;
1261 	vm_map_size_t   map_size;
1262 	kern_return_t   kr;
1263 
1264 	map_size = vm_map_round_page(size,
1265 	    VM_MAP_PAGE_MASK(parent));
1266 	if (map_size < size) {
1267 		/* overflow */
1268 		*addr = 0;
1269 		return KERN_INVALID_ARGUMENT;
1270 	}
1271 
1272 	/*
1273 	 *	Need reference on submap object because it is internal
1274 	 *	to the vm_system.  vm_object_enter will never be called
1275 	 *	on it (usual source of reference for vm_map_enter).
1276 	 */
1277 	vm_object_reference(vm_submap_object);
1278 
1279 	map_addr = ((flags & VM_FLAGS_ANYWHERE)
1280 	    ? vm_map_min(parent)
1281 	    : vm_map_trunc_page(*addr,
1282 	    VM_MAP_PAGE_MASK(parent)));
1283 
1284 	kr = vm_map_enter(parent, &map_addr, map_size,
1285 	    (vm_map_offset_t) 0, flags, vmk_flags, tag,
1286 	    vm_submap_object, (vm_object_offset_t) 0, FALSE,
1287 	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1288 	if (kr != KERN_SUCCESS) {
1289 		vm_object_deallocate(vm_submap_object);
1290 		return kr;
1291 	}
1292 
1293 	pmap_reference(vm_map_pmap(parent));
1294 	map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
1295 	if (map == VM_MAP_NULL) {
1296 		panic("kmem_suballoc: vm_map_create failed");   /* "can't happen" */
1297 	}
1298 	/* inherit the parent map's page size */
1299 	vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));
1300 
1301 	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
1302 	if (kr != KERN_SUCCESS) {
1303 		/*
1304 		 * See comment preceding vm_map_submap().
1305 		 */
1306 		vm_map_remove(parent, map_addr, map_addr + map_size,
1307 		    VM_MAP_REMOVE_NO_FLAGS);
1308 		vm_map_deallocate(map); /* also removes ref to pmap */
1309 		vm_object_deallocate(vm_submap_object);
1310 		return kr;
1311 	}
1312 	*addr = CAST_DOWN(vm_offset_t, map_addr);
1313 	*new_map = map;
1314 	return KERN_SUCCESS;
1315 }
1316 /*
1317  * The default percentage of memory that can be mlocked is scaled based on the total
1318  * amount of memory in the system. These percentages are calculated
1319  * offline and stored in this table. We index this table by
1320  * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1321  * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1322  *
1323  * Note that these values were picked for mac.
1324  * If we ever have very large memory config arm devices, we may want to revisit
1325  * since the kernel overhead is smaller there due to the larger page size.
1326  */
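/*
 * Worked example of the indexing below: on a 16GB configuration,
 * log2(max_mem) == 34, so the index is 34 - 32 == 2 and the default
 * mlock() limit is 76% of memory (80% on CONFIG_JETSAM systems).
 */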
1327 
1328 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1329 #define VM_USER_WIREABLE_MIN_CONFIG 32
1330 #if CONFIG_JETSAM
1331 /* Systems with jetsam can wire a bit more b/c the system can relieve wired
1332  * pressure.
1333  */
1334 static vm_map_size_t wire_limit_percents[] =
1335 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
1336 #else
1337 static vm_map_size_t wire_limit_percents[] =
1338 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
1339 #endif /* CONFIG_JETSAM */
1340 
1341 /*
1342  * Sets the default global user wire limit which limits the amount of
1343  * memory that can be locked via mlock() based on the above algorithm.
1344  * This can be overridden via a sysctl.
1345  */
1346 static void
1347 kmem_set_user_wire_limits(void)
1348 {
1349 	uint64_t available_mem_log;
1350 	uint64_t max_wire_percent;
1351 	size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1352 	    sizeof(vm_map_size_t);
1353 	vm_map_size_t limit;
1354 	uint64_t config_memsize = max_mem;
1355 #if defined(XNU_TARGET_OS_OSX)
1356 	config_memsize = max_mem_actual;
1357 #endif /* defined(XNU_TARGET_OS_OSX) */
1358 
1359 	available_mem_log = bit_floor(config_memsize);
1360 
1361 	if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1362 		available_mem_log = 0;
1363 	} else {
1364 		available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1365 	}
1366 	if (available_mem_log >= wire_limit_percents_length) {
1367 		available_mem_log = wire_limit_percents_length - 1;
1368 	}
1369 	max_wire_percent = wire_limit_percents[available_mem_log];
1370 
1371 	limit = config_memsize * max_wire_percent / 100;
1372 	/* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
1373 	if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
1374 		limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
1375 	}
1376 
1377 	vm_global_user_wire_limit = limit;
1378 	/* the default per task limit is the same as the global limit */
1379 	vm_per_task_user_wire_limit = limit;
1380 	vm_add_wire_count_over_global_limit = 0;
1381 	vm_add_wire_count_over_user_limit = 0;
1382 }
1383 
1384 
1385 /*
1386  *	kmem_init:
1387  *
1388  *	Initialize the kernel's virtual memory map, taking
1389  *	into account all memory allocated up to this time.
1390  */
1391 __startup_func
1392 void
1393 kmem_init(
1394 	vm_offset_t     start,
1395 	vm_offset_t     end)
1396 {
1397 	vm_map_offset_t map_start;
1398 	vm_map_offset_t map_end;
1399 	vm_map_kernel_flags_t vmk_flags;
1400 
1401 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1402 	vmk_flags.vmkf_permanent = TRUE;
1403 	vmk_flags.vmkf_no_pmap_check = TRUE;
1404 
1405 	map_start = vm_map_trunc_page(start,
1406 	    VM_MAP_PAGE_MASK(kernel_map));
1407 	map_end = vm_map_round_page(end,
1408 	    VM_MAP_PAGE_MASK(kernel_map));
1409 
1410 #if     defined(__arm__) || defined(__arm64__)
1411 	kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1412 	    VM_MAX_KERNEL_ADDRESS, FALSE);
1413 	/*
1414 	 *	Reserve virtual memory allocated up to this time.
1415 	 */
1416 	{
1417 		unsigned int    region_select = 0;
1418 		vm_map_offset_t region_start;
1419 		vm_map_size_t   region_size;
1420 		vm_map_offset_t map_addr;
1421 		kern_return_t kr;
1422 
1423 		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
1424 			map_addr = region_start;
1425 			kr = vm_map_enter(kernel_map, &map_addr,
1426 			    vm_map_round_page(region_size,
1427 			    VM_MAP_PAGE_MASK(kernel_map)),
1428 			    (vm_map_offset_t) 0,
1429 			    VM_FLAGS_FIXED,
1430 			    vmk_flags,
1431 			    VM_KERN_MEMORY_NONE,
1432 			    VM_OBJECT_NULL,
1433 			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1434 			    VM_INHERIT_DEFAULT);
1435 
1436 			if (kr != KERN_SUCCESS) {
1437 				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
1438 				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1439 				    (uint64_t) region_size, kr);
1440 			}
1441 
1442 			region_select++;
1443 		}
1444 	}
1445 #else
1446 	kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1447 	    map_end, FALSE);
1448 	/*
1449 	 *	Reserve virtual memory allocated up to this time.
1450 	 */
1451 	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
1452 		vm_map_offset_t map_addr;
1453 		kern_return_t kr;
1454 
1455 		vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1456 		vmk_flags.vmkf_no_pmap_check = TRUE;
1457 
1458 		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1459 		kr = vm_map_enter(kernel_map,
1460 		    &map_addr,
1461 		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1462 		    (vm_map_offset_t) 0,
1463 		    VM_FLAGS_FIXED,
1464 		    vmk_flags,
1465 		    VM_KERN_MEMORY_NONE,
1466 		    VM_OBJECT_NULL,
1467 		    (vm_object_offset_t) 0, FALSE,
1468 		    VM_PROT_NONE, VM_PROT_NONE,
1469 		    VM_INHERIT_DEFAULT);
1470 
1471 		if (kr != KERN_SUCCESS) {
1472 			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
1473 			    (uint64_t) start, (uint64_t) end,
1474 			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1475 			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1476 			    kr);
1477 		}
1478 	}
1479 #endif
1480 
1481 	kmem_set_user_wire_limits();
1482 }
1483 
1484 /*
1485  *	Routine:	copyinmap
1486  *	Purpose:
1487  *		Like copyin, except that fromaddr is an address
1488  *		in the specified VM map.  This implementation
1489  *		is incomplete; it handles the current user map
1490  *		and the kernel map/submaps.
1491  */
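/*
 * Illustrative sketch (names hypothetical): read a word from another task's
 * map without first switching to a thread in that task:
 *
 *	uint32_t value;
 *	kr = copyinmap(task_map, user_va, &value, sizeof(value));
 */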
1492 kern_return_t
1493 copyinmap(
1494 	vm_map_t                map,
1495 	vm_map_offset_t         fromaddr,
1496 	void                    *todata,
1497 	vm_size_t               length)
1498 {
1499 	kern_return_t   kr = KERN_SUCCESS;
1500 	vm_map_t oldmap;
1501 
1502 	if (vm_map_pmap(map) == pmap_kernel()) {
1503 		/* assume a correct copy */
1504 		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
1505 	} else if (current_map() == map) {
1506 		if (copyin(fromaddr, todata, length) != 0) {
1507 			kr = KERN_INVALID_ADDRESS;
1508 		}
1509 	} else {
1510 		vm_map_reference(map);
1511 		oldmap = vm_map_switch(map);
1512 		if (copyin(fromaddr, todata, length) != 0) {
1513 			kr = KERN_INVALID_ADDRESS;
1514 		}
1515 		vm_map_switch(oldmap);
1516 		vm_map_deallocate(map);
1517 	}
1518 	return kr;
1519 }
1520 
1521 /*
1522  *	Routine:	copyoutmap
1523  *	Purpose:
1524  *		Like copyout, except that toaddr is an address
1525  *		in the specified VM map.
1526  */
1527 kern_return_t
1528 copyoutmap(
1529 	vm_map_t                map,
1530 	void                    *fromdata,
1531 	vm_map_address_t        toaddr,
1532 	vm_size_t               length)
1533 {
1534 	kern_return_t   kr = KERN_SUCCESS;
1535 	vm_map_t        oldmap;
1536 
1537 	if (vm_map_pmap(map) == pmap_kernel()) {
1538 		/* assume a correct copy */
1539 		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
1540 	} else if (current_map() == map) {
1541 		if (copyout(fromdata, toaddr, length) != 0) {
1542 			kr = KERN_INVALID_ADDRESS;
1543 		}
1544 	} else {
1545 		vm_map_reference(map);
1546 		oldmap = vm_map_switch(map);
1547 		if (copyout(fromdata, toaddr, length) != 0) {
1548 			kr = KERN_INVALID_ADDRESS;
1549 		}
1550 		vm_map_switch(oldmap);
1551 		vm_map_deallocate(map);
1552 	}
1553 	return kr;
1554 }
1555 
1556 /*
1557  *	Routine:	copyoutmap_atomic{32, 64}
1558  *	Purpose:
1559  *		Like copyoutmap, except that the operation is atomic.
1560  *      Takes in value rather than *fromdata pointer.
1561  */
1562 kern_return_t
1563 copyoutmap_atomic32(
1564 	vm_map_t                map,
1565 	uint32_t                value,
1566 	vm_map_address_t        toaddr)
1567 {
1568 	kern_return_t   kr = KERN_SUCCESS;
1569 	vm_map_t        oldmap;
1570 
1571 	if (vm_map_pmap(map) == pmap_kernel()) {
1572 		/* assume a correct toaddr */
1573 		*(uint32_t *)toaddr = value;
1574 	} else if (current_map() == map) {
1575 		if (copyout_atomic32(value, toaddr) != 0) {
1576 			kr = KERN_INVALID_ADDRESS;
1577 		}
1578 	} else {
1579 		vm_map_reference(map);
1580 		oldmap = vm_map_switch(map);
1581 		if (copyout_atomic32(value, toaddr) != 0) {
1582 			kr = KERN_INVALID_ADDRESS;
1583 		}
1584 		vm_map_switch(oldmap);
1585 		vm_map_deallocate(map);
1586 	}
1587 	return kr;
1588 }
1589 
1590 kern_return_t
1591 copyoutmap_atomic64(
1592 	vm_map_t                map,
1593 	uint64_t                value,
1594 	vm_map_address_t        toaddr)
1595 {
1596 	kern_return_t   kr = KERN_SUCCESS;
1597 	vm_map_t        oldmap;
1598 
1599 	if (vm_map_pmap(map) == pmap_kernel()) {
1600 		/* assume a correct toaddr */
1601 		*(uint64_t *)toaddr = value;
1602 	} else if (current_map() == map) {
1603 		if (copyout_atomic64(value, toaddr) != 0) {
1604 			kr = KERN_INVALID_ADDRESS;
1605 		}
1606 	} else {
1607 		vm_map_reference(map);
1608 		oldmap = vm_map_switch(map);
1609 		if (copyout_atomic64(value, toaddr) != 0) {
1610 			kr = KERN_INVALID_ADDRESS;
1611 		}
1612 		vm_map_switch(oldmap);
1613 		vm_map_deallocate(map);
1614 	}
1615 	return kr;
1616 }
1617 
1618 /*
1619  *
1620  *	The following two functions are to be used when exposing kernel
1621  *	addresses to userspace via any of the various debug or info
1622  *	facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1623  *	and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1624  *	are exported to KEXTs.
1625  *
1626  *	NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
1627  */
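/*
 * Illustrative sketch (obj is a hypothetical heap pointer): when logging a
 * kernel address, hash it rather than printing the raw value.  Inside the
 * kernel the macro wrappers from vm_param.h are preferred:
 *
 *	printf("object at 0x%lx\n", (unsigned long)VM_KERNEL_ADDRHASH(obj));
 */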
1628 
1629 static void
1630 vm_kernel_addrhash_internal(
1631 	vm_offset_t addr,
1632 	vm_offset_t *hash_addr,
1633 	uint64_t salt)
1634 {
1635 	assert(salt != 0);
1636 
1637 	if (addr == 0) {
1638 		*hash_addr = 0;
1639 		return;
1640 	}
1641 
1642 	if (VM_KERNEL_IS_SLID(addr)) {
1643 		*hash_addr = VM_KERNEL_UNSLIDE(addr);
1644 		return;
1645 	}
1646 
1647 	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
1648 	SHA256_CTX sha_ctx;
1649 
1650 	SHA256_Init(&sha_ctx);
1651 	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
1652 	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
1653 	SHA256_Final(sha_digest, &sha_ctx);
1654 
1655 	*hash_addr = sha_digest[0];
1656 }
1657 
1658 void
1659 vm_kernel_addrhash_external(
1660 	vm_offset_t addr,
1661 	vm_offset_t *hash_addr)
1662 {
1663 	return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
1664 }
1665 
1666 vm_offset_t
1667 vm_kernel_addrhash(vm_offset_t addr)
1668 {
1669 	vm_offset_t hash_addr;
1670 	vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
1671 	return hash_addr;
1672 }
1673 
1674 void
1675 vm_kernel_addrhide(
1676 	vm_offset_t addr,
1677 	vm_offset_t *hide_addr)
1678 {
1679 	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
1680 }
1681 
1682 /*
1683  *	vm_kernel_addrperm_external:
1684  *	vm_kernel_unslide_or_perm_external:
1685  *
1686  *	Use these macros when exposing an address to userspace that could come from
1687  *	either kernel text/data *or* the heap.
1688  */
1689 void
1690 vm_kernel_addrperm_external(
1691 	vm_offset_t addr,
1692 	vm_offset_t *perm_addr)
1693 {
1694 	if (VM_KERNEL_IS_SLID(addr)) {
1695 		*perm_addr = VM_KERNEL_UNSLIDE(addr);
1696 	} else if (VM_KERNEL_ADDRESS(addr)) {
1697 		*perm_addr = addr + vm_kernel_addrperm_ext;
1698 	} else {
1699 		*perm_addr = addr;
1700 	}
1701 }
1702 
1703 void
1704 vm_kernel_unslide_or_perm_external(
1705 	vm_offset_t addr,
1706 	vm_offset_t *up_addr)
1707 {
1708 	vm_kernel_addrperm_external(addr, up_addr);
1709 }
1710 
1711 void
1712 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
1713 {
1714 	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
1715 		panic("pointer %p can't be packed: low %d bits aren't 0",
1716 		    (void *)ptr, params.vmpp_shift);
1717 	} else if (ptr <= params.vmpp_base) {
1718 		panic("pointer %p can't be packed: below base %p",
1719 		    (void *)ptr, (void *)params.vmpp_base);
1720 	} else {
1721 		panic("pointer %p can't be packed: maximum encodable pointer is %p",
1722 		    (void *)ptr, (void *)vm_packing_max_packable(params));
1723 	}
1724 }
1725 
1726 void
1727 vm_packing_verify_range(
1728 	const char *subsystem,
1729 	vm_offset_t min_address,
1730 	vm_offset_t max_address,
1731 	vm_packing_params_t params)
1732 {
1733 	if (min_address > max_address) {
1734 		panic("%s: %s range invalid min:%p > max:%p",
1735 		    __func__, subsystem, (void *)min_address, (void *)max_address);
1736 	}
1737 
1738 	if (!params.vmpp_base_relative) {
1739 		return;
1740 	}
1741 
1742 	if (min_address <= params.vmpp_base) {
1743 		panic("%s: %s range invalid min:%p <= base:%p",
1744 		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
1745 	}
1746 
1747 	if (max_address > vm_packing_max_packable(params)) {
1748 		panic("%s: %s range invalid max:%p >= max packable:%p",
1749 		    __func__, subsystem, (void *)max_address,
1750 		    (void *)vm_packing_max_packable(params));
1751 	}
1752 }
1753