xref: /xnu-8020.101.4/osfmk/vm/vm_kern.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_kern.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Kernel memory management.
64  */
65 
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_compressor.h>
75 #include <vm/vm_pageout.h>
76 #include <kern/misc_protos.h>
77 #include <vm/cpm.h>
78 #include <kern/ledger.h>
79 #include <kern/bits.h>
80 #include <kern/startup.h>
81 
82 #include <string.h>
83 
84 #include <libkern/OSDebug.h>
85 #include <libkern/crypto/sha2.h>
86 #include <libkern/section_keywords.h>
87 #include <sys/kdebug.h>
88 
89 #include <san/kasan.h>
90 
91 /*
92  *	Variables exported by this module.
93  */
94 
95 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
96 vm_map_t         kernel_pageable_map;
97 
98 /*
99  * Forward declarations for internal functions.
100  */
101 extern kern_return_t kmem_alloc_pages(
102 	vm_object_t             object,
103 	vm_object_offset_t      offset,
104 	vm_object_size_t        size);
105 
106 kern_return_t
107 kmem_alloc_contig(
108 	vm_map_t                map,
109 	vm_offset_t             *addrp,
110 	vm_size_t               size,
111 	vm_offset_t             mask,
112 	ppnum_t                 max_pnum,
113 	ppnum_t                 pnum_mask,
114 	kma_flags_t             flags,
115 	vm_tag_t                tag)
116 {
117 	vm_object_t             object;
118 	vm_object_offset_t      offset;
119 	vm_map_offset_t         map_addr;
120 	vm_map_offset_t         map_mask;
121 	vm_map_size_t           map_size, i;
122 	vm_map_entry_t          entry;
123 	vm_page_t               m, pages;
124 	kern_return_t           kr;
125 
126 	assert(VM_KERN_MEMORY_NONE != tag);
127 
128 	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
129 		return KERN_INVALID_ARGUMENT;
130 	}
131 
132 	map_size = vm_map_round_page(size,
133 	    VM_MAP_PAGE_MASK(map));
134 	map_mask = (vm_map_offset_t)mask;
135 
136 	/* Check for zero allocation size (either directly or via overflow) */
137 	if (map_size == 0) {
138 		*addrp = 0;
139 		return KERN_INVALID_ARGUMENT;
140 	}
141 
142 	/*
143 	 *	Allocate a new object (if necessary) and the reference we
144 	 *	will be donating to the map entry.  We must do this before
145 	 *	locking the map, or risk deadlock with the default pager.
146 	 */
147 	if ((flags & KMA_KOBJECT) != 0) {
148 		object = kernel_object;
149 		vm_object_reference(object);
150 	} else {
151 		object = vm_object_allocate(map_size);
152 	}
153 
154 	kr = vm_map_find_space(map, &map_addr, map_size, map_mask,
155 	    VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
156 	if (KERN_SUCCESS != kr) {
157 		vm_object_deallocate(object);
158 		return kr;
159 	}
160 
161 	if (object == kernel_object) {
162 		offset = map_addr;
163 	} else {
164 		offset = 0;
165 	}
166 	VME_OBJECT_SET(entry, object);
167 	VME_OFFSET_SET(entry, offset);
168 
169 	/* Take an extra object ref in case the map entry gets deleted */
170 	vm_object_reference(object);
171 	vm_map_unlock(map);
172 
173 	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
174 
175 	if (kr != KERN_SUCCESS) {
176 		vm_map_remove(map,
177 		    vm_map_trunc_page(map_addr,
178 		    VM_MAP_PAGE_MASK(map)),
179 		    vm_map_round_page(map_addr + map_size,
180 		    VM_MAP_PAGE_MASK(map)),
181 		    VM_MAP_REMOVE_NO_FLAGS);
182 		vm_object_deallocate(object);
183 		*addrp = 0;
184 		return kr;
185 	}
186 
187 	vm_object_lock(object);
188 	for (i = 0; i < map_size; i += PAGE_SIZE) {
189 		m = pages;
190 		pages = NEXT_PAGE(m);
191 		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
192 		m->vmp_busy = FALSE;
193 		vm_page_insert(m, object, offset + i);
194 	}
195 	vm_object_unlock(object);
196 
197 	kr = vm_map_wire_kernel(map,
198 	    vm_map_trunc_page(map_addr,
199 	    VM_MAP_PAGE_MASK(map)),
200 	    vm_map_round_page(map_addr + map_size,
201 	    VM_MAP_PAGE_MASK(map)),
202 	    VM_PROT_DEFAULT, tag,
203 	    FALSE);
204 
205 	if (kr != KERN_SUCCESS) {
206 		if (object == kernel_object) {
207 			vm_object_lock(object);
208 			vm_object_page_remove(object, offset, offset + map_size);
209 			vm_object_unlock(object);
210 		}
211 		vm_map_remove(map,
212 		    vm_map_trunc_page(map_addr,
213 		    VM_MAP_PAGE_MASK(map)),
214 		    vm_map_round_page(map_addr + map_size,
215 		    VM_MAP_PAGE_MASK(map)),
216 		    VM_MAP_REMOVE_NO_FLAGS);
217 		vm_object_deallocate(object);
218 		return kr;
219 	}
220 	vm_object_deallocate(object);
221 
222 	if (object == kernel_object) {
223 		vm_map_simplify(map, map_addr);
224 		vm_tag_update_size(tag, map_size);
225 	}
226 	*addrp = (vm_offset_t) map_addr;
227 	assert((vm_map_offset_t) *addrp == map_addr);
228 
229 	return KERN_SUCCESS;
230 }
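/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller of kmem_alloc_contig() above, asking for a physically contiguous,
 * wired 64KB buffer whose pages all lie below 4GB.  The size, the physical
 * page bound and the VM tag are placeholder choices.
 *
 *	vm_offset_t   buf;
 *	kern_return_t kr;
 *
 *	kr = kmem_alloc_contig(kernel_map, &buf, 64 * 1024,
 *	    0, (ppnum_t)atop(1ULL << 32), 0,
 *	    KMA_NONE, VM_KERN_MEMORY_IOKIT);
 *	if (kr == KERN_SUCCESS) {
 *		... use the buffer ...
 *		kmem_free(kernel_map, buf, 64 * 1024);
 *	}
 */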
231 
232 /*
233  * Master entry point for allocating kernel memory.
234  * NOTE: this routine is _never_ interrupt safe.
235  *
236  * map		: map to allocate into
237  * addrp	: pointer to start address of new memory
238  * size		: size of memory requested
239  * flags	: options
240  *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
241  *		  KMA_KOBJECT		use kernel_object
242  *		  KMA_LOMEM		support for 32 bit devices in a 64 bit world
243  *					if set and a lomemory pool is available
244  *					grab pages from it... this also implies
245  *					KMA_NOPAGEWAIT
246  */
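/*
 * Illustrative sketch (not part of the original source): a minimal call to
 * kernel_memory_allocate() below, requesting a wired, zero-filled
 * allocation backed by the kernel object.  The size and VM tag are
 * placeholders.
 *
 *	vm_offset_t   addr;
 *	kern_return_t kr;
 *
 *	kr = kernel_memory_allocate(kernel_map, &addr, 4 * PAGE_SIZE, 0,
 *	    KMA_KOBJECT | KMA_ZERO, VM_KERN_MEMORY_KEXT);
 *	if (kr == KERN_SUCCESS) {
 *		...
 *		kmem_free(kernel_map, addr, 4 * PAGE_SIZE);
 *	}
 */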
247 
248 kern_return_t
249 kernel_memory_allocate(
250 	vm_map_t        map,
251 	vm_offset_t     *addrp,
252 	vm_size_t       size,
253 	vm_offset_t     mask,
254 	kma_flags_t     flags,
255 	vm_tag_t        tag)
256 {
257 	vm_object_t             object;
258 	vm_object_offset_t      offset;
259 	vm_object_offset_t      pg_offset;
260 	vm_map_entry_t          entry = NULL;
261 	vm_map_offset_t         map_addr, fill_start;
262 	vm_map_offset_t         map_mask;
263 	vm_map_size_t           map_size, fill_size;
264 	kern_return_t           kr, pe_result;
265 	vm_page_t               mem;
266 	vm_page_t               guard_left = VM_PAGE_NULL;
267 	vm_page_t               guard_right = VM_PAGE_NULL;
268 	vm_page_t               wired_page_list = VM_PAGE_NULL;
269 	int                     wired_page_count = 0;
270 	vm_map_kernel_flags_t   vmk_flags;
271 
272 	if (kernel_map == VM_MAP_NULL) {
273 		panic("kernel_memory_allocate: VM is not ready");
274 	}
275 	if (map->pmap != kernel_pmap) {
276 		panic("kernel_memory_allocate: %p is not a kernel map", map);
277 	}
278 
279 #if DEBUG || DEVELOPMENT
280 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
281 	    size, 0, 0, 0);
282 #endif
283 
284 	map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map));
285 	map_mask = (vm_map_offset_t) mask;
286 
287 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
288 
289 	/* Check for zero allocation size (either directly or via overflow) */
290 	if (__improbable(map_size == 0)) {
291 		kr = KERN_INVALID_ARGUMENT;
292 		goto out;
293 	}
294 
295 	/*
296 	 * Limit the size of a single extent of wired memory
297 	 * to try and limit the damage to the system if
298 	 * too many pages get wired down.
299 	 * The limit was raised to 2GB with a 128GB max physical limit,
300 	 * but it scales with installed memory above this.
301 	 */
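	/*
	 * For example (derived from the check below): with sane_size of
	 * 256GB the cap is MAX(2GB, 256GB / 64) = 4GB per call; at or
	 * below 128GB of installed memory the 2GB floor applies.
	 */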
302 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
303 	    map_size > MAX(1ULL << 31, sane_size / 64)) {
304 		kr = KERN_RESOURCE_SHORTAGE;
305 		goto out;
306 	}
307 
308 	/*
309 	 * Guard pages:
310 	 *
311 	 * Guard pages are implemented as fictitious pages.
312 	 *
313 	 * By placing guard pages on either end of a stack,
314 	 * they can help detect cases where a thread walks
315 	 * off either end of its stack.
316 	 *
317 	 * They are allocated and set up here and attempts
318 	 * to access those pages are trapped in vm_fault_page().
319 	 *
320 	 * The map_size we were passed may include extra space for
321 	 * guard pages. fill_size represents the actual size to populate.
322 	 * Similarly, fill_start indicates where the actual pages
323 	 * will begin in the range.
324 	 */
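	/*
	 * Worked example: with both KMA_GUARD_FIRST and KMA_GUARD_LAST
	 * set and a map_size of 6 pages, the code below leaves
	 * fill_start at 1 page and fill_size at 4 pages: one fictitious
	 * guard page at each end with four real pages in between.
	 */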
325 
326 	fill_start = 0;
327 	fill_size = map_size;
328 
329 	if (flags & KMA_GUARD_FIRST) {
330 		vmk_flags.vmkf_guard_before = TRUE;
331 		fill_start += PAGE_SIZE_64;
332 		if (os_sub_overflow(fill_size, PAGE_SIZE_64, &fill_size)) {
333 			/* no space for a guard page */
334 			kr = KERN_INVALID_ARGUMENT;
335 			goto out;
336 		}
337 		if (!map->never_faults) {
338 			guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
339 			if (guard_left == VM_PAGE_NULL) {
340 				kr = KERN_RESOURCE_SHORTAGE;
341 				goto out;
342 			}
343 		}
344 	}
345 	if (flags & KMA_GUARD_LAST) {
346 		vmk_flags.vmkf_guard_after = TRUE;
347 		if (os_sub_overflow(fill_size, PAGE_SIZE_64, &fill_size)) {
348 			/* no space for a guard page */
349 			kr = KERN_INVALID_ARGUMENT;
350 			goto out;
351 		}
352 		if (!map->never_faults) {
353 			guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
354 			if (guard_right == VM_PAGE_NULL) {
355 				kr = KERN_RESOURCE_SHORTAGE;
356 				goto out;
357 			}
358 		}
359 	}
360 
361 	wired_page_count = (int)atop(fill_size);
362 	assert(ptoa_64(wired_page_count) == fill_size);
363 
364 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
365 		kr = vm_page_alloc_list(wired_page_count, flags,
366 		    &wired_page_list);
367 		if (kr != KERN_SUCCESS) {
368 			goto out;
369 		}
370 	}
371 
372 	/*
373 	 *	Allocate a new object (if necessary).  We must do this before
374 	 *	locking the map, or risk deadlock with the default pager.
375 	 */
376 	if ((flags & KMA_KOBJECT) != 0) {
377 		object = kernel_object;
378 		vm_object_reference(object);
379 	} else if ((flags & KMA_COMPRESSOR) != 0) {
380 		object = compressor_object;
381 		vm_object_reference(object);
382 	} else {
383 		object = vm_object_allocate(map_size);
384 	}
385 
386 	if (flags & KMA_ATOMIC) {
387 		vmk_flags.vmkf_atomic_entry = TRUE;
388 	}
389 
390 	if (flags & KMA_LAST_FREE) {
391 		vmk_flags.vmkf_last_free = true;
392 	}
393 
394 	kr = vm_map_find_space(map, &map_addr,
395 	    fill_size, map_mask, vmk_flags, tag, &entry);
396 
397 	if (KERN_SUCCESS != kr) {
398 		vm_object_deallocate(object);
399 		goto out;
400 	}
401 
402 	if (object == kernel_object || object == compressor_object) {
403 		offset = map_addr;
404 	} else {
405 		offset = 0;
406 	}
407 	VME_OBJECT_SET(entry, object);
408 	VME_OFFSET_SET(entry, offset);
409 
410 	if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
411 		entry->wired_count++;
412 	}
413 
414 	if (flags & KMA_PERMANENT) {
415 		entry->permanent = TRUE;
416 	}
417 
418 	if (object != kernel_object && object != compressor_object) {
419 		vm_object_reference(object);
420 	}
421 
422 	vm_object_lock(object);
423 	vm_map_unlock(map);
424 
425 	if (guard_left) {
426 		vm_page_insert(guard_left, object, offset);
427 		guard_left->vmp_busy = FALSE;
428 		guard_left = VM_PAGE_NULL;
429 	} else {
430 		assert(fill_start == 0 || map->never_faults);
431 	}
432 
433 #if KASAN
434 	if (!(flags & KMA_VAONLY)) {
435 		/* for VAONLY mappings we notify in populate only */
436 		kasan_notify_address(map_addr, size);
437 	}
438 #endif
439 
440 	if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
441 		pg_offset = fill_start + fill_size;
442 	} else {
443 		for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
444 			if (wired_page_list == NULL) {
445 				panic("kernel_memory_allocate: wired_page_list == NULL");
446 			}
447 
448 			mem = wired_page_list;
449 			wired_page_list = mem->vmp_snext;
450 			mem->vmp_snext = NULL;
451 
452 			assert(mem->vmp_wire_count == 0);
453 			assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
454 
455 			mem->vmp_q_state = VM_PAGE_IS_WIRED;
456 			mem->vmp_wire_count++;
457 			if (__improbable(mem->vmp_wire_count == 0)) {
458 				panic("kernel_memory_allocate(%p): wire_count overflow",
459 				    mem);
460 			}
461 
462 			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
463 
464 			mem->vmp_busy = FALSE;
465 			mem->vmp_pmapped = TRUE;
466 			mem->vmp_wpmapped = TRUE;
467 
468 			PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset,
469 			    /* fault_phys_offset */ 0, mem,
470 			    VM_PROT_DEFAULT, VM_PROT_NONE,
471 			    ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
472 			    PMAP_OPTIONS_NOWAIT, pe_result);
473 
474 			if (pe_result == KERN_RESOURCE_SHORTAGE) {
475 				vm_object_unlock(object);
476 
477 				PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
478 				    VM_PROT_DEFAULT, VM_PROT_NONE,
479 				    ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
480 				    pe_result);
481 
482 				vm_object_lock(object);
483 			}
484 
485 			assert(pe_result == KERN_SUCCESS);
486 
487 			if (flags & KMA_NOENCRYPT) {
488 				bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
489 
490 				pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
491 			}
492 		}
493 		if (kernel_object == object) {
494 			vm_tag_update_size(tag, fill_size);
495 		}
496 	}
497 
498 	if (guard_right) {
499 		vm_page_insert(guard_right, object, offset + pg_offset);
500 		guard_right->vmp_busy = FALSE;
501 		guard_right = VM_PAGE_NULL;
502 	} else {
503 		assert(fill_start + fill_size == map_size || map->never_faults);
504 	}
505 
506 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
507 		vm_page_lockspin_queues();
508 		vm_page_wire_count += wired_page_count;
509 		vm_page_unlock_queues();
510 	}
511 
512 	vm_object_unlock(object);
513 
514 	/*
515 	 * now that the pages are wired, we no longer have to fear the entry being coalesced
516 	 */
517 	if (object == kernel_object || object == compressor_object) {
518 		vm_map_simplify(map, map_addr);
519 	} else {
520 		vm_object_deallocate(object);
521 	}
522 
523 #if DEBUG || DEVELOPMENT
524 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
525 	    wired_page_count, 0, 0, 0);
526 #endif
527 
528 	*addrp = CAST_DOWN(vm_offset_t, map_addr);
529 	return KERN_SUCCESS;
530 
531 out:
532 	if (guard_left) {
533 		guard_left->vmp_snext = wired_page_list;
534 		wired_page_list = guard_left;
535 	}
536 	if (guard_right) {
537 		guard_right->vmp_snext = wired_page_list;
538 		wired_page_list = guard_right;
539 	}
540 	if (wired_page_list) {
541 		vm_page_free_list(wired_page_list, FALSE);
542 	}
543 	*addrp = 0;
544 
545 #if DEBUG || DEVELOPMENT
546 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
547 	    wired_page_count, 0, 0, 0);
548 #endif
549 	return kr;
550 }
551 
552 void
553 kernel_memory_populate_with_pages(
554 	vm_map_t        map,
555 	vm_offset_t     addr,
556 	vm_size_t       size,
557 	vm_page_t       page_list,
558 	kma_flags_t     flags,
559 	vm_tag_t        tag,
560 	vm_prot_t       prot)
561 {
562 	vm_object_t     object;
563 	kern_return_t   pe_result;
564 	vm_page_t       mem;
565 	int             page_count = atop_64(size);
566 
567 	if (flags & KMA_COMPRESSOR) {
568 		panic("%s(%p,0x%llx,0x%llx,0x%x): KMA_COMPRESSOR", __func__,
569 		    map, (uint64_t) addr, (uint64_t) size, flags);
570 	}
571 
572 	if (flags & KMA_KOBJECT) {
573 		object = kernel_object;
574 
575 		vm_object_lock(object);
576 	} else {
577 		/*
578 		 * If it's not the kernel object, we need to:
579 		 *      lock map;
580 		 *      lookup entry;
581 		 *      lock object;
582 		 *	take reference on object;
583 		 *      unlock map;
584 		 */
585 		panic("%s(%p,0x%llx,0x%llx,0x%x): !KMA_KOBJECT", __func__,
586 		    map, (uint64_t) addr, (uint64_t) size, flags);
587 	}
588 
589 	for (vm_object_offset_t pg_offset = 0;
590 	    pg_offset < size;
591 	    pg_offset += PAGE_SIZE_64) {
592 		if (page_list == NULL) {
593 			panic("%s: page_list too short", __func__);
594 		}
595 
596 		mem = page_list;
597 		page_list = mem->vmp_snext;
598 		mem->vmp_snext = NULL;
599 
600 		assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
601 		mem->vmp_q_state = VM_PAGE_IS_WIRED;
602 		mem->vmp_wire_count++;
603 		if (mem->vmp_wire_count == 0) {
604 			panic("%s(%p): wire_count overflow", __func__, mem);
605 		}
606 
607 		vm_page_insert_wired(mem, object, addr + pg_offset, tag);
608 
609 		mem->vmp_busy = FALSE;
610 		mem->vmp_pmapped = TRUE;
611 		mem->vmp_wpmapped = TRUE;
612 
613 		PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset,
614 		    0, /* fault_phys_offset */
615 		    mem,
616 		    prot, VM_PROT_NONE,
617 		    ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
618 		    PMAP_OPTIONS_NOWAIT, pe_result);
619 
620 		if (pe_result == KERN_RESOURCE_SHORTAGE) {
621 			vm_object_unlock(object);
622 
623 			PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
624 			    prot, VM_PROT_NONE,
625 			    ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
626 			    pe_result);
627 
628 			vm_object_lock(object);
629 		}
630 
631 		assert(pe_result == KERN_SUCCESS);
632 
633 		if (flags & KMA_NOENCRYPT) {
634 			__nosan_bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
635 			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
636 		}
637 	}
638 	if (page_list) {
639 		panic("%s: page_list too long", __func__);
640 	}
641 	vm_object_unlock(object);
642 
643 	vm_page_lockspin_queues();
644 	vm_page_wire_count += page_count;
645 	vm_page_unlock_queues();
646 	vm_tag_update_size(tag, size);
647 
648 #if KASAN
649 	if (map == compressor_map) {
650 		kasan_notify_address_nopoison(addr, size);
651 	} else {
652 		kasan_notify_address(addr, size);
653 	}
654 #endif
655 }
656 
657 kern_return_t
658 kernel_memory_populate(
659 	vm_map_t        map,
660 	vm_offset_t     addr,
661 	vm_size_t       size,
662 	kma_flags_t     flags,
663 	vm_tag_t        tag)
664 {
665 	vm_object_t             object;
666 	vm_object_offset_t      offset, pg_offset;
667 	kern_return_t           kr = KERN_SUCCESS;
668 	vm_page_t               mem;
669 	vm_page_t               page_list = NULL;
670 	int                     page_count = atop_64(size);
671 
672 #if DEBUG || DEVELOPMENT
673 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
674 	    size, 0, 0, 0);
675 #endif
676 
677 	assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
678 
679 	if (flags & KMA_COMPRESSOR) {
680 		pg_offset = page_count * PAGE_SIZE_64;
681 
682 		do {
683 			for (;;) {
684 				mem = vm_page_grab();
685 
686 				if (mem != VM_PAGE_NULL) {
687 					break;
688 				}
689 
690 				VM_PAGE_WAIT();
691 			}
692 			if (KMA_ZERO & flags) {
693 				vm_page_zero_fill(mem);
694 			}
695 			mem->vmp_snext = page_list;
696 			page_list = mem;
697 
698 			pg_offset -= PAGE_SIZE_64;
699 
700 			kr = pmap_enter_options(kernel_pmap,
701 			    addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
702 			    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
703 			    PMAP_OPTIONS_INTERNAL, NULL);
704 			assert(kr == KERN_SUCCESS);
705 		} while (pg_offset);
706 
707 		offset = addr;
708 		object = compressor_object;
709 
710 		vm_object_lock(object);
711 
712 		for (pg_offset = 0;
713 		    pg_offset < size;
714 		    pg_offset += PAGE_SIZE_64) {
715 			mem = page_list;
716 			page_list = mem->vmp_snext;
717 			mem->vmp_snext = NULL;
718 
719 			vm_page_insert(mem, object, offset + pg_offset);
720 			assert(mem->vmp_busy);
721 
722 			mem->vmp_busy = FALSE;
723 			mem->vmp_pmapped = TRUE;
724 			mem->vmp_wpmapped = TRUE;
725 			mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
726 		}
727 		vm_object_unlock(object);
728 
729 #if KASAN
730 		if (map == compressor_map) {
731 			kasan_notify_address_nopoison(addr, size);
732 		} else {
733 			kasan_notify_address(addr, size);
734 		}
735 #endif
736 
737 #if DEBUG || DEVELOPMENT
738 		task_t task = current_task();
739 		if (task != NULL) {
740 			ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_count);
741 		}
742 #endif
743 	} else {
744 		kr = vm_page_alloc_list(page_count, flags, &page_list);
745 		if (kr == KERN_SUCCESS) {
746 			kernel_memory_populate_with_pages(map, addr, size,
747 			    page_list, flags, tag, VM_PROT_READ | VM_PROT_WRITE);
748 		}
749 	}
750 
751 #if DEBUG || DEVELOPMENT
752 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
753 	    page_count, 0, 0, 0);
754 #endif
755 	return kr;
756 }
757 
758 
759 void
760 kernel_memory_depopulate(
761 	vm_map_t           map,
762 	vm_offset_t        addr,
763 	vm_size_t          size,
764 	kma_flags_t        flags,
765 	vm_tag_t           tag)
766 {
767 	vm_object_t        object;
768 	vm_object_offset_t offset, pg_offset;
769 	vm_page_t          mem;
770 	vm_page_t          local_freeq = NULL;
771 	unsigned int       pages_unwired;
772 
773 	assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
774 
775 	if (flags & KMA_COMPRESSOR) {
776 		offset = addr;
777 		object = compressor_object;
778 
779 		vm_object_lock(object);
780 	} else if (flags & KMA_KOBJECT) {
781 		offset = addr;
782 		object = kernel_object;
783 		vm_object_lock(object);
784 	} else {
785 		offset = 0;
786 		object = NULL;
787 		/*
788 		 * If it's not the kernel object, we need to:
789 		 *      lock map;
790 		 *      lookup entry;
791 		 *      lock object;
792 		 *      unlock map;
793 		 */
794 		panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
795 		    "!KMA_KOBJECT",
796 		    map, (uint64_t) addr, (uint64_t) size, flags);
797 	}
798 	pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);
799 
800 	for (pg_offset = 0, pages_unwired = 0;
801 	    pg_offset < size;
802 	    pg_offset += PAGE_SIZE_64) {
803 		mem = vm_page_lookup(object, offset + pg_offset);
804 
805 		assert(mem);
806 
807 		if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
808 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
809 			pages_unwired++;
810 		}
811 
812 		mem->vmp_busy = TRUE;
813 
814 		assert(mem->vmp_tabled);
815 		vm_page_remove(mem, TRUE);
816 		assert(mem->vmp_busy);
817 
818 		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
819 		assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
820 		    (mem->vmp_q_state == VM_PAGE_IS_WIRED));
821 
822 		mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
823 		mem->vmp_snext = local_freeq;
824 		local_freeq = mem;
825 	}
826 	vm_object_unlock(object);
827 
828 
829 	if (local_freeq) {
830 		vm_page_free_list(local_freeq, TRUE);
831 		if (pages_unwired != 0) {
832 			vm_page_lockspin_queues();
833 			vm_page_wire_count -= pages_unwired;
834 			vm_page_unlock_queues();
835 			vm_tag_update_size(tag, -ptoa_64(pages_unwired));
836 		}
837 	}
838 }
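/*
 * Illustrative sketch (not part of the original source): the routines
 * above are commonly paired so that virtual space is reserved once and
 * physical pages are added and removed on demand.  The sizes and VM tag
 * below are placeholders.
 *
 *	vm_offset_t base;
 *
 *	if (kernel_memory_allocate(kernel_map, &base, 32 * PAGE_SIZE, 0,
 *	    KMA_KOBJECT | KMA_VAONLY, VM_KERN_MEMORY_KEXT) == KERN_SUCCESS) {
 *		kernel_memory_populate(kernel_map, base, PAGE_SIZE,
 *		    KMA_KOBJECT, VM_KERN_MEMORY_KEXT);
 *		...
 *		kernel_memory_depopulate(kernel_map, base, PAGE_SIZE,
 *		    KMA_KOBJECT, VM_KERN_MEMORY_KEXT);
 *	}
 */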
839 
840 /*
841  *	kmem_realloc:
842  *
843  *	Reallocate wired-down memory in the kernel's address map
844  *	or a submap.  Newly allocated pages are not zeroed.
845  *	This can only be used on regions allocated with kmem_alloc.
846  *
847  *	If successful, the pages in the old region are mapped twice.
848  *	The old region is unchanged.  Use kmem_free to get rid of it.
849  */
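/*
 * Illustrative sketch (not part of the original source): growing a
 * kmem_alloc'd region and then releasing the old mapping, as the comment
 * above requires.  The names and VM tag are placeholders.
 *
 *	vm_offset_t   newaddr;
 *	kern_return_t kr;
 *
 *	kr = kmem_realloc(kernel_map, oldaddr, oldsize,
 *	    &newaddr, newsize, VM_KERN_MEMORY_KEXT);
 *	if (kr == KERN_SUCCESS) {
 *		kmem_free(kernel_map, oldaddr, oldsize);
 *	}
 */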
850 kern_return_t
851 kmem_realloc(
852 	vm_map_t                map,
853 	vm_offset_t             oldaddr,
854 	vm_size_t               oldsize,
855 	vm_offset_t             *newaddrp,
856 	vm_size_t               newsize,
857 	vm_tag_t                tag)
858 {
859 	vm_object_t             object;
860 	vm_object_offset_t      offset;
861 	vm_map_offset_t         oldmapmin;
862 	vm_map_offset_t         oldmapmax;
863 	vm_map_offset_t         newmapaddr;
864 	vm_map_size_t           oldmapsize;
865 	vm_map_size_t           newmapsize;
866 	vm_map_entry_t          oldentry;
867 	vm_map_entry_t          newentry;
868 	vm_page_t               mem;
869 	kern_return_t           kr;
870 	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
871 
872 	oldmapmin = vm_map_trunc_page(oldaddr,
873 	    VM_MAP_PAGE_MASK(map));
874 	oldmapmax = vm_map_round_page(oldaddr + oldsize,
875 	    VM_MAP_PAGE_MASK(map));
876 	oldmapsize = oldmapmax - oldmapmin;
877 	newmapsize = vm_map_round_page(newsize,
878 	    VM_MAP_PAGE_MASK(map));
879 	if (newmapsize < newsize) {
880 		/* overflow */
881 		*newaddrp = 0;
882 		return KERN_INVALID_ARGUMENT;
883 	}
884 
885 	/*
886 	 *	Find the VM object backing the old region.
887 	 */
888 
889 	vm_map_lock(map);
890 
891 	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
892 		panic("kmem_realloc");
893 	}
894 	if (oldentry->vme_atomic) {
895 		vmk_flags.vmkf_atomic_entry = TRUE;
896 	}
897 	object = VME_OBJECT(oldentry);
898 
899 	/*
900 	 *	Increase the size of the object and
901 	 *	fill in the new region.
902 	 */
903 
904 	vm_object_reference(object);
905 	/* by grabbing the object lock before unlocking the map */
906 	/* we guarantee that we will panic if more than one     */
907 	/* attempt is made to realloc a kmem_alloc'd area       */
908 	vm_object_lock(object);
909 	vm_map_unlock(map);
910 	if (object->vo_size != oldmapsize) {
911 		panic("kmem_realloc");
912 	}
913 	object->vo_size = newmapsize;
914 	vm_object_unlock(object);
915 
916 	/* allocate the new pages while expanded portion of the */
917 	/* object is still not mapped */
918 	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
919 	    vm_object_round_page(newmapsize - oldmapsize));
920 
921 	/*
922 	 *	Find space for the new region.
923 	 */
924 
925 	kr = vm_map_find_space(map, &newmapaddr, newmapsize,
926 	    (vm_map_offset_t)0, vmk_flags, tag, &newentry);
927 	if (kr != KERN_SUCCESS) {
928 		vm_object_lock(object);
929 		for (offset = oldmapsize;
930 		    offset < newmapsize; offset += PAGE_SIZE) {
931 			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
932 				VM_PAGE_FREE(mem);
933 			}
934 		}
935 		object->vo_size = oldmapsize;
936 		vm_object_unlock(object);
937 		vm_object_deallocate(object);
938 		return kr;
939 	}
940 	VME_OBJECT_SET(newentry, object);
941 	VME_OFFSET_SET(newentry, 0);
942 	assert(newentry->wired_count == 0);
943 
944 
945 	/* add an extra reference in case we have someone doing an */
946 	/* unexpected deallocate */
947 	vm_object_reference(object);
948 	vm_map_unlock(map);
949 
950 	kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
951 	    VM_PROT_DEFAULT, tag, FALSE);
952 	if (KERN_SUCCESS != kr) {
953 		vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
954 		vm_object_lock(object);
955 		for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
956 			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
957 				VM_PAGE_FREE(mem);
958 			}
959 		}
960 		object->vo_size = oldmapsize;
961 		vm_object_unlock(object);
962 		vm_object_deallocate(object);
963 		return kr;
964 	}
965 	vm_object_deallocate(object);
966 
967 	if (kernel_object == object) {
968 		vm_tag_update_size(tag, newmapsize);
969 	}
970 
971 	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
972 	return KERN_SUCCESS;
973 }
974 
975 /*
976  *	kmem_alloc:
977  *
978  *	Allocate wired-down memory in the kernel's address map
979  *	or a submap.  The memory is not zero-filled.
980  */
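/*
 * Illustrative sketch (not part of the original source): the usual
 * pairing of kmem_alloc with kmem_free, using the same size for both
 * calls.  The size and VM tag are placeholder choices.
 *
 *	vm_offset_t buf;
 *
 *	if (kmem_alloc(kernel_map, &buf, 16 * PAGE_SIZE,
 *	    VM_KERN_MEMORY_KEXT) == KERN_SUCCESS) {
 *		...
 *		kmem_free(kernel_map, buf, 16 * PAGE_SIZE);
 *	}
 */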
981 
982 kern_return_t
983 kmem_alloc_external(
984 	vm_map_t        map,
985 	vm_offset_t     *addrp,
986 	vm_size_t       size)
987 {
988 	return kmem_alloc(map, addrp, size, vm_tag_bt());
989 }
990 
991 
992 /*
993  *	kmem_alloc_kobject:
994  *
995  *	Allocate wired-down memory in the kernel's address map
996  *	or a submap.  The memory is not zero-filled.
997  *
998  *	The memory is allocated in the kernel_object.
999  *	It may not be copied with vm_map_copy, and
1000  *	it may not be reallocated with kmem_realloc.
1001  */
1002 
1003 kern_return_t
1004 kmem_alloc_kobject_external(
1005 	vm_map_t        map,
1006 	vm_offset_t     *addrp,
1007 	vm_size_t       size)
1008 {
1009 	return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
1010 }
1011 
1012 /*
1013  *	kmem_alloc_pageable:
1014  *
1015  *	Allocate pageable memory in the kernel's address map.
1016  */
1017 
1018 kern_return_t
1019 kmem_alloc_pageable_external(
1020 	vm_map_t        map,
1021 	vm_offset_t     *addrp,
1022 	vm_size_t       size)
1023 {
1024 	return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
1025 }
1026 
1027 /*
1028  *	kmem_free:
1029  *
1030  *	Release a region of kernel virtual memory allocated
1031  *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1032  *	and return the physical pages associated with that region.
1033  */
1034 
1035 void
1036 kmem_free(
1037 	vm_map_t        map,
1038 	vm_offset_t     addr,
1039 	vm_size_t       size)
1040 {
1041 	kern_return_t kr;
1042 
1043 	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1044 
1045 	if (size == 0) {
1046 #if MACH_ASSERT
1047 		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
1048 #endif
1049 		return;
1050 	}
1051 
1052 	kr = vm_map_remove(map,
1053 	    vm_map_trunc_page(addr,
1054 	    VM_MAP_PAGE_MASK(map)),
1055 	    vm_map_round_page(addr + size,
1056 	    VM_MAP_PAGE_MASK(map)),
1057 	    VM_MAP_REMOVE_KUNWIRE);
1058 	if (kr != KERN_SUCCESS) {
1059 		panic("kmem_free");
1060 	}
1061 }
1062 
1063 /*
1064  *	Allocate new pages in an object.
1065  */
1066 
1067 kern_return_t
1068 kmem_alloc_pages(
1069 	vm_object_t             object,
1070 	vm_object_offset_t      offset,
1071 	vm_object_size_t        size)
1072 {
1073 	vm_object_size_t                alloc_size;
1074 
1075 	alloc_size = vm_object_round_page(size);
1076 	vm_object_lock(object);
1077 	while (alloc_size) {
1078 		vm_page_t   mem;
1079 
1080 
1081 		/*
1082 		 *	Allocate a page
1083 		 */
1084 		while (VM_PAGE_NULL ==
1085 		    (mem = vm_page_alloc(object, offset))) {
1086 			vm_object_unlock(object);
1087 			VM_PAGE_WAIT();
1088 			vm_object_lock(object);
1089 		}
1090 		mem->vmp_busy = FALSE;
1091 
1092 		alloc_size -= PAGE_SIZE;
1093 		offset += PAGE_SIZE;
1094 	}
1095 	vm_object_unlock(object);
1096 	return KERN_SUCCESS;
1097 }
1098 
1099 /*
1100  *	kmem_suballoc:
1101  *
1102  *	Allocates a map to manage a subrange
1103  *	of the kernel virtual address space.
1104  *
1105  *	Arguments are as follows:
1106  *
1107  *	parent		Map to take range from
1108  *	addr		Address of start of range (IN/OUT)
1109  *	size		Size of range to find
1110  *	vmc_options	Map creation options (e.g. whether the submap is pageable)
1111  *	flags, vmk_flags, tag	Placement flags (e.g. anywhere), kernel flags, VM tag
1112  *	new_map		Pointer to new submap
1113  */
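/*
 * Illustrative sketch (not part of the original source): carving a
 * pageable 64MB submap out of kernel_map, placed anywhere.  The size,
 * creation option and VM tag are placeholder choices.
 *
 *	vm_map_t    submap;
 *	vm_offset_t base = 0;
 *
 *	kr = kmem_suballoc(kernel_map, &base, 64 * 1024 * 1024,
 *	    VM_MAP_CREATE_PAGEABLE, VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_IOKIT, &submap);
 */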
1114 kern_return_t
1115 kmem_suballoc(
1116 	vm_map_t        parent,
1117 	vm_offset_t     *addr,
1118 	vm_size_t       size,
1119 	vm_map_create_options_t vmc_options,
1120 	int             flags,
1121 	vm_map_kernel_flags_t vmk_flags,
1122 	vm_tag_t    tag,
1123 	vm_map_t        *new_map)
1124 {
1125 	vm_map_t        map;
1126 	vm_map_offset_t map_addr;
1127 	vm_map_size_t   map_size;
1128 	kern_return_t   kr;
1129 
1130 	map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(parent));
1131 	if (map_size < size) {
1132 		/* overflow */
1133 		*addr = 0;
1134 		return KERN_INVALID_ARGUMENT;
1135 	}
1136 
1137 	/*
1138 	 *	Need reference on submap object because it is internal
1139 	 *	to the vm_system.  vm_object_enter will never be called
1140 	 *	on it (usual source of reference for vm_map_enter).
1141 	 */
1142 	vm_object_reference(vm_submap_object);
1143 
1144 	map_addr = ((flags & VM_FLAGS_ANYWHERE)
1145 	    ? vm_map_min(parent)
1146 	    : vm_map_trunc_page(*addr,
1147 	    VM_MAP_PAGE_MASK(parent)));
1148 
1149 	kr = vm_map_enter(parent, &map_addr, map_size,
1150 	    (vm_map_offset_t) 0, flags, vmk_flags, tag,
1151 	    vm_submap_object, (vm_object_offset_t) 0, FALSE,
1152 	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1153 	if (kr != KERN_SUCCESS) {
1154 		vm_object_deallocate(vm_submap_object);
1155 		return kr;
1156 	}
1157 
1158 	pmap_reference(vm_map_pmap(parent));
1159 	map = vm_map_create_options(vm_map_pmap(parent), map_addr,
1160 	    map_addr + map_size, vmc_options);
1161 	/* inherit the parent map's page size */
1162 	vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));
1163 
1164 	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
1165 	if (kr != KERN_SUCCESS) {
1166 		/*
1167 		 * See comment preceding vm_map_submap().
1168 		 */
1169 		vm_map_remove(parent, map_addr, map_addr + map_size,
1170 		    VM_MAP_REMOVE_NO_FLAGS);
1171 		vm_map_deallocate(map); /* also removes ref to pmap */
1172 		vm_object_deallocate(vm_submap_object);
1173 		return kr;
1174 	}
1175 	*addr = CAST_DOWN(vm_offset_t, map_addr);
1176 	*new_map = map;
1177 	return KERN_SUCCESS;
1178 }
1179 /*
1180  * The default percentage of memory that can be mlocked is scaled based on the total
1181  * amount of memory in the system. These percentages are calculated
1182  * offline and stored in this table. We index this table by
1183  * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1184  * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1185  *
1186  * Note that these values were picked for mac.
1187  * If we ever have very large memory config arm devices, we may want to revisit
1188  * since the kernel overhead is smaller there due to the larger page size.
1189  */
1190 
1191 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1192 #define VM_USER_WIREABLE_MIN_CONFIG 32
1193 #if CONFIG_JETSAM
1194 /* Systems with jetsam can wire a bit more b/c the system can relieve wired
1195  * pressure.
1196  */
1197 static vm_map_size_t wire_limit_percents[] =
1198 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
1199 #else
1200 static vm_map_size_t wire_limit_percents[] =
1201 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
1202 #endif /* CONFIG_JETSAM */
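/*
 * Worked example (derived from the table above): a 16GB configuration
 * has log2(max_mem) = 34, giving table index 34 - 32 = 2.  Without
 * jetsam that selects 76%, i.e. roughly 12.2GB lockable, before the
 * VM_NOT_USER_WIREABLE_MAX cap in kmem_set_user_wire_limits() is applied.
 */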
1203 
1204 /*
1205  * Sets the default global user wire limit which limits the amount of
1206  * memory that can be locked via mlock() based on the above algorithm.
1207  * This can be overridden via a sysctl.
1208  */
1209 static void
1210 kmem_set_user_wire_limits(void)
1211 {
1212 	uint64_t available_mem_log;
1213 	uint64_t max_wire_percent;
1214 	size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1215 	    sizeof(vm_map_size_t);
1216 	vm_map_size_t limit;
1217 	uint64_t config_memsize = max_mem;
1218 #if defined(XNU_TARGET_OS_OSX)
1219 	config_memsize = max_mem_actual;
1220 #endif /* defined(XNU_TARGET_OS_OSX) */
1221 
1222 	available_mem_log = bit_floor(config_memsize);
1223 
1224 	if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1225 		available_mem_log = 0;
1226 	} else {
1227 		available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1228 	}
1229 	if (available_mem_log >= wire_limit_percents_length) {
1230 		available_mem_log = wire_limit_percents_length - 1;
1231 	}
1232 	max_wire_percent = wire_limit_percents[available_mem_log];
1233 
1234 	limit = config_memsize * max_wire_percent / 100;
1235 	/* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
1236 	if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
1237 		limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
1238 	}
1239 
1240 	vm_global_user_wire_limit = limit;
1241 	/* the default per task limit is the same as the global limit */
1242 	vm_per_task_user_wire_limit = limit;
1243 	vm_add_wire_count_over_global_limit = 0;
1244 	vm_add_wire_count_over_user_limit = 0;
1245 }
1246 
1247 
1248 /*
1249  *	kmem_init:
1250  *
1251  *	Initialize the kernel's virtual memory map, taking
1252  *	into account all memory allocated up to this time.
1253  */
1254 __startup_func
1255 void
1256 kmem_init(
1257 	vm_offset_t     start,
1258 	vm_offset_t     end)
1259 {
1260 	vm_map_offset_t map_start;
1261 	vm_map_offset_t map_end;
1262 	vm_map_kernel_flags_t vmk_flags;
1263 
1264 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1265 	vmk_flags.vmkf_permanent = TRUE;
1266 	vmk_flags.vmkf_no_pmap_check = TRUE;
1267 
1268 	map_start = vm_map_trunc_page(start,
1269 	    VM_MAP_PAGE_MASK(kernel_map));
1270 	map_end = vm_map_round_page(end,
1271 	    VM_MAP_PAGE_MASK(kernel_map));
1272 
1273 #if     defined(__arm__) || defined(__arm64__)
1274 	kernel_map = vm_map_create_options(pmap_kernel(),
1275 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1276 	    VM_MAX_KERNEL_ADDRESS,
1277 	    VM_MAP_CREATE_DEFAULT);
1278 	/*
1279 	 *	Reserve virtual memory allocated up to this time.
1280 	 */
1281 	{
1282 		unsigned int    region_select = 0;
1283 		vm_map_offset_t region_start;
1284 		vm_map_size_t   region_size;
1285 		vm_map_offset_t map_addr;
1286 		kern_return_t kr;
1287 
1288 		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
1289 			map_addr = region_start;
1290 			kr = vm_map_enter(kernel_map, &map_addr,
1291 			    vm_map_round_page(region_size,
1292 			    VM_MAP_PAGE_MASK(kernel_map)),
1293 			    (vm_map_offset_t) 0,
1294 			    VM_FLAGS_FIXED,
1295 			    vmk_flags,
1296 			    VM_KERN_MEMORY_NONE,
1297 			    VM_OBJECT_NULL,
1298 			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1299 			    VM_INHERIT_DEFAULT);
1300 
1301 			if (kr != KERN_SUCCESS) {
1302 				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
1303 				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1304 				    (uint64_t) region_size, kr);
1305 			}
1306 
1307 			region_select++;
1308 		}
1309 	}
1310 #else
1311 	kernel_map = vm_map_create_options(pmap_kernel(),
1312 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
1313 	    VM_MAP_CREATE_DEFAULT);
1314 	/*
1315 	 *	Reserve virtual memory allocated up to this time.
1316 	 */
1317 	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
1318 		vm_map_offset_t map_addr;
1319 		kern_return_t kr;
1320 
1321 		vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1322 		vmk_flags.vmkf_no_pmap_check = TRUE;
1323 
1324 		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1325 		kr = vm_map_enter(kernel_map,
1326 		    &map_addr,
1327 		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1328 		    (vm_map_offset_t) 0,
1329 		    VM_FLAGS_FIXED,
1330 		    vmk_flags,
1331 		    VM_KERN_MEMORY_NONE,
1332 		    VM_OBJECT_NULL,
1333 		    (vm_object_offset_t) 0, FALSE,
1334 		    VM_PROT_NONE, VM_PROT_NONE,
1335 		    VM_INHERIT_DEFAULT);
1336 
1337 		if (kr != KERN_SUCCESS) {
1338 			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
1339 			    (uint64_t) start, (uint64_t) end,
1340 			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1341 			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1342 			    kr);
1343 		}
1344 	}
1345 #endif
1346 
1347 	kmem_set_user_wire_limits();
1348 }
1349 
1350 /*
1351  *	Routine:	copyinmap
1352  *	Purpose:
1353  *		Like copyin, except that fromaddr is an address
1354  *		in the specified VM map.  This implementation
1355  *		is incomplete; it handles the current user map
1356  *		and the kernel map/submaps.
1357  */
1358 kern_return_t
1359 copyinmap(
1360 	vm_map_t                map,
1361 	vm_map_offset_t         fromaddr,
1362 	void                    *todata,
1363 	vm_size_t               length)
1364 {
1365 	kern_return_t   kr = KERN_SUCCESS;
1366 	vm_map_t oldmap;
1367 
1368 	if (vm_map_pmap(map) == pmap_kernel()) {
1369 		/* assume a correct copy */
1370 		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
1371 	} else if (current_map() == map) {
1372 		if (copyin(fromaddr, todata, length) != 0) {
1373 			kr = KERN_INVALID_ADDRESS;
1374 		}
1375 	} else {
1376 		vm_map_reference(map);
1377 		oldmap = vm_map_switch(map);
1378 		if (copyin(fromaddr, todata, length) != 0) {
1379 			kr = KERN_INVALID_ADDRESS;
1380 		}
1381 		vm_map_switch(oldmap);
1382 		vm_map_deallocate(map);
1383 	}
1384 	return kr;
1385 }
1386 
1387 /*
1388  *	Routine:	copyoutmap
1389  *	Purpose:
1390  *		Like copyout, except that toaddr is an address
1391  *		in the specified VM map.
1392  */
1393 kern_return_t
1394 copyoutmap(
1395 	vm_map_t                map,
1396 	void                    *fromdata,
1397 	vm_map_address_t        toaddr,
1398 	vm_size_t               length)
1399 {
1400 	kern_return_t   kr = KERN_SUCCESS;
1401 	vm_map_t        oldmap;
1402 
1403 	if (vm_map_pmap(map) == pmap_kernel()) {
1404 		/* assume a correct copy */
1405 		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
1406 	} else if (current_map() == map) {
1407 		if (copyout(fromdata, toaddr, length) != 0) {
1408 			kr = KERN_INVALID_ADDRESS;
1409 		}
1410 	} else {
1411 		vm_map_reference(map);
1412 		oldmap = vm_map_switch(map);
1413 		if (copyout(fromdata, toaddr, length) != 0) {
1414 			kr = KERN_INVALID_ADDRESS;
1415 		}
1416 		vm_map_switch(oldmap);
1417 		vm_map_deallocate(map);
1418 	}
1419 	return kr;
1420 }
1421 
1422 /*
1423  *	Routine:	copyoutmap_atomic{32, 64}
1424  *	Purpose:
1425  *		Like copyoutmap, except that the operation is atomic.
1426  *      Takes in value rather than *fromdata pointer.
1427  */
1428 kern_return_t
1429 copyoutmap_atomic32(
1430 	vm_map_t                map,
1431 	uint32_t                value,
1432 	vm_map_address_t        toaddr)
1433 {
1434 	kern_return_t   kr = KERN_SUCCESS;
1435 	vm_map_t        oldmap;
1436 
1437 	if (vm_map_pmap(map) == pmap_kernel()) {
1438 		/* assume a correct toaddr */
1439 		*(uint32_t *)toaddr = value;
1440 	} else if (current_map() == map) {
1441 		if (copyout_atomic32(value, toaddr) != 0) {
1442 			kr = KERN_INVALID_ADDRESS;
1443 		}
1444 	} else {
1445 		vm_map_reference(map);
1446 		oldmap = vm_map_switch(map);
1447 		if (copyout_atomic32(value, toaddr) != 0) {
1448 			kr = KERN_INVALID_ADDRESS;
1449 		}
1450 		vm_map_switch(oldmap);
1451 		vm_map_deallocate(map);
1452 	}
1453 	return kr;
1454 }
1455 
1456 kern_return_t
1457 copyoutmap_atomic64(
1458 	vm_map_t                map,
1459 	uint64_t                value,
1460 	vm_map_address_t        toaddr)
1461 {
1462 	kern_return_t   kr = KERN_SUCCESS;
1463 	vm_map_t        oldmap;
1464 
1465 	if (vm_map_pmap(map) == pmap_kernel()) {
1466 		/* assume a correct toaddr */
1467 		*(uint64_t *)toaddr = value;
1468 	} else if (current_map() == map) {
1469 		if (copyout_atomic64(value, toaddr) != 0) {
1470 			kr = KERN_INVALID_ADDRESS;
1471 		}
1472 	} else {
1473 		vm_map_reference(map);
1474 		oldmap = vm_map_switch(map);
1475 		if (copyout_atomic64(value, toaddr) != 0) {
1476 			kr = KERN_INVALID_ADDRESS;
1477 		}
1478 		vm_map_switch(oldmap);
1479 		vm_map_deallocate(map);
1480 	}
1481 	return kr;
1482 }
1483 
1484 /*
1485  *
1486  *	The following two functions are to be used when exposing kernel
1487  *	addresses to userspace via any of the various debug or info
1488  *	facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1489  *	and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1490  *	are exported to KEXTs.
1491  *
1492  *	NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
1493  */
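/*
 * Illustrative sketch (not part of the original source): in-kernel code
 * would normally use the vm_param.h macro wrappers rather than calling
 * these functions directly, e.g. when logging a heap pointer:
 *
 *	printf("object at 0x%lx\n",
 *	    (unsigned long)VM_KERNEL_ADDRHASH(object));
 */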
1494 
1495 static void
1496 vm_kernel_addrhash_internal(
1497 	vm_offset_t addr,
1498 	vm_offset_t *hash_addr,
1499 	uint64_t salt)
1500 {
1501 	assert(salt != 0);
1502 
1503 	if (addr == 0) {
1504 		*hash_addr = 0;
1505 		return;
1506 	}
1507 
1508 	if (VM_KERNEL_IS_SLID(addr)) {
1509 		*hash_addr = VM_KERNEL_UNSLIDE(addr);
1510 		return;
1511 	}
1512 
1513 	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
1514 	SHA256_CTX sha_ctx;
1515 
1516 	SHA256_Init(&sha_ctx);
1517 	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
1518 	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
1519 	SHA256_Final(sha_digest, &sha_ctx);
1520 
1521 	*hash_addr = sha_digest[0];
1522 }
1523 
1524 void
1525 vm_kernel_addrhash_external(
1526 	vm_offset_t addr,
1527 	vm_offset_t *hash_addr)
1528 {
1529 	return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
1530 }
1531 
1532 vm_offset_t
1533 vm_kernel_addrhash(vm_offset_t addr)
1534 {
1535 	vm_offset_t hash_addr;
1536 	vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
1537 	return hash_addr;
1538 }
1539 
1540 void
1541 vm_kernel_addrhide(
1542 	vm_offset_t addr,
1543 	vm_offset_t *hide_addr)
1544 {
1545 	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
1546 }
1547 
1548 /*
1549  *	vm_kernel_addrperm_external:
1550  *	vm_kernel_unslide_or_perm_external:
1551  *
1552  *	Use these macros when exposing an address to userspace that could come from
1553  *	either kernel text/data *or* the heap.
1554  */
1555 void
1556 vm_kernel_addrperm_external(
1557 	vm_offset_t addr,
1558 	vm_offset_t *perm_addr)
1559 {
1560 	if (VM_KERNEL_IS_SLID(addr)) {
1561 		*perm_addr = VM_KERNEL_UNSLIDE(addr);
1562 	} else if (VM_KERNEL_ADDRESS(addr)) {
1563 		*perm_addr = addr + vm_kernel_addrperm_ext;
1564 	} else {
1565 		*perm_addr = addr;
1566 	}
1567 }
1568 
1569 void
1570 vm_kernel_unslide_or_perm_external(
1571 	vm_offset_t addr,
1572 	vm_offset_t *up_addr)
1573 {
1574 	vm_kernel_addrperm_external(addr, up_addr);
1575 }
1576 
1577 void
1578 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
1579 {
1580 	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
1581 		panic("pointer %p can't be packed: low %d bits aren't 0",
1582 		    (void *)ptr, params.vmpp_shift);
1583 	} else if (ptr <= params.vmpp_base) {
1584 		panic("pointer %p can't be packed: below base %p",
1585 		    (void *)ptr, (void *)params.vmpp_base);
1586 	} else {
1587 		panic("pointer %p can't be packed: maximum encodable pointer is %p",
1588 		    (void *)ptr, (void *)vm_packing_max_packable(params));
1589 	}
1590 }
1591 
1592 void
1593 vm_packing_verify_range(
1594 	const char *subsystem,
1595 	vm_offset_t min_address,
1596 	vm_offset_t max_address,
1597 	vm_packing_params_t params)
1598 {
1599 	if (min_address > max_address) {
1600 		panic("%s: %s range invalid min:%p > max:%p",
1601 		    __func__, subsystem, (void *)min_address, (void *)max_address);
1602 	}
1603 
1604 	if (!params.vmpp_base_relative) {
1605 		return;
1606 	}
1607 
1608 	if (min_address <= params.vmpp_base) {
1609 		panic("%s: %s range invalid min:%p <= base:%p",
1610 		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
1611 	}
1612 
1613 	if (max_address > vm_packing_max_packable(params)) {
1614 		panic("%s: %s range invalid max:%p >= max packable:%p",
1615 		    __func__, subsystem, (void *)max_address,
1616 		    (void *)vm_packing_max_packable(params));
1617 	}
1618 }
1619