xref: /xnu-8020.121.3/osfmk/vm/vm_kern.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_kern.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Kernel memory management.
64  */
65 
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_map_internal.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_compressor.h>
75 #include <vm/vm_pageout.h>
76 #include <vm/vm_init.h>
77 #include <kern/misc_protos.h>
78 #include <vm/cpm.h>
79 #include <kern/ledger.h>
80 #include <kern/bits.h>
81 #include <kern/startup.h>
82 
83 #include <string.h>
84 
85 #include <libkern/OSDebug.h>
86 #include <libkern/crypto/sha2.h>
87 #include <libkern/section_keywords.h>
88 #include <sys/kdebug.h>
89 
90 #include <san/kasan.h>
91 #include <kern/kext_alloc.h>
92 
93 /*
94  *	Variables exported by this module.
95  */
96 
97 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
98 SECURITY_READ_ONLY_LATE(struct kmem_range) kmem_ranges[KMEM_RANGE_COUNT] = {};
99 #if ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
100 SECURITY_READ_ONLY_LATE(struct kmem_range)
101 kmem_large_ranges[KMEM_RANGE_COUNT] = {};
102 #endif
103 
104 /*
105  * Forward declarations for internal functions.
106  */
107 extern kern_return_t kmem_alloc_pages(
108 	vm_object_t             object,
109 	vm_object_offset_t      offset,
110 	vm_object_size_t        size);
111 
112 #pragma mark kmem range methods
113 
114 __attribute__((overloadable))
115 __header_always_inline bool
116 kmem_range_contains(const struct kmem_range *r, vm_offset_t addr)
117 {
118 	vm_offset_t rmin, rmax;
119 
120 #if CONFIG_KERNEL_TBI
121 	addr = VM_KERNEL_TBI_FILL(addr);
122 #endif /* CONFIG_KERNEL_TBI */
123 
124 	/*
125 	 * The `&` is not a typo: we really expect the check to pass,
126 	 * so encourage the compiler to eagerly load and test without branches
127 	 */
128 	kmem_range_load(r, rmin, rmax);
129 	return (addr >= rmin) & (addr < rmax);
130 }
131 
132 __attribute__((overloadable))
133 __header_always_inline bool
134 kmem_range_contains(const struct kmem_range *r, vm_offset_t addr, vm_offset_t size)
135 {
136 	vm_offset_t rmin, rmax;
137 
138 #if CONFIG_KERNEL_TBI
139 	addr = VM_KERNEL_TBI_FILL(addr);
140 #endif /* CONFIG_KERNEL_TBI */
141 
142 	/*
143 	 * The `&` is not a typo: we really expect the check to pass,
144 	 * so encourage the compiler to eagerly load and test without branches
145 	 */
146 	kmem_range_load(r, rmin, rmax);
147 	return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax);
148 }
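/*
 * Illustrative comparison (not part of the original source): with `&&` the
 * compiler must honor short-circuit evaluation and may emit one branch per
 * condition, whereas `&` lets it evaluate both comparisons and test once:
 *
 *	if ((addr >= rmin) && (addr < rmax))   // may branch twice
 *	if ((addr >= rmin) &  (addr < rmax))   // loads, compares, tests once
 */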
149 
150 __header_always_inline vm_size_t
151 kmem_range_size(const struct kmem_range *r)
152 {
153 	vm_offset_t rmin, rmax;
154 
155 	kmem_range_load(r, rmin, rmax);
156 	return rmax - rmin;
157 }
158 
159 bool
160 kmem_range_id_contains(kmem_range_id_t range_id, vm_map_offset_t addr,
161     vm_map_size_t size)
162 {
163 	return kmem_range_contains(&kmem_ranges[range_id], addr, size);
164 }
165 
166 kmem_range_id_t
167 kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
168 {
169 	kmem_range_id_t range_id = 0;
170 	for (; range_id < KMEM_RANGE_COUNT; range_id++) {
171 		if (kmem_range_id_contains(range_id, addr, size)) {
172 			break;
173 		}
174 	}
175 	return range_id;
176 }
177 
178 
179 
180 kern_return_t
181 kmem_alloc_contig(
182 	vm_map_t                map,
183 	vm_offset_t             *addrp,
184 	vm_size_t               size,
185 	vm_offset_t             mask,
186 	ppnum_t                 max_pnum,
187 	ppnum_t                 pnum_mask,
188 	kma_flags_t             flags,
189 	vm_tag_t                tag)
190 {
191 	vm_object_t             object;
192 	vm_object_offset_t      offset;
193 	vm_map_offset_t         map_addr;
194 	vm_map_offset_t         map_mask;
195 	vm_map_size_t           map_size, i;
196 	vm_map_entry_t          entry;
197 	vm_page_t               m, pages;
198 	kern_return_t           kr;
199 	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
200 
201 	assert(VM_KERN_MEMORY_NONE != tag);
202 	assert(map);
203 	assert3u(flags & ~KMEM_ALLOC_CONTIG_FLAGS, ==, 0);
204 
205 	map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map));
206 	map_mask = (vm_map_offset_t)mask;
207 
208 	/* Check for zero allocation size (either directly or via overflow) */
209 	if (map_size == 0) {
210 		*addrp = 0;
211 		return KERN_INVALID_ARGUMENT;
212 	}
213 
214 	/*
215 	 *	Allocate a new object (if necessary) and the reference we
216 	 *	will be donating to the map entry.  We must do this before
217 	 *	locking the map, or risk deadlock with the default pager.
218 	 */
219 	if ((flags & KMA_KOBJECT) != 0) {
220 		object = kernel_object;
221 		vm_object_reference(object);
222 	} else {
223 		object = vm_object_allocate(map_size);
224 	}
225 	if (flags & KMA_PERMANENT) {
226 		vmk_flags.vmkf_permanent = true;
227 	}
228 	if (flags & KMA_DATA) {
229 		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
230 		if (flags & KMA_PERMANENT) {
231 			vmk_flags.vmkf_last_free = true;
232 		}
233 	}
234 
235 	kr = vm_map_find_space(map, 0, map_size, map_mask,
236 	    vmk_flags, &entry);
237 	if (KERN_SUCCESS != kr) {
238 		vm_object_deallocate(object);
239 		return kr;
240 	}
241 
242 	map_addr = entry->vme_start;
243 	if (object == kernel_object) {
244 		offset = map_addr;
245 	} else {
246 		offset = 0;
247 	}
248 	VME_OBJECT_SET(entry, object);
249 	VME_OFFSET_SET(entry, offset);
250 	VME_ALIAS_SET(entry, tag);
251 
252 	/* Take an extra object ref in case the map entry gets deleted */
253 	vm_object_reference(object);
254 	vm_map_unlock(map);
255 
256 	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
257 
258 	if (kr != KERN_SUCCESS) {
259 		vm_map_remove(map,
260 		    vm_map_trunc_page(map_addr,
261 		    VM_MAP_PAGE_MASK(map)),
262 		    vm_map_round_page(map_addr + map_size,
263 		    VM_MAP_PAGE_MASK(map)));
264 		vm_object_deallocate(object);
265 		*addrp = 0;
266 		return kr;
267 	}
268 
269 	if (flags & KMA_ZERO) {
270 		for (m = pages; m; m = NEXT_PAGE(m)) {
271 			vm_page_zero_fill(m);
272 		}
273 	}
274 
275 
276 	vm_object_lock(object);
277 	for (i = 0; i < map_size; i += PAGE_SIZE) {
278 		m = pages;
279 		pages = NEXT_PAGE(m);
280 		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
281 		m->vmp_busy = FALSE;
282 		vm_page_insert(m, object, offset + i);
283 	}
284 	vm_object_unlock(object);
285 
286 	kr = vm_map_wire_kernel(map,
287 	    vm_map_trunc_page(map_addr,
288 	    VM_MAP_PAGE_MASK(map)),
289 	    vm_map_round_page(map_addr + map_size,
290 	    VM_MAP_PAGE_MASK(map)),
291 	    VM_PROT_DEFAULT, tag,
292 	    FALSE);
293 
294 	if (kr != KERN_SUCCESS) {
295 		if (object == kernel_object) {
296 			vm_object_lock(object);
297 			vm_object_page_remove(object, offset, offset + map_size);
298 			vm_object_unlock(object);
299 		}
300 		vm_map_remove(map,
301 		    vm_map_trunc_page(map_addr,
302 		    VM_MAP_PAGE_MASK(map)),
303 		    vm_map_round_page(map_addr + map_size,
304 		    VM_MAP_PAGE_MASK(map)));
305 		vm_object_deallocate(object);
306 		return kr;
307 	}
308 	vm_object_deallocate(object);
309 
310 	if (object == kernel_object) {
311 		vm_map_simplify(map, map_addr);
312 		vm_tag_update_size(tag, map_size);
313 	}
314 	*addrp = (vm_offset_t) map_addr;
315 	assert((vm_map_offset_t) *addrp == map_addr);
316 
317 	return KERN_SUCCESS;
318 }
319 
320 /*
321  * Master entry point for allocating kernel memory.
322  * NOTE: this routine is _never_ interrupt safe.
323  *
324  * map		: map to allocate into
325  * addrp	: pointer to start address of new memory
326  * size		: size of memory requested
327  * flags	: see kma_flags_t.
328  */
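/*
 * Minimal usage sketch (illustrative only, sizes and the tag are chosen
 * for the example, not taken from this file's callers):
 *
 *	vm_offset_t addr;
 *	kern_return_t kr;
 *
 *	kr = kernel_memory_allocate(kernel_map, &addr, 4 * PAGE_SIZE,
 *	    0, KMA_ZERO | KMA_GUARD_FIRST | KMA_GUARD_LAST,
 *	    VM_KERN_MEMORY_OSFMK);
 *	if (kr == KERN_SUCCESS) {
 *		...
 *		kmem_free(kernel_map, addr, 4 * PAGE_SIZE);
 *	}
 */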
329 
330 __abortlike
331 static void
332 __kma_failed_panic(
333 	vm_map_t        map,
334 	kern_return_t   kr,
335 	vm_size_t       size,
336 	vm_offset_t     mask,
337 	kma_flags_t     flags,
338 	vm_tag_t        tag)
339 {
340 	panic("kernel_memory_allocate(%p, _, %zd, 0x%zx, 0x%x, %d) "
341 	    "failed unexpectedly with %d",
342 	    map, (size_t)size, (size_t)mask, flags, tag, kr);
343 }
344 
345 kern_return_t
346 kernel_memory_allocate(
347 	vm_map_t        map,
348 	vm_offset_t     *addrp,
349 	vm_size_t       size,
350 	vm_offset_t     mask,
351 	kma_flags_t     flags,
352 	vm_tag_t        tag)
353 {
354 	vm_object_t             object;
355 	vm_object_offset_t      offset;
356 	vm_map_entry_t          entry = NULL;
357 	vm_map_offset_t         map_addr, fill_start;
358 	vm_map_size_t           map_size, fill_size;
359 	kern_return_t           kr;
360 	vm_page_t               guard_left = VM_PAGE_NULL;
361 	vm_page_t               guard_right = VM_PAGE_NULL;
362 	vm_page_t               wired_page_list = VM_PAGE_NULL;
363 	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
364 	bool                    need_guards;
365 
366 	assert(kernel_map && map->pmap == kernel_pmap);
367 
368 #if DEBUG || DEVELOPMENT
369 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
370 	    size, 0, 0, 0);
371 #endif
372 
373 	/* Check for zero allocation size (either directly or via overflow) */
374 	map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map));
375 	if (__improbable(map_size == 0)) {
376 		kr = KERN_INVALID_ARGUMENT;
377 		goto out;
378 	}
379 
380 	/*
381 	 * Limit the size of a single extent of wired memory, to try to
382 	 * limit the damage to the system if too many pages get wired
383 	 * down. The limit is raised to 2GB with a 128GB max physical
384 	 * limit, but it is scaled by installed memory above this
385 	 * threshold.
386 	 */
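	/*
	 * Worked example (illustrative): with 256GB of installed memory,
	 * sane_size / 64 = 4GB, so a single wired allocation is capped at
	 * MAX(2GB, 4GB) = 4GB; below 128GB installed, the 2GB floor applies.
	 */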
387 	if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
388 	    map_size > MAX(1ULL << 31, sane_size / 64))) {
389 		kr = KERN_RESOURCE_SHORTAGE;
390 		goto out;
391 	}
392 
393 	/*
394 	 * Guard pages:
395 	 *
396 	 * Guard pages are implemented as fictitious pages.
397 	 *
398 	 * However, some maps, and some objects are known
399 	 * to manage their memory explicitly, and do not need
400 	 * those to be materialized, which saves memory.
401 	 *
402 	 * By placing guard pages on either end of a stack,
403 	 * they can help detect cases where a thread walks
404 	 * off either end of its stack.
405 	 *
406 	 * They are allocated and set up here and attempts
407 	 * to access those pages are trapped in vm_fault_page().
408 	 *
409 	 * The map_size we were passed may include extra space for
410 	 * guard pages. fill_size represents the actual size to populate.
411 	 * Similarly, fill_start indicates where the actual pages
412 	 * will begin in the range.
413 	 */
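	/*
	 * Illustrative layout (not from the original source): for a 5-page
	 * map_size with KMA_GUARD_FIRST | KMA_GUARD_LAST,
	 *
	 *	[guard][ data ][ data ][ data ][guard]
	 *	fill_start = PAGE_SIZE, fill_size = 3 * PAGE_SIZE
	 */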
414 
415 	fill_start = 0;
416 	fill_size = map_size;
417 
418 	need_guards = flags & (KMA_KOBJECT | KMA_COMPRESSOR) ||
419 	    !map->never_faults;
420 
421 	if (flags & KMA_GUARD_FIRST) {
422 		vmk_flags.vmkf_guard_before = true;
423 		fill_start += PAGE_SIZE;
424 		if (__improbable(os_sub_overflow(fill_size, PAGE_SIZE, &fill_size))) {
425 			/* no space for a guard page */
426 			kr = KERN_INVALID_ARGUMENT;
427 			goto out;
428 		}
429 		if (need_guards) {
430 			guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
431 			if (__improbable(guard_left == VM_PAGE_NULL)) {
432 				kr = KERN_RESOURCE_SHORTAGE;
433 				goto out;
434 			}
435 		}
436 	}
437 	if (flags & KMA_GUARD_LAST) {
438 		if (__improbable(os_sub_overflow(fill_size, PAGE_SIZE, &fill_size))) {
439 			/* no space for a guard page */
440 			kr = KERN_INVALID_ARGUMENT;
441 			goto out;
442 		}
443 		if (need_guards) {
444 			guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
445 			if (__improbable(guard_right == VM_PAGE_NULL)) {
446 				kr = KERN_RESOURCE_SHORTAGE;
447 				goto out;
448 			}
449 		}
450 	}
451 
452 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
453 		kr = vm_page_alloc_list(atop(fill_size), flags,
454 		    &wired_page_list);
455 		if (__improbable(kr != KERN_SUCCESS)) {
456 			goto out;
457 		}
458 	}
459 
460 	/*
461 	 *	Allocate a new object (if necessary).  We must do this before
462 	 *	locking the map, or risk deadlock with the default pager.
463 	 */
464 	if (flags & KMA_KOBJECT) {
465 		object = kernel_object;
466 		vm_object_reference(object);
467 	} else if (flags & KMA_COMPRESSOR) {
468 		object = compressor_object;
469 		vm_object_reference(object);
470 	} else {
471 		object = vm_object_allocate(map_size);
472 	}
473 
474 	if (flags & KMA_ATOMIC) {
475 		vmk_flags.vmkf_atomic_entry = TRUE;
476 	}
477 	if (flags & KMA_LAST_FREE) {
478 		vmk_flags.vmkf_last_free = true;
479 	}
480 	if (flags & KMA_PERMANENT) {
481 		vmk_flags.vmkf_permanent = true;
482 	}
483 	if (flags & KMA_DATA) {
484 		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
485 		if (flags & KMA_PERMANENT) {
486 			vmk_flags.vmkf_last_free = true;
487 		}
488 	}
489 
490 	kr = vm_map_find_space(map, 0, map_size, mask, vmk_flags, &entry);
491 	if (__improbable(KERN_SUCCESS != kr)) {
492 		vm_object_deallocate(object);
493 		goto out;
494 	}
495 
496 	map_addr = entry->vme_start;
497 	if (flags & (KMA_COMPRESSOR | KMA_KOBJECT)) {
498 		offset = map_addr;
499 	} else {
500 		offset = 0;
501 		vm_object_reference(object);
502 	}
503 	VME_OBJECT_SET(entry, object);
504 	VME_OFFSET_SET(entry, offset);
505 	VME_ALIAS_SET(entry, tag);
506 
507 	if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
508 		entry->wired_count = 1;
509 	}
510 
511 	if (guard_left || guard_right || wired_page_list) {
512 		vm_object_lock(object);
513 		vm_map_unlock(map);
514 
515 		if (guard_left) {
516 			vm_page_insert(guard_left, object, offset);
517 			guard_left->vmp_busy = FALSE;
518 			guard_left = VM_PAGE_NULL;
519 		}
520 
521 		if (guard_right) {
522 			vm_page_insert(guard_right, object,
523 			    offset + fill_start + fill_size);
524 			guard_right->vmp_busy = FALSE;
525 			guard_right = VM_PAGE_NULL;
526 		}
527 
528 		if (wired_page_list) {
529 			kernel_memory_populate_object_and_unlock(object,
530 			    map_addr + fill_start, offset + fill_start, fill_size,
531 			    wired_page_list, flags, tag, VM_PROT_DEFAULT);
532 		} else {
533 			vm_object_unlock(object);
534 		}
535 	} else {
536 		vm_map_unlock(map);
537 	}
538 
539 #if KASAN
540 	if (flags & KMA_PAGEABLE) {
541 		/*
542 		 * We need to allow the range for pageable memory,
543 		 * or faulting will not be allowed.
544 		 */
545 		kasan_notify_address(map_addr, size);
546 	}
547 #endif
548 	/*
549 	 * now that the pages are wired, we no longer have to fear coalesce
550 	 */
551 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
552 		vm_map_simplify(map, map_addr);
553 	} else {
554 		vm_object_deallocate(object);
555 	}
556 
557 #if DEBUG || DEVELOPMENT
558 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
559 	    atop(fill_size), 0, 0, 0);
560 #endif
561 
562 	*addrp = CAST_DOWN(vm_offset_t, map_addr);
563 	return KERN_SUCCESS;
564 
565 out:
566 	if (kr != KERN_SUCCESS && (flags & KMA_NOFAIL)) {
567 		__kma_failed_panic(map, kr, size, mask, flags, tag);
568 	}
569 	if (guard_left) {
570 		guard_left->vmp_snext = wired_page_list;
571 		wired_page_list = guard_left;
572 	}
573 	if (guard_right) {
574 		guard_right->vmp_snext = wired_page_list;
575 		wired_page_list = guard_right;
576 	}
577 	if (wired_page_list) {
578 		vm_page_free_list(wired_page_list, FALSE);
579 	}
580 	*addrp = 0;
581 
582 #if DEBUG || DEVELOPMENT
583 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
584 	    0, 0, 0, 0);
585 #endif
586 	return kr;
587 }
588 
589 void
590 kernel_memory_populate_object_and_unlock(
591 	vm_object_t     object, /* must be locked */
592 	vm_address_t    addr,
593 	vm_offset_t     offset,
594 	vm_size_t       size,
595 	vm_page_t       page_list,
596 	kma_flags_t     flags,
597 	vm_tag_t        tag,
598 	vm_prot_t       prot)
599 {
600 	kern_return_t   pe_result;
601 	vm_page_t       mem;
602 	int             pe_options;
603 	int             pe_flags;
604 
605 	assert3u((bool)(flags & KMA_KOBJECT), ==, object == kernel_object);
606 	assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);
607 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
608 		assert3u(offset, ==, addr);
609 	}
610 
611 	if (flags & KMA_KSTACK) {
612 		pe_flags = VM_MEM_STACK;
613 	} else {
614 		pe_flags = 0;
615 	}
616 
617 	for (vm_object_offset_t pg_offset = 0;
618 	    pg_offset < size;
619 	    pg_offset += PAGE_SIZE_64) {
620 		if (page_list == NULL) {
621 			panic("%s: page_list too short", __func__);
622 		}
623 
624 		mem = page_list;
625 		page_list = mem->vmp_snext;
626 		mem->vmp_snext = NULL;
627 
628 		assert(mem->vmp_wire_count == 0);
629 		assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
630 
631 		if (flags & KMA_COMPRESSOR) {
632 			mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
633 
634 			vm_page_insert(mem, object, offset + pg_offset);
635 		} else {
636 			mem->vmp_q_state = VM_PAGE_IS_WIRED;
637 			mem->vmp_wire_count = 1;
638 
639 			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
640 		}
641 
642 		mem->vmp_busy = false;
643 		mem->vmp_pmapped = true;
644 		mem->vmp_wpmapped = true;
645 
646 		/*
647 		 * Manual PMAP_ENTER_OPTIONS() with shortcuts
648 		 * for the kernel and compressor objects.
649 		 */
650 
651 		PMAP_ENTER_CHECK(kernel_pmap, mem);
652 
653 		pe_options = PMAP_OPTIONS_NOWAIT;
654 		if (flags & (KMA_COMPRESSOR | KMA_KOBJECT)) {
655 			pe_options |= PMAP_OPTIONS_INTERNAL;
656 		} else {
657 			if (object->internal) {
658 				pe_options |= PMAP_OPTIONS_INTERNAL;
659 			}
660 			if (mem->vmp_reusable || object->all_reusable) {
661 				pe_options |= PMAP_OPTIONS_REUSABLE;
662 			}
663 		}
664 
665 		pe_result = pmap_enter_options(kernel_pmap,
666 		    addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
667 		    prot, VM_PROT_NONE, pe_flags,
668 		    /* wired */ TRUE, pe_options, NULL);
669 
670 		if (pe_result == KERN_RESOURCE_SHORTAGE) {
671 			vm_object_unlock(object);
672 
673 			pe_options &= ~PMAP_OPTIONS_NOWAIT;
674 
675 			pe_result = pmap_enter_options(kernel_pmap,
676 			    addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
677 			    prot, VM_PROT_NONE, pe_flags,
678 			    /* wired */ TRUE, pe_options, NULL);
679 
680 			vm_object_lock(object);
681 		}
682 
683 		assert(pe_result == KERN_SUCCESS);
684 
685 		if (flags & KMA_NOENCRYPT) {
686 			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
687 		}
688 	}
689 
690 	if (page_list) {
691 		panic("%s: page_list too long", __func__);
692 	}
693 
694 	vm_object_unlock(object);
695 
696 	if (!(flags & KMA_COMPRESSOR)) {
697 		vm_page_lockspin_queues();
698 		vm_page_wire_count += atop(size);
699 		vm_page_unlock_queues();
700 	}
701 
702 	if (flags & KMA_KOBJECT) {
703 		/* vm_page_insert_wired() handles regular objects already */
704 		vm_tag_update_size(tag, size);
705 	}
706 
707 #if KASAN
708 	if (flags & KMA_COMPRESSOR) {
709 		kasan_notify_address_nopoison(addr, size);
710 	} else {
711 		kasan_notify_address(addr, size);
712 	}
713 #endif
714 }
715 
716 __abortlike
717 static void
718 __kernel_or_compressor_object_panic(kma_flags_t flags)
719 {
720 	if (flags == 0) {
721 		panic("KMA_KOBJECT or KMA_COMPRESSOR is required");
722 	}
723 	panic("more than one of KMA_KOBJECT or KMA_COMPRESSOR specified");
724 }
725 
726 static inline vm_object_t
727 kernel_or_compressor_object(kma_flags_t flags)
728 {
729 	flags &= (KMA_KOBJECT | KMA_COMPRESSOR);
730 	if (flags == 0 || (flags & (flags - 1))) {
731 		__kernel_or_compressor_object_panic(flags);
732 	}
733 
734 	return (flags & KMA_KOBJECT) ? kernel_object : compressor_object;
735 }
736 
737 kern_return_t
738 kernel_memory_populate(
739 	vm_offset_t     addr,
740 	vm_size_t       size,
741 	kma_flags_t     flags,
742 	vm_tag_t        tag)
743 {
744 	kern_return_t   kr = KERN_SUCCESS;
745 	vm_page_t       page_list = NULL;
746 	vm_size_t       page_count = atop_64(size);
747 	vm_object_t     object = kernel_or_compressor_object(flags);
748 
749 #if DEBUG || DEVELOPMENT
750 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
751 	    size, 0, 0, 0);
752 #endif
753 
754 	kr = vm_page_alloc_list(page_count, flags, &page_list);
755 	if (kr == KERN_SUCCESS) {
756 		vm_object_lock(object);
757 		kernel_memory_populate_object_and_unlock(object, addr,
758 		    addr, size, page_list, flags, tag, VM_PROT_DEFAULT);
759 	}
760 
761 #if DEBUG || DEVELOPMENT
762 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
763 	    page_count, 0, 0, 0);
764 #endif
765 	return kr;
766 }
767 
768 void
769 kernel_memory_depopulate(
770 	vm_offset_t        addr,
771 	vm_size_t          size,
772 	kma_flags_t        flags,
773 	vm_tag_t           tag)
774 {
775 	vm_object_t        object = kernel_or_compressor_object(flags);
776 	vm_object_offset_t offset = addr;
777 	vm_page_t          mem;
778 	vm_page_t          local_freeq = NULL;
779 	unsigned int       pages_unwired = 0;
780 
781 	vm_object_lock(object);
782 
783 	pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);
784 
785 	for (vm_object_offset_t pg_offset = 0;
786 	    pg_offset < size;
787 	    pg_offset += PAGE_SIZE_64) {
788 		mem = vm_page_lookup(object, offset + pg_offset);
789 
790 		assert(mem);
791 
792 		if (flags & KMA_COMPRESSOR) {
793 			assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
794 		} else {
795 			assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
796 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
797 			pages_unwired++;
798 		}
799 
800 		mem->vmp_busy = TRUE;
801 
802 		assert(mem->vmp_tabled);
803 		vm_page_remove(mem, TRUE);
804 		assert(mem->vmp_busy);
805 
806 		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
807 
808 		mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
809 		mem->vmp_snext = local_freeq;
810 		local_freeq = mem;
811 	}
812 
813 	vm_object_unlock(object);
814 
815 	vm_page_free_list(local_freeq, TRUE);
816 
817 	if (!(flags & KMA_COMPRESSOR)) {
818 		vm_page_lockspin_queues();
819 		vm_page_wire_count -= pages_unwired;
820 		vm_page_unlock_queues();
821 	}
822 
823 	if (flags & KMA_KOBJECT) {
824 		/* vm_page_remove() handles regular objects already */
825 		vm_tag_update_size(tag, -ptoa_64(pages_unwired));
826 	}
827 }
828 
829 /*
830  *	kmem_realloc:
831  *
832  *	Reallocate wired-down memory in the kernel's address map
833  *	or a submap.  Newly allocated pages are not zeroed.
834  *	This can only be used on regions allocated with kmem_alloc.
835  *
836  *	If successful, the pages in the old region are mapped twice.
837  *	The old region is unchanged.  Use kmem_free to get rid of it.
838  */
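/*
 * Usage sketch (illustrative; old_addr, old_size and new_size are
 * hypothetical caller values):
 *
 *	vm_offset_t new_addr;
 *	kr = kmem_realloc(kernel_map, old_addr, old_size,
 *	    &new_addr, new_size, VM_KERN_MEMORY_OSFMK);
 *	if (kr == KERN_SUCCESS) {
 *		kmem_free(kernel_map, old_addr, old_size);
 *	}
 *
 * The old mapping stays valid (the pages are mapped twice) until the
 * caller releases it with kmem_free().
 */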
839 kern_return_t
840 kmem_realloc(
841 	vm_map_t                map,
842 	vm_offset_t             oldaddr,
843 	vm_size_t               oldsize,
844 	vm_offset_t             *newaddrp,
845 	vm_size_t               newsize,
846 	vm_tag_t                tag)
847 {
848 	vm_object_t             object;
849 	vm_object_offset_t      offset;
850 	vm_map_offset_t         oldmapmin;
851 	vm_map_offset_t         oldmapmax;
852 	vm_map_offset_t         newmapaddr;
853 	vm_map_size_t           oldmapsize;
854 	vm_map_size_t           newmapsize;
855 	vm_map_entry_t          oldentry;
856 	vm_map_entry_t          newentry;
857 	vm_page_t               mem;
858 	kern_return_t           kr;
859 	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
860 
861 	oldmapmin = vm_map_trunc_page(oldaddr,
862 	    VM_MAP_PAGE_MASK(map));
863 	oldmapmax = vm_map_round_page(oldaddr + oldsize,
864 	    VM_MAP_PAGE_MASK(map));
865 	oldmapsize = oldmapmax - oldmapmin;
866 	newmapsize = vm_map_round_page(newsize,
867 	    VM_MAP_PAGE_MASK(map));
868 	if (newmapsize < newsize) {
869 		/* overflow */
870 		*newaddrp = 0;
871 		return KERN_INVALID_ARGUMENT;
872 	}
873 
874 	/*
875 	 *	Find the VM object backing the old region.
876 	 */
877 
878 	vm_map_lock(map);
879 
880 	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
881 		panic("kmem_realloc");
882 	}
883 	if (oldentry->vme_atomic) {
884 		vmk_flags.vmkf_atomic_entry = true;
885 	}
886 	vmk_flags.vmkf_range_id = kmem_addr_get_range(oldmapmin, oldmapsize);
887 
888 	object = VME_OBJECT(oldentry);
889 
890 	/*
891 	 *	Increase the size of the object and
892 	 *	fill in the new region.
893 	 */
894 
895 	vm_object_reference(object);
896 	/* by grabbing the object lock before unlocking the map */
897 	/* we guarantee that we will panic if more than one     */
898 	/* attempt is made to realloc a kmem_alloc'd area       */
899 	vm_object_lock(object);
900 	vm_map_unlock(map);
901 	if (object->vo_size != oldmapsize) {
902 		panic("kmem_realloc");
903 	}
904 	object->vo_size = newmapsize;
905 	vm_object_unlock(object);
906 
907 	/* allocate the new pages while expanded portion of the */
908 	/* object is still not mapped */
909 	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
910 	    vm_object_round_page(newmapsize - oldmapsize));
911 
912 	/*
913 	 *	Find space for the new region.
914 	 */
915 
916 	kr = vm_map_find_space(map, 0, newmapsize, 0, vmk_flags, &newentry);
917 	if (kr != KERN_SUCCESS) {
918 		vm_object_lock(object);
919 		for (offset = oldmapsize;
920 		    offset < newmapsize; offset += PAGE_SIZE) {
921 			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
922 				VM_PAGE_FREE(mem);
923 			}
924 		}
925 		object->vo_size = oldmapsize;
926 		vm_object_unlock(object);
927 		vm_object_deallocate(object);
928 		return kr;
929 	}
930 
931 	newmapaddr = newentry->vme_start;
932 	VME_OBJECT_SET(newentry, object);
933 	VME_ALIAS_SET(newentry, tag);
934 	assert(newentry->wired_count == 0);
935 
936 
937 	/* add an extra reference in case we have someone doing an */
938 	/* unexpected deallocate */
939 	vm_object_reference(object);
940 	vm_map_unlock(map);
941 
942 	kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
943 	    VM_PROT_DEFAULT, tag, FALSE);
944 	if (KERN_SUCCESS != kr) {
945 		kmem_free(map, newmapaddr, newmapsize);
946 		vm_object_lock(object);
947 		for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
948 			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
949 				VM_PAGE_FREE(mem);
950 			}
951 		}
952 		object->vo_size = oldmapsize;
953 		vm_object_unlock(object);
954 		vm_object_deallocate(object);
955 		return kr;
956 	}
957 	vm_object_deallocate(object);
958 
959 	if (kernel_object == object) {
960 		vm_tag_update_size(tag, newmapsize);
961 	}
962 
963 	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
964 	return KERN_SUCCESS;
965 }
966 
967 void
968 kmem_realloc_down(
969 	vm_map_t                map,
970 	vm_offset_t             addr,
971 	vm_size_t               oldsize,
972 	vm_size_t               newsize)
973 {
974 	vm_object_t             object;
975 	vm_map_entry_t          entry;
976 	bool                    was_atomic;
977 
978 	oldsize = round_page(oldsize);
979 	newsize = round_page(newsize);
980 
981 	if (oldsize <= newsize) {
982 		panic("kmem_realloc_down() called with invalid sizes %zd <= %zd",
983 		    (size_t)oldsize, (size_t)newsize);
984 	}
985 
986 	/*
987 	 *	Find the VM object backing the old region.
988 	 */
989 
990 	vm_map_lock(map);
991 
992 	if (!vm_map_lookup_entry(map, addr, &entry)) {
993 		panic("kmem_realloc");
994 	}
995 	object = VME_OBJECT(entry);
996 	vm_object_reference(object);
997 
998 	/*
999 	 * This function has limited support for what it can do
1000 	 * and assumes the object is fully mapped in the range.
1001 	 *
1002 	 * Its only caller is OSData::clipForCopyout()
1003 	 * and only supports this use-case.
1004 	 */
1005 	assert(entry->vme_start == addr &&
1006 	    entry->vme_end == addr + oldsize &&
1007 	    entry->vme_offset == 0);
1008 
1009 	was_atomic = entry->vme_atomic;
1010 	entry->vme_atomic = false;
1011 	vm_map_clip_end(map, entry, entry->vme_start + newsize);
1012 	entry->vme_atomic = was_atomic;
1013 
1014 	(void)vm_map_remove_and_unlock(map, addr + newsize, addr + oldsize,
1015 	    VM_MAP_REMOVE_KUNWIRE);
1016 
1017 	vm_object_lock(object);
1018 	/* see kmem_realloc(): guarantees concurrent reallocs will panic */
1019 	if (object->vo_size != oldsize) {
1020 		panic("kmem_realloc");
1021 	}
1022 	vm_object_page_remove(object, newsize, oldsize);
1023 	object->vo_size = newsize;
1024 	vm_object_unlock(object);
1025 	vm_object_deallocate(object);
1026 }
1027 
1028 /*
1029  *	kmem_alloc:
1030  *
1031  *	Allocate wired-down memory in the kernel's address map
1032  *	or a submap.  The memory is not zero-filled.
1033  */
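/*
 * Typical call (illustrative): allocate one wired page in the kernel map
 * under a VM tag, requesting zero-fill explicitly, then release it.
 *
 *	vm_offset_t addr;
 *	if (kmem_alloc(kernel_map, &addr, PAGE_SIZE, KMA_ZERO,
 *	    VM_KERN_MEMORY_OSFMK) == KERN_SUCCESS) {
 *		...
 *		kmem_free(kernel_map, addr, PAGE_SIZE);
 *	}
 */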
1034 
1035 __exported kern_return_t
1036 kmem_alloc_external(
1037 	vm_map_t        map,
1038 	vm_offset_t     *addrp,
1039 	vm_size_t       size);
1040 kern_return_t
1041 kmem_alloc_external(
1042 	vm_map_t        map,
1043 	vm_offset_t     *addrp,
1044 	vm_size_t       size)
1045 {
1046 	return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
1047 }
1048 
1049 
1050 /*
1051  *	kmem_alloc_kobject:
1052  *
1053  *	Allocate wired-down memory in the kernel's address map
1054  *	or a submap.  The memory is not zero-filled.
1055  *
1056  *	The memory is allocated in the kernel_object.
1057  *	It may not be copied with vm_map_copy, and
1058  *	it may not be reallocated with kmem_realloc.
1059  */
1060 
1061 __exported kern_return_t
1062 kmem_alloc_kobject_external(
1063 	vm_map_t        map,
1064 	vm_offset_t     *addrp,
1065 	vm_size_t       size);
1066 kern_return_t
1067 kmem_alloc_kobject_external(
1068 	vm_map_t        map,
1069 	vm_offset_t     *addrp,
1070 	vm_size_t       size)
1071 {
1072 	return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
1073 }
1074 
1075 /*
1076  *	kmem_alloc_pageable:
1077  *
1078  *	Allocate pageable memory in the kernel's address map.
1079  */
1080 
1081 __exported kern_return_t
1082 kmem_alloc_pageable_external(
1083 	vm_map_t        map,
1084 	vm_offset_t     *addrp,
1085 	vm_size_t       size);
1086 kern_return_t
1087 kmem_alloc_pageable_external(
1088 	vm_map_t        map,
1089 	vm_offset_t     *addrp,
1090 	vm_size_t       size)
1091 {
1092 	return kmem_alloc(map, addrp, size, KMA_PAGEABLE, vm_tag_bt());
1093 }
1094 
1095 /*
1096  *	kmem_free:
1097  *
1098  *	Release a region of kernel virtual memory allocated
1099  *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1100  *	and return the physical pages associated with that region.
1101  */
1102 
1103 void
1104 kmem_free(
1105 	vm_map_t        map,
1106 	vm_offset_t     addr,
1107 	vm_size_t       size)
1108 {
1109 	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1110 	assert(map->pmap == kernel_pmap);
1111 
1112 	if (size == 0) {
1113 #if MACH_ASSERT
1114 		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
1115 #endif
1116 		return;
1117 	}
1118 
1119 	(void)vm_map_remove_flags(map,
1120 	    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
1121 	    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
1122 	    VM_MAP_REMOVE_KUNWIRE);
1123 }
1124 
1125 /*
1126  *	Allocate new pages in an object.
1127  */
1128 
1129 kern_return_t
1130 kmem_alloc_pages(
1131 	vm_object_t             object,
1132 	vm_object_offset_t      offset,
1133 	vm_object_size_t        size)
1134 {
1135 	vm_object_size_t                alloc_size;
1136 
1137 	alloc_size = vm_object_round_page(size);
1138 	vm_object_lock(object);
1139 	while (alloc_size) {
1140 		vm_page_t   mem;
1141 
1142 
1143 		/*
1144 		 *	Allocate a page
1145 		 */
1146 		while (VM_PAGE_NULL ==
1147 		    (mem = vm_page_alloc(object, offset))) {
1148 			vm_object_unlock(object);
1149 			VM_PAGE_WAIT();
1150 			vm_object_lock(object);
1151 		}
1152 		mem->vmp_busy = FALSE;
1153 
1154 		alloc_size -= PAGE_SIZE;
1155 		offset += PAGE_SIZE;
1156 	}
1157 	vm_object_unlock(object);
1158 	return KERN_SUCCESS;
1159 }
1160 
1161 kmem_return_t
1162 kmem_suballoc(
1163 	vm_map_t                parent,
1164 	vm_offset_t             *addr,
1165 	vm_size_t               size,
1166 	vm_map_create_options_t vmc_options,
1167 	int                     vm_flags,
1168 	kms_flags_t             flags,
1169 	vm_tag_t                tag)
1170 {
1171 	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1172 	vm_map_offset_t map_addr = 0;
1173 	kmem_return_t kmr = { };
1174 	vm_map_t map;
1175 
1176 	assert(page_aligned(size));
1177 	assert(parent->pmap == kernel_pmap);
1178 
1179 	if ((vm_flags & VM_FLAGS_ANYWHERE) == 0) {
1180 		map_addr = trunc_page(*addr);
1181 	}
1182 
1183 	pmap_reference(vm_map_pmap(parent));
1184 	map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);
1185 
1186 	/*
1187 	 * 1. vm_map_enter() will consume one ref on success.
1188 	 *
1189 	 * 2. make the entry atomic as kernel submaps should never be split.
1190 	 *
1191 	 * 3. instruct vm_map_enter() that it is a fresh submap
1192 	 *    that needs to be taught its bounds as it inserted.
1193 	 */
1194 	vm_map_reference(map);
1195 	vmk_flags.vmkf_atomic_entry = true;
1196 	vmk_flags.vmkf_submap = true;
1197 	vmk_flags.vmkf_submap_adjust = true;
1198 	if (flags & KMS_LAST_FREE) {
1199 		vmk_flags.vmkf_last_free = true;
1200 	}
1201 	if (flags & KMS_PERMANENT) {
1202 		vmk_flags.vmkf_permanent = true;
1203 	}
1204 	if (flags & KMS_DATA) {
1205 		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1206 	}
1207 
1208 	kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
1209 	    vm_flags, vmk_flags, tag, (vm_object_t)map, 0, FALSE,
1210 	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1211 
1212 	if (kmr.kmr_return != KERN_SUCCESS) {
1213 		if (flags & KMS_NOFAIL) {
1214 			panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
1215 			    parent, (size_t)size, kmr.kmr_return);
1216 		}
1217 		assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
1218 		vm_map_deallocate(map);
1219 		vm_map_deallocate(map); /* also removes ref to pmap */
1220 		return kmr;
1221 	}
1222 
1223 	/*
1224 	 * For kmem_suballocs that register a claim and are assigned a range, ensure
1225 	 * that the exact same range is returned.
1226 	 */
1227 	if (*addr != 0 && parent == kernel_map &&
1228 	    startup_phase > STARTUP_SUB_KMEM) {
1229 		assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
1230 	} else {
1231 		*addr = CAST_DOWN(vm_offset_t, map_addr);
1232 	}
1233 
1234 	kmr.kmr_submap = map;
1235 	return kmr;
1236 }
1237 
1238 /*
1239  * The default percentage of memory that can be mlocked is scaled based on the total
1240  * amount of memory in the system. These percentages are calculated
1241  * offline and stored in this table. We index this table by
1242  * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1243  * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1244  *
1245  * Note that these values were picked for mac.
1246  * If we ever have very large memory config arm devices, we may want to revisit
1247  * since the kernel overhead is smaller there due to the larger page size.
1248  */
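/*
 * Worked example (illustrative): on a 16GB configuration,
 * log2(max_mem) = 34, so the index is 34 - 32 = 2 and the wireable limit
 * is wire_limit_percents[2] percent of memory (80% with CONFIG_JETSAM,
 * 76% without).
 */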
1249 
1250 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1251 #define VM_USER_WIREABLE_MIN_CONFIG 32
1252 #if CONFIG_JETSAM
1253 /* Systems with jetsam can wire a bit more because the system can relieve wired
1254  * pressure.
1255  */
1256 static vm_map_size_t wire_limit_percents[] =
1257 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
1258 #else
1259 static vm_map_size_t wire_limit_percents[] =
1260 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
1261 #endif /* CONFIG_JETSAM */
1262 
1263 /*
1264  * Sets the default global user wire limit which limits the amount of
1265  * memory that can be locked via mlock() based on the above algorithm.
1266  * This can be overridden via a sysctl.
1267  */
1268 static void
1269 kmem_set_user_wire_limits(void)
1270 {
1271 	uint64_t available_mem_log;
1272 	uint64_t max_wire_percent;
1273 	size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1274 	    sizeof(vm_map_size_t);
1275 	vm_map_size_t limit;
1276 	uint64_t config_memsize = max_mem;
1277 #if defined(XNU_TARGET_OS_OSX)
1278 	config_memsize = max_mem_actual;
1279 #endif /* defined(XNU_TARGET_OS_OSX) */
1280 
1281 	available_mem_log = bit_floor(config_memsize);
1282 
1283 	if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1284 		available_mem_log = 0;
1285 	} else {
1286 		available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1287 	}
1288 	if (available_mem_log >= wire_limit_percents_length) {
1289 		available_mem_log = wire_limit_percents_length - 1;
1290 	}
1291 	max_wire_percent = wire_limit_percents[available_mem_log];
1292 
1293 	limit = config_memsize * max_wire_percent / 100;
1294 	/* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
1295 	if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
1296 		limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
1297 	}
1298 
1299 	vm_global_user_wire_limit = limit;
1300 	/* the default per task limit is the same as the global limit */
1301 	vm_per_task_user_wire_limit = limit;
1302 	vm_add_wire_count_over_global_limit = 0;
1303 	vm_add_wire_count_over_user_limit = 0;
1304 }
1305 
1306 #define KMEM_MAX_CLAIMS 50
1307 __startup_data
1308 struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
1309 __startup_data
1310 uint32_t kmem_claim_count = 0;
1311 
1312 __startup_func
1313 void
1314 kmem_range_startup_init(
1315 	struct kmem_range_startup_spec *sp)
1316 {
1317 	assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
1318 	if (sp->kc_calculate_sz) {
1319 		sp->kc_size = (sp->kc_calculate_sz)();
1320 	}
1321 	if (sp->kc_size) {
1322 		kmem_claims[kmem_claim_count] = *sp;
1323 		kmem_claim_count++;
1324 	}
1325 }
1326 
1327 static vm_offset_t
1328 kmem_fuzz_start(void)
1329 {
1330 	vm_offset_t kmapoff_kaddr = 0;
1331 	uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
1332 	vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);
1333 
1334 	kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
1335 	    KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
1336 	    VM_KERN_MEMORY_OSFMK);
1337 	return kmapoff_kaddr + kmapoff_size;
1338 }
1339 
1340 /*
1341  * Returns a 16bit random number between 0 and
1342  * upper_limit (inclusive)
1343  */
1344 __startup_func
1345 uint16_t
1346 kmem_get_random16(uint16_t upper_limit)
1347 {
1348 	static uint64_t random_entropy;
1349 	assert(upper_limit < UINT16_MAX);
1350 	if (random_entropy == 0) {
1351 		random_entropy = early_random();
1352 	}
1353 	uint32_t result = random_entropy & UINT32_MAX;
1354 	random_entropy >>= 32;
1355 	return (uint16_t)(result % (upper_limit + 1));
1356 }
1357 
1358 /*
1359  * Generate a randomly shuffled array of indices from 0 to count - 1
1360  */
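/*
 * This is the "inside-out" variant of the Fisher-Yates shuffle: element i
 * is placed at a random slot j in [0, i], and the previous occupant of
 * slot j moves to slot i. Illustrative use, assuming a caller-provided
 * buffer:
 *
 *	uint16_t order[KMEM_RANGE_COUNT];
 *	kmem_shuffle(order, KMEM_RANGE_COUNT);
 *	// order[] now holds 0 .. KMEM_RANGE_COUNT - 1 in random order
 */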
1361 __startup_func
1362 void
1363 kmem_shuffle(uint16_t *shuffle_buf, uint16_t count)
1364 {
1365 	for (uint16_t i = 0; i < count; i++) {
1366 		uint16_t j = kmem_get_random16(i);
1367 		if (j != i) {
1368 			shuffle_buf[i] = shuffle_buf[j];
1369 		}
1370 		shuffle_buf[j] = i;
1371 	}
1372 }
1373 
1374 #if ZSECURITY_CONFIG(KERNEL_DATA_SPLIT)
1375 __startup_func
1376 static void
1377 kmem_shuffle_claims(void)
1378 {
1379 	uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
1380 	kmem_shuffle(&shuffle_buf[0], (uint16_t)kmem_claim_count);
1381 	for (uint16_t i = 0; i < kmem_claim_count; i++) {
1382 		struct kmem_range_startup_spec tmp = kmem_claims[i];
1383 		kmem_claims[i] = kmem_claims[shuffle_buf[i]];
1384 		kmem_claims[shuffle_buf[i]] = tmp;
1385 	}
1386 }
1387 
1388 __startup_func
1389 static void
1390 kmem_readjust_ranges(uint32_t cur_idx)
1391 {
1392 	assert(cur_idx != 0);
1393 	uint32_t j = cur_idx - 1, random;
1394 	struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
1395 	struct kmem_range *sp_range = sp.kc_range;
1396 
1397 	/*
1398 	 * Find max index where restriction is met
1399 	 */
1400 	for (; j > 0; j--) {
1401 		struct kmem_range_startup_spec spj = kmem_claims[j];
1402 		vm_map_offset_t max_start = spj.kc_range->min_address;
1403 		if (spj.kc_flags & KC_NO_MOVE) {
1404 			panic("kmem_range_init: Can't scramble with multiple constraints");
1405 		}
1406 		if (max_start <= sp_range->min_address) {
1407 			break;
1408 		}
1409 	}
1410 
1411 	/*
1412 	 * Pick a random index from 0 to max index and shift claims to the right
1413 	 * to make room for restricted claim
1414 	 */
1415 	random = kmem_get_random16((uint16_t)j);
1416 	assert(random <= j);
1417 
1418 	sp_range->min_address = kmem_claims[random].kc_range->min_address;
1419 	sp_range->max_address = sp_range->min_address + sp.kc_size;
1420 
1421 	for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
1422 		struct kmem_range_startup_spec spj = kmem_claims[j];
1423 		struct kmem_range *range = spj.kc_range;
1424 		range->min_address += sp.kc_size;
1425 		range->max_address += sp.kc_size;
1426 		kmem_claims[j + 1] = spj;
1427 	}
1428 
1429 	sp.kc_flags = KC_NO_MOVE;
1430 	kmem_claims[random] = sp;
1431 }
1432 
1433 #define KMEM_ROUND_GRANULE (32ul << 20)
1434 #define KMEM_ROUND(x) \
1435 	((x + KMEM_ROUND_GRANULE - 1) & -KMEM_ROUND_GRANULE)
1436 
1437 __startup_func
1438 static void
1439 kmem_scramble_ranges(void)
1440 {
1441 	vm_map_size_t largest_free_size = 0, total_size, total_free;
1442 	vm_map_size_t total_claims = 0, data_range_size = 0;
1443 	vm_map_offset_t start = 0;
1444 	struct kmem_range kmem_range_ptr = {};
1445 
1446 	/*
1447 	 * Initialize KMEM_RANGE_ID_UNSORTED range to use the entire map so that
1448 	 * the vm can find the requested ranges.
1449 	 */
1450 	kmem_ranges[KMEM_RANGE_ID_PTR].min_address = MAX(kernel_map->min_offset,
1451 	    VM_MAP_PAGE_SIZE(kernel_map));
1452 	kmem_ranges[KMEM_RANGE_ID_PTR].max_address = kernel_map->max_offset;
1453 
1454 	/*
1455 	 * Allocate the g_kext_map prior to randomizing the remaining submaps, as
1456 	 * this map is 2G in size and starts at the end of kernel_text on x86. It
1457 	 * could otherwise overflow into the heap.
1458 	 */
1459 	kext_alloc_init();
1460 
1461 	/*
1462 	 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
1463 	 * stack addresses. (With a 4K page and 9 bits of randomness, this
1464 	 * eats about 2M of VA from the map)
1465 	 *
1466 	 * Note that we always need to slide by at least one page because the VM
1467 	 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
1468 	 * do not admit this address to be part of any zone submap.
1469 	 */
1470 	start = kmem_fuzz_start();
1471 
1472 	vm_map_sizes(kernel_map, &total_size, &total_free, &largest_free_size);
1473 	largest_free_size = trunc_page(largest_free_size);
1474 
1475 	/*
1476 	 * Determine size of data and pointer kmem_ranges
1477 	 */
1478 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
1479 		total_claims += kmem_claims[i].kc_size;
1480 	}
1481 	largest_free_size -= total_claims;
1482 	data_range_size = round_page((2 * largest_free_size) / 3);
1483 	largest_free_size -= data_range_size;
1484 
1485 	/*
1486 	 * Add claims for data and pointer
1487 	 */
1488 	struct kmem_range_startup_spec kmem_spec_data = {
1489 		.kc_name = "kmem_data_range",
1490 		.kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
1491 		.kc_size = data_range_size,
1492 		.kc_flags = KC_NO_ENTRY,
1493 	};
1494 	/*
1495 	 * Don't use &kmem_ranges[KMEM_RANGE_ID_PTR] as changing that range affects
1496 	 * vm_map_locate_space for the initialization below.
1497 	 */
1498 	kmem_claims[kmem_claim_count++] = kmem_spec_data;
1499 	struct kmem_range_startup_spec kmem_spec_ptr = {
1500 		.kc_name = "kmem_ptr_range",
1501 		.kc_range = &kmem_range_ptr,
1502 		.kc_size = largest_free_size,
1503 		.kc_flags = KC_NO_ENTRY,
1504 	};
1505 	kmem_claims[kmem_claim_count++] = kmem_spec_ptr;
1506 
1507 	/*
1508 	 * Shuffle registered claims
1509 	 */
1510 	assert(kmem_claim_count < UINT16_MAX);
1511 	kmem_shuffle_claims();
1512 
1513 	/*
1514 	 * Apply restrictions and determine range for each claim
1515 	 */
1516 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
1517 		vm_map_offset_t end = 0;
1518 		struct kmem_range_startup_spec sp = kmem_claims[i];
1519 		struct kmem_range *sp_range = sp.kc_range;
1520 		if (vm_map_locate_space(kernel_map, sp.kc_size, 0,
1521 		    VM_MAP_KERNEL_FLAGS_NONE, &start, NULL) != KERN_SUCCESS) {
1522 			panic("kmem_range_init: vm_map_locate_space failing for claim %s",
1523 			    sp.kc_name);
1524 		}
1525 
1526 		end = start + sp.kc_size;
1527 		/*
1528 		 * Re-adjust ranges if restriction not met
1529 		 */
1530 		if (sp_range->min_address && start > sp_range->min_address) {
1531 			kmem_readjust_ranges(i);
1532 		} else {
1533 			sp_range->min_address = start;
1534 			sp_range->max_address = end;
1535 		}
1536 		start = end;
1537 	}
1538 
1539 	/*
1540 	 * We have settled on the ranges, now create temporary entries for the
1541 	 * claims
1542 	 */
1543 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
1544 		struct kmem_range_startup_spec sp = kmem_claims[i];
1545 		vm_map_entry_t entry = NULL;
1546 		if (sp.kc_flags & KC_NO_ENTRY) {
1547 			continue;
1548 		}
1549 		if (vm_map_find_space(kernel_map, sp.kc_range->min_address, sp.kc_size, 0,
1550 		    VM_MAP_KERNEL_FLAGS_NONE, &entry) != KERN_SUCCESS) {
1551 			panic("kmem_range_init: vm_map_find_space failing for claim %s",
1552 			    sp.kc_name);
1553 		}
1554 		vm_object_reference(kernel_object);
1555 		VME_OBJECT_SET(entry, kernel_object);
1556 		VME_OFFSET_SET(entry, entry->vme_start);
1557 		vm_map_unlock(kernel_map);
1558 	}
1559 	/*
1560 	 * Now that we are done assigning all the ranges, fixup
1561 	 * kmem_ranges[KMEM_RANGE_ID_PTR]
1562 	 */
1563 	kmem_ranges[KMEM_RANGE_ID_PTR] = kmem_range_ptr;
1564 
1565 #if DEBUG || DEVELOPMENT
1566 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
1567 		struct kmem_range_startup_spec sp = kmem_claims[i];
1568 		const char *size_str = "K";
1569 		uint32_t shift = 10;
1570 		if (sp.kc_size >> 30) {
1571 			size_str = "G";
1572 			shift = 30;
1573 		} else if (sp.kc_size >> 20) {
1574 			size_str = "M";
1575 			shift = 20;
1576 		}
1577 		printf("%-24s: %p - %p (%llu%s)\n", sp.kc_name,
1578 		    (void *)sp.kc_range->min_address, (void *)sp.kc_range->max_address,
1579 		    sp.kc_size >> shift, size_str);
1580 	}
1581 #endif /* DEBUG || DEVELOPMENT */
1582 }
1583 
1584 __startup_func
1585 static void
1586 kmem_range_init(void)
1587 {
1588 	kmem_scramble_ranges();
1589 
1590 	/* Initialize kmem_large_ranges. Skip 1/8th from the left as we currently
1591 	 * have one front
1592 	 */
1593 	for (kmem_range_id_t i = 0; i < KMEM_RANGE_COUNT; i++) {
1594 		vm_size_t range_adjustment = kmem_range_size(&kmem_ranges[i]) >> 3;
1595 		kmem_large_ranges[i].min_address = kmem_ranges[i].min_address +
1596 		    range_adjustment;
1597 		kmem_large_ranges[i].max_address = kmem_ranges[i].max_address;
1598 	}
1599 
1600 #if DEBUG || DEVELOPMENT
1601 	for (kmem_range_id_t i = 0; i < KMEM_RANGE_COUNT; i++) {
1602 		printf("kmem_large_ranges[%d]    : %p - %p\n", i,
1603 		    (void *)kmem_large_ranges[i].min_address,
1604 		    (void *)kmem_large_ranges[i].max_address);
1605 	}
1606 #endif
1607 }
1608 #else /* ZSECURITY_CONFIG(KERNEL_DATA_SPLIT) */
1609 __startup_func
1610 static void
1611 kmem_range_init(void)
1612 {
1613 	for (kmem_range_id_t i = 0; i < KMEM_RANGE_COUNT; i++) {
1614 		kmem_ranges[i].min_address = kernel_map->min_offset;
1615 		kmem_ranges[i].max_address = kernel_map->max_offset;
1616 	}
1617 	kext_alloc_init();
1618 	kmem_fuzz_start();
1619 }
1620 #endif
1621 STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
1622 
1623 /*
1624  *	kmem_init:
1625  *
1626  *	Initialize the kernel's virtual memory map, taking
1627  *	into account all memory allocated up to this time.
1628  */
1629 __startup_func
1630 void
1631 kmem_init(
1632 	vm_offset_t     start,
1633 	vm_offset_t     end)
1634 {
1635 	vm_map_offset_t map_start;
1636 	vm_map_offset_t map_end;
1637 	vm_map_kernel_flags_t vmk_flags;
1638 
1639 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1640 	vmk_flags.vmkf_permanent = TRUE;
1641 	vmk_flags.vmkf_no_pmap_check = TRUE;
1642 
1643 	map_start = vm_map_trunc_page(start,
1644 	    VM_MAP_PAGE_MASK(kernel_map));
1645 	map_end = vm_map_round_page(end,
1646 	    VM_MAP_PAGE_MASK(kernel_map));
1647 
1648 	vm_map_will_allocate_early_map(&kernel_map);
1649 #if     defined(__arm__) || defined(__arm64__)
1650 	kernel_map = vm_map_create_options(pmap_kernel(),
1651 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1652 	    VM_MAX_KERNEL_ADDRESS,
1653 	    VM_MAP_CREATE_DEFAULT);
1654 	/*
1655 	 *	Reserve virtual memory allocated up to this time.
1656 	 */
1657 	{
1658 		unsigned int    region_select = 0;
1659 		vm_map_offset_t region_start;
1660 		vm_map_size_t   region_size;
1661 		vm_map_offset_t map_addr;
1662 		kern_return_t kr;
1663 
1664 		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
1665 			map_addr = region_start;
1666 			kr = vm_map_enter(kernel_map, &map_addr,
1667 			    vm_map_round_page(region_size,
1668 			    VM_MAP_PAGE_MASK(kernel_map)),
1669 			    (vm_map_offset_t) 0,
1670 			    VM_FLAGS_FIXED,
1671 			    vmk_flags,
1672 			    VM_KERN_MEMORY_NONE,
1673 			    VM_OBJECT_NULL,
1674 			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1675 			    VM_INHERIT_DEFAULT);
1676 
1677 			if (kr != KERN_SUCCESS) {
1678 				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
1679 				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1680 				    (uint64_t) region_size, kr);
1681 			}
1682 
1683 			region_select++;
1684 		}
1685 	}
1686 #else
1687 	kernel_map = vm_map_create_options(pmap_kernel(),
1688 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
1689 	    VM_MAP_CREATE_DEFAULT);
1690 	/*
1691 	 *	Reserve virtual memory allocated up to this time.
1692 	 */
1693 	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
1694 		vm_map_offset_t map_addr;
1695 		kern_return_t kr;
1696 
1697 		vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1698 		vmk_flags.vmkf_no_pmap_check = TRUE;
1699 
1700 		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1701 		kr = vm_map_enter(kernel_map,
1702 		    &map_addr,
1703 		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1704 		    (vm_map_offset_t) 0,
1705 		    VM_FLAGS_FIXED,
1706 		    vmk_flags,
1707 		    VM_KERN_MEMORY_NONE,
1708 		    VM_OBJECT_NULL,
1709 		    (vm_object_offset_t) 0, FALSE,
1710 		    VM_PROT_NONE, VM_PROT_NONE,
1711 		    VM_INHERIT_DEFAULT);
1712 
1713 		if (kr != KERN_SUCCESS) {
1714 			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
1715 			    (uint64_t) start, (uint64_t) end,
1716 			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1717 			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1718 			    kr);
1719 		}
1720 	}
1721 #endif
1722 
1723 	kmem_set_user_wire_limits();
1724 }
1725 
1726 
1727 #pragma mark map copyio
1728 
1729 /*
1730  *	Routine:	copyinmap
1731  *	Purpose:
1732  *		Like copyin, except that fromaddr is an address
1733  *		in the specified VM map.  This implementation
1734  *		is incomplete; it handles the current user map
1735  *		and the kernel map/submaps.
1736  */
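/*
 * Illustrative call (hypothetical: `user_map` and `user_va` are supplied
 * by the caller):
 *
 *	uint32_t word;
 *	kr = copyinmap(user_map, user_va, &word, sizeof(word));
 */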
1737 kern_return_t
1738 copyinmap(
1739 	vm_map_t                map,
1740 	vm_map_offset_t         fromaddr,
1741 	void                    *todata,
1742 	vm_size_t               length)
1743 {
1744 	kern_return_t   kr = KERN_SUCCESS;
1745 	vm_map_t oldmap;
1746 
1747 	if (vm_map_pmap(map) == pmap_kernel()) {
1748 		/* assume a correct copy */
1749 		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
1750 	} else if (current_map() == map) {
1751 		if (copyin(fromaddr, todata, length) != 0) {
1752 			kr = KERN_INVALID_ADDRESS;
1753 		}
1754 	} else {
1755 		vm_map_reference(map);
1756 		oldmap = vm_map_switch(map);
1757 		if (copyin(fromaddr, todata, length) != 0) {
1758 			kr = KERN_INVALID_ADDRESS;
1759 		}
1760 		vm_map_switch(oldmap);
1761 		vm_map_deallocate(map);
1762 	}
1763 	return kr;
1764 }
1765 
1766 /*
1767  *	Routine:	copyoutmap
1768  *	Purpose:
1769  *		Like copyout, except that toaddr is an address
1770  *		in the specified VM map.
1771  */
1772 kern_return_t
1773 copyoutmap(
1774 	vm_map_t                map,
1775 	void                    *fromdata,
1776 	vm_map_address_t        toaddr,
1777 	vm_size_t               length)
1778 {
1779 	kern_return_t   kr = KERN_SUCCESS;
1780 	vm_map_t        oldmap;
1781 
1782 	if (vm_map_pmap(map) == pmap_kernel()) {
1783 		/* assume a correct copy */
1784 		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
1785 	} else if (current_map() == map) {
1786 		if (copyout(fromdata, toaddr, length) != 0) {
1787 			kr = KERN_INVALID_ADDRESS;
1788 		}
1789 	} else {
1790 		vm_map_reference(map);
1791 		oldmap = vm_map_switch(map);
1792 		if (copyout(fromdata, toaddr, length) != 0) {
1793 			kr = KERN_INVALID_ADDRESS;
1794 		}
1795 		vm_map_switch(oldmap);
1796 		vm_map_deallocate(map);
1797 	}
1798 	return kr;
1799 }
1800 
1801 /*
1802  *	Routine:	copyoutmap_atomic{32, 64}
1803  *	Purpose:
1804  *		Like copyoutmap, except that the operation is atomic.
1805  *      Takes a value argument rather than a *fromdata pointer.
1806  */
1807 kern_return_t
1808 copyoutmap_atomic32(
1809 	vm_map_t                map,
1810 	uint32_t                value,
1811 	vm_map_address_t        toaddr)
1812 {
1813 	kern_return_t   kr = KERN_SUCCESS;
1814 	vm_map_t        oldmap;
1815 
1816 	if (vm_map_pmap(map) == pmap_kernel()) {
1817 		/* assume a correct toaddr */
1818 		*(uint32_t *)toaddr = value;
1819 	} else if (current_map() == map) {
1820 		if (copyout_atomic32(value, toaddr) != 0) {
1821 			kr = KERN_INVALID_ADDRESS;
1822 		}
1823 	} else {
1824 		vm_map_reference(map);
1825 		oldmap = vm_map_switch(map);
1826 		if (copyout_atomic32(value, toaddr) != 0) {
1827 			kr = KERN_INVALID_ADDRESS;
1828 		}
1829 		vm_map_switch(oldmap);
1830 		vm_map_deallocate(map);
1831 	}
1832 	return kr;
1833 }
1834 
1835 kern_return_t
1836 copyoutmap_atomic64(
1837 	vm_map_t                map,
1838 	uint64_t                value,
1839 	vm_map_address_t        toaddr)
1840 {
1841 	kern_return_t   kr = KERN_SUCCESS;
1842 	vm_map_t        oldmap;
1843 
1844 	if (vm_map_pmap(map) == pmap_kernel()) {
1845 		/* assume a correct toaddr */
1846 		*(uint64_t *)toaddr = value;
1847 	} else if (current_map() == map) {
1848 		if (copyout_atomic64(value, toaddr) != 0) {
1849 			kr = KERN_INVALID_ADDRESS;
1850 		}
1851 	} else {
1852 		vm_map_reference(map);
1853 		oldmap = vm_map_switch(map);
1854 		if (copyout_atomic64(value, toaddr) != 0) {
1855 			kr = KERN_INVALID_ADDRESS;
1856 		}
1857 		vm_map_switch(oldmap);
1858 		vm_map_deallocate(map);
1859 	}
1860 	return kr;
1861 }
1862 
1863 
1864 #pragma mark pointer obfuscation / packing
1865 
1866 /*
1867  *
1868  *	The following two functions are to be used when exposing kernel
1869  *	addresses to userspace via any of the various debug or info
1870  *	facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1871  *	and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1872  *	are exported to KEXTs.
1873  *
1874  *	NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
1875  */
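/*
 * Illustrative use from a kext (hypothetical pointer value):
 *
 *	vm_offset_t perm;
 *	vm_kernel_addrperm_external((vm_offset_t)some_heap_ptr, &perm);
 *
 * Slid text/data addresses are unslid, other kernel addresses are
 * permuted, and anything else is returned unchanged.
 */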
1876 
1877 vm_offset_t
1878 vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
1879 {
1880 	assert(salt != 0);
1881 
1882 	if (addr == 0) {
1883 		return 0ul;
1884 	}
1885 
1886 	if (VM_KERNEL_IS_SLID(addr)) {
1887 		return VM_KERNEL_UNSLIDE(addr);
1888 	}
1889 
1890 	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
1891 	SHA256_CTX sha_ctx;
1892 
1893 	SHA256_Init(&sha_ctx);
1894 	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
1895 	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
1896 	SHA256_Final(sha_digest, &sha_ctx);
1897 
1898 	return sha_digest[0];
1899 }
1900 
1901 __exported vm_offset_t
1902 vm_kernel_addrhash_external(vm_offset_t addr);
1903 vm_offset_t
1904 vm_kernel_addrhash_external(vm_offset_t addr)
1905 {
1906 	return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
1907 }
1908 
1909 void
1910 vm_kernel_addrhide(
1911 	vm_offset_t addr,
1912 	vm_offset_t *hide_addr)
1913 {
1914 	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
1915 }
1916 
1917 /*
1918  *	vm_kernel_addrperm_external:
1919  *	vm_kernel_unslide_or_perm_external:
1920  *
1921  *	Use these macros when exposing an address to userspace that could come from
1922  *	either kernel text/data *or* the heap.
1923  */
1924 void
1925 vm_kernel_addrperm_external(
1926 	vm_offset_t addr,
1927 	vm_offset_t *perm_addr)
1928 {
1929 	if (VM_KERNEL_IS_SLID(addr)) {
1930 		*perm_addr = VM_KERNEL_UNSLIDE(addr);
1931 	} else if (VM_KERNEL_ADDRESS(addr)) {
1932 		*perm_addr = addr + vm_kernel_addrperm_ext;
1933 	} else {
1934 		*perm_addr = addr;
1935 	}
1936 }
1937 
1938 void
1939 vm_kernel_unslide_or_perm_external(
1940 	vm_offset_t addr,
1941 	vm_offset_t *up_addr)
1942 {
1943 	vm_kernel_addrperm_external(addr, up_addr);
1944 }
1945 
1946 void
1947 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
1948 {
1949 	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
1950 		panic("pointer %p can't be packed: low %d bits aren't 0",
1951 		    (void *)ptr, params.vmpp_shift);
1952 	} else if (ptr <= params.vmpp_base) {
1953 		panic("pointer %p can't be packed: below base %p",
1954 		    (void *)ptr, (void *)params.vmpp_base);
1955 	} else {
1956 		panic("pointer %p can't be packed: maximum encodable pointer is %p",
1957 		    (void *)ptr, (void *)vm_packing_max_packable(params));
1958 	}
1959 }
1960 
1961 void
1962 vm_packing_verify_range(
1963 	const char *subsystem,
1964 	vm_offset_t min_address,
1965 	vm_offset_t max_address,
1966 	vm_packing_params_t params)
1967 {
1968 	if (min_address > max_address) {
1969 		panic("%s: %s range invalid min:%p > max:%p",
1970 		    __func__, subsystem, (void *)min_address, (void *)max_address);
1971 	}
1972 
1973 	if (!params.vmpp_base_relative) {
1974 		return;
1975 	}
1976 
1977 	if (min_address <= params.vmpp_base) {
1978 		panic("%s: %s range invalid min:%p <= base:%p",
1979 		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
1980 	}
1981 
1982 	if (max_address > vm_packing_max_packable(params)) {
1983 		panic("%s: %s range invalid max:%p >= max packable:%p",
1984 		    __func__, subsystem, (void *)max_address,
1985 		    (void *)vm_packing_max_packable(params));
1986 	}
1987 }
1988