xref: /xnu-11417.101.15/osfmk/vm/vm_kern.c (revision e3723e1f17661b24996789d8afc084c0c3303b26)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_kern.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Kernel memory management.
64  */
65 
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern_internal.h>
71 #include <vm/vm_map_internal.h>
72 #include <vm/vm_object_internal.h>
73 #include <vm/vm_page_internal.h>
74 #include <vm/vm_compressor_xnu.h>
75 #include <vm/vm_pageout_xnu.h>
76 #include <vm/vm_init_xnu.h>
77 #include <vm/vm_fault.h>
78 #include <vm/vm_memtag.h>
79 #include <vm/vm_far.h>
80 #include <kern/misc_protos.h>
81 #include <vm/cpm_internal.h>
82 #include <kern/ledger.h>
83 #include <kern/bits.h>
84 #include <kern/startup.h>
85 #include <kern/telemetry.h>
86 
87 #include <string.h>
88 
89 #include <libkern/OSDebug.h>
90 #include <libkern/crypto/sha2.h>
91 #include <libkern/section_keywords.h>
92 #include <sys/kdebug.h>
93 #include <sys/kdebug_triage.h>
94 
95 #include <san/kasan.h>
96 #include <kern/kext_alloc.h>
97 #include <kern/backtrace.h>
98 #include <os/hash.h>
99 #include <kern/zalloc_internal.h>
100 #include <libkern/crypto/rand.h>
101 
102 /*
103  *	Variables exported by this module.
104  */
105 
106 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
107 SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT];
108 SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT];
109 
110 static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges",
111     KMEM_RANGE_ID_NUM_PTR);
112 #define KMEM_GOBJ_THRESHOLD   (32ULL << 20)
113 #if DEBUG || DEVELOPMENT
114 #define KMEM_OUTLIER_LOG_SIZE (16ULL << 10)
115 #define KMEM_OUTLIER_SIZE      0
116 #define KMEM_OUTLIER_ALIGN     1
117 btlog_t kmem_outlier_log;
118 #endif /* DEBUG || DEVELOPMENT */
119 
120 __startup_data static vm_map_size_t iokit_range_size;
121 __startup_data static vm_map_size_t data_range_size;
122 __startup_data static vm_map_size_t ptr_range_size;
123 __startup_data static vm_map_size_t sprayqtn_range_size;
124 
125 #pragma mark helpers
126 
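/*
 * The ANYF() overloads erase the specific flag type (kma_flags_t,
 * kmr_flags_t, kmf_flags_t) into the generic kmem_flags_t, so that the
 * helpers below can be shared by the alloc, realloc and free paths.
 */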
127 __attribute__((overloadable))
128 __header_always_inline kmem_flags_t
129 ANYF(kma_flags_t flags)
130 {
131 	return (kmem_flags_t)flags;
132 }
133 
134 __attribute__((overloadable))
135 __header_always_inline kmem_flags_t
136 ANYF(kmr_flags_t flags)
137 {
138 	return (kmem_flags_t)flags;
139 }
140 
141 __attribute__((overloadable))
142 __header_always_inline kmem_flags_t
143 ANYF(kmf_flags_t flags)
144 {
145 	return (kmem_flags_t)flags;
146 }
147 
148 __abortlike
149 static void
150 __kmem_invalid_size_panic(
151 	vm_map_t        map,
152 	vm_size_t       size,
153 	uint32_t        flags)
154 {
155 	panic("kmem(map=%p, flags=0x%x): invalid size %zd",
156 	    map, flags, (size_t)size);
157 }
158 
159 __abortlike
160 static void
161 __kmem_invalid_arguments_panic(
162 	const char     *what,
163 	vm_map_t        map,
164 	vm_address_t    address,
165 	vm_size_t       size,
166 	uint32_t        flags)
167 {
168 	panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
169 	    "invalid arguments passed",
170 	    what, map, (void *)address, (size_t)size, flags);
171 }
172 
173 __abortlike
174 static void
175 __kmem_failed_panic(
176 	vm_map_t        map,
177 	vm_size_t       size,
178 	uint32_t        flags,
179 	kern_return_t   kr,
180 	const char     *what)
181 {
182 	panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
183 	    what, map, (size_t)size, flags, kr);
184 }
185 
186 __abortlike
187 static void
188 __kmem_entry_not_found_panic(
189 	vm_map_t        map,
190 	vm_offset_t     addr)
191 {
192 	panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
193 }
194 
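/*
 * Select the VM object backing a kmem allocation: the compressor object
 * for KMEM_COMPRESSOR, otherwise the default kernel object (KMEM_KOBJECT
 * is required).  Passing both flags, or neither, is a programming error.
 */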
195 static inline vm_object_t
196 __kmem_object(kmem_flags_t flags)
197 {
198 	if (flags & KMEM_COMPRESSOR) {
199 		if (flags & KMEM_KOBJECT) {
200 			panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified");
201 		}
202 		return compressor_object;
203 	}
204 	if (!(flags & KMEM_KOBJECT)) {
205 		panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
206 	}
207 	return kernel_object_default;
208 }
209 
210 static inline pmap_mapping_type_t
211 __kmem_mapping_type(kmem_flags_t flags)
212 {
213 	if (flags & (KMEM_DATA | KMEM_COMPRESSOR | KMEM_DATA_SHARED)) {
214 		return PMAP_MAPPING_TYPE_DEFAULT;
215 	} else {
216 		return PMAP_MAPPING_TYPE_RESTRICTED;
217 	}
218 }
219 
220 static inline vm_size_t
221 __kmem_guard_left(kmem_flags_t flags)
222 {
223 	return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
224 }
225 
226 static inline vm_size_t
227 __kmem_guard_right(kmem_flags_t flags)
228 {
229 	return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
230 }
231 
232 static inline vm_size_t
233 __kmem_guard_size(kmem_flags_t flags)
234 {
235 	return __kmem_guard_left(flags) + __kmem_guard_right(flags);
236 }
237 
238 __pure2
239 static inline vm_size_t
240 __kmem_entry_orig_size(vm_map_entry_t entry)
241 {
242 	vm_object_t object = VME_OBJECT(entry);
243 
244 	if (entry->vme_kernel_object) {
245 		return entry->vme_end - entry->vme_start -
246 		       entry->vme_object_or_delta;
247 	} else {
248 		return object->vo_size - object->vo_size_delta;
249 	}
250 }
251 
252 
253 #pragma mark kmem range methods
254 
255 #define mach_vm_range_load(r, rmin, rmax) \
256 	({ (rmin) = (r)->min_address; (rmax) = (r)->max_address; })
257 
258 __abortlike
259 static void
260 __mach_vm_range_overflow(
261 	mach_vm_offset_t        addr,
262 	mach_vm_offset_t        size)
263 {
264 	panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
265 	    addr, addr, size);
266 }
267 
268 __abortlike
269 static void
270 __mach_vm_range_invalid(
271 	mach_vm_offset_t        min_address,
272 	mach_vm_offset_t        max_address)
273 {
274 	panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
275 	    min_address, max_address);
276 }
277 
278 __header_always_inline mach_vm_size_t
279 mach_vm_range_size(const struct mach_vm_range *r)
280 {
281 	mach_vm_offset_t rmin, rmax;
282 
283 	mach_vm_range_load(r, rmin, rmax);
284 	return rmax - rmin;
285 }
286 
287 __attribute__((overloadable))
288 __header_always_inline bool
289 mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
290 {
291 	mach_vm_offset_t rmin, rmax;
292 
293 #if CONFIG_KERNEL_TAGGING
294 	if (VM_KERNEL_ADDRESS(addr)) {
295 		addr = vm_memtag_canonicalize_kernel(addr);
296 	}
297 #endif /* CONFIG_KERNEL_TAGGING */
298 
299 	/*
300 	 * The `&` is not a typo: we really expect the check to pass,
301 	 * so encourage the compiler to eagerly load and test without branches
302 	 */
303 	mach_vm_range_load(r, rmin, rmax);
304 	return (addr >= rmin) & (addr < rmax);
305 }
306 
307 __attribute__((overloadable))
308 __header_always_inline bool
309 mach_vm_range_contains(
310 	const struct mach_vm_range *r,
311 	mach_vm_offset_t        addr,
312 	mach_vm_offset_t        size)
313 {
314 	mach_vm_offset_t rmin, rmax;
315 
316 #if CONFIG_KERNEL_TAGGING
317 	if (VM_KERNEL_ADDRESS(addr)) {
318 		addr = vm_memtag_canonicalize_kernel(addr);
319 	}
320 #endif /* CONFIG_KERNEL_TAGGING */
321 
322 	mach_vm_offset_t end;
323 	if (__improbable(os_add_overflow(addr, size, &end))) {
324 		return false;
325 	}
326 
327 	/*
328 	 * The `&` is not a typo: we really expect the check to pass,
329 	 * so encourage the compiler to eagerly load and test without branches
330 	 */
331 	mach_vm_range_load(r, rmin, rmax);
332 	return (addr >= rmin) & (end >= rmin) & (end <= rmax);
333 }
334 
335 __attribute__((overloadable))
336 __header_always_inline bool
337 mach_vm_range_intersects(
338 	const struct mach_vm_range *r1,
339 	const struct mach_vm_range *r2)
340 {
341 	mach_vm_offset_t r1_min, r1_max;
342 	mach_vm_offset_t r2_min, r2_max;
343 
344 	mach_vm_range_load(r1, r1_min, r1_max);
345 	r2_min = r2->min_address;
346 	r2_max = r2->max_address;
347 
348 	if (r1_min > r1_max) {
349 		__mach_vm_range_invalid(r1_min, r1_max);
350 	}
351 
352 	if (r2_min > r2_max) {
353 		__mach_vm_range_invalid(r2_min, r2_max);
354 	}
355 
356 	return r1_max > r2_min && r1_min < r2_max;
357 }
358 
359 __attribute__((overloadable))
360 __header_always_inline bool
361 mach_vm_range_intersects(
362 	const struct mach_vm_range *r1,
363 	mach_vm_offset_t        addr,
364 	mach_vm_offset_t        size)
365 {
366 	struct mach_vm_range r2;
367 
368 #if CONFIG_KERNEL_TAGGING
369 	addr = VM_KERNEL_STRIP_UPTR(addr);
370 #endif /* CONFIG_KERNEL_TAGGING */
371 
372 	r2.min_address = addr;
373 	if (os_add_overflow(addr, size, &r2.max_address)) {
374 		__mach_vm_range_overflow(addr, size);
375 	}
376 
377 	return mach_vm_range_intersects(r1, &r2);
378 }
379 
380 bool
381 kmem_range_id_contains(
382 	kmem_range_id_t         range_id,
383 	vm_map_offset_t         addr,
384 	vm_map_size_t           size)
385 {
386 	return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
387 }
388 
389 __abortlike
390 static void
391 kmem_range_invalid_panic(
392 	kmem_range_id_t         range_id,
393 	vm_map_offset_t         addr,
394 	vm_map_size_t           size)
395 {
396 	const struct mach_vm_range *r = &kmem_ranges[range_id];
397 	mach_vm_offset_t rmin, rmax;
398 
399 	mach_vm_range_load(r, rmin, rmax);
400 	if (addr + size < rmin) {
401 		panic("addr %p + size %llu overflows %p", (void *)addr, size,
402 		    (void *)(addr + size));
403 	}
404 	panic("addr %p + size %llu doesn't fit in one range (id: %u min: %p max: %p)",
405 	    (void *)addr, size, range_id, (void *)rmin, (void *)rmax);
406 }
407 
408 /*
409  * Return whether the entire allocation is contained in the given range
410  */
411 static bool
412 kmem_range_contains_fully(
413 	kmem_range_id_t         range_id,
414 	vm_map_offset_t         addr,
415 	vm_map_size_t           size)
416 {
417 	const struct mach_vm_range *r = &kmem_ranges[range_id];
418 	mach_vm_offset_t rmin, rmax;
419 	bool result = false;
420 
421 	if (VM_KERNEL_ADDRESS(addr)) {
422 		addr = vm_memtag_canonicalize_kernel(addr);
423 	}
424 
425 	/*
426 	 * The `&` is not a typo: we really expect the check to pass,
427 	 * so encourage the compiler to eagerly load and test without branches
428 	 */
429 	mach_vm_range_load(r, rmin, rmax);
430 	result = (addr >= rmin) & (addr < rmax);
431 	if (__improbable(result
432 	    && ((addr + size < rmin) || (addr + size > rmax)))) {
433 		kmem_range_invalid_panic(range_id, addr, size);
434 	}
435 	return result;
436 }
437 
438 vm_map_size_t
439 kmem_range_id_size(kmem_range_id_t range_id)
440 {
441 	return mach_vm_range_size(&kmem_ranges[range_id]);
442 }
443 
444 kmem_range_id_t
445 kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
446 {
447 	kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
448 
449 	for (; range_id < KMEM_RANGE_COUNT; range_id++) {
450 		if (kmem_range_contains_fully(range_id, addr, size)) {
451 			return range_id;
452 		}
453 	}
454 	return KMEM_RANGE_ID_NONE;
455 }
456 
457 bool
458 kmem_is_ptr_range(vm_map_range_id_t range_id)
459 {
460 	return (range_id >= KMEM_RANGE_ID_FIRST) &&
461 	       (range_id <= KMEM_RANGE_ID_NUM_PTR);
462 }
463 
464 __abortlike
465 static void
466 kmem_range_invalid_for_overwrite(vm_map_offset_t addr)
467 {
468 	panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges",
469 	    (void *)addr);
470 }
471 
472 mach_vm_range_t
473 kmem_validate_range_for_overwrite(
474 	vm_map_offset_t         addr,
475 	vm_map_size_t           size)
476 {
477 	vm_map_range_id_t range_id = kmem_addr_get_range(addr, size);
478 
479 	if (kmem_is_ptr_range(range_id)) {
480 		kmem_range_invalid_for_overwrite(addr);
481 	}
482 
483 	return &kmem_ranges[range_id];
484 }
485 
486 
487 #pragma mark entry parameters
488 
489 
490 __abortlike
491 static void
492 __kmem_entry_validate_panic(
493 	vm_map_t        map,
494 	vm_map_entry_t  entry,
495 	vm_offset_t     addr,
496 	vm_size_t       size,
497 	uint32_t        flags,
498 	kmem_guard_t    guard)
499 {
500 	const char *what = "???";
501 
502 	if (entry->vme_atomic != guard.kmg_atomic) {
503 		what = "atomicity";
504 	} else if (entry->is_sub_map != guard.kmg_submap) {
505 		what = "objectness";
506 	} else if (addr != entry->vme_start) {
507 		what = "left bound";
508 	} else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
509 		what = "right bound";
510 	} else if (guard.kmg_context != entry->vme_context) {
511 		what = "guard";
512 	}
513 
514 	panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
515 	    "entry:%p %s mismatch guard(0x%08x)",
516 	    map, (void *)addr, size, flags, entry,
517 	    what, guard.kmg_context);
518 }
519 
520 static bool
521 __kmem_entry_validate_guard(
522 	vm_map_entry_t  entry,
523 	vm_offset_t     addr,
524 	vm_size_t       size,
525 	kmem_flags_t    flags,
526 	kmem_guard_t    guard)
527 {
528 	if (entry->vme_atomic != guard.kmg_atomic) {
529 		return false;
530 	}
531 
532 	if (!guard.kmg_atomic) {
533 		return true;
534 	}
535 
536 	if (entry->is_sub_map != guard.kmg_submap) {
537 		return false;
538 	}
539 
540 	if (addr != entry->vme_start) {
541 		return false;
542 	}
543 
544 	if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
545 		return false;
546 	}
547 
548 	if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
549 		return false;
550 	}
551 
552 	return true;
553 }
554 
555 void
556 kmem_entry_validate_guard(
557 	vm_map_t        map,
558 	vm_map_entry_t  entry,
559 	vm_offset_t     addr,
560 	vm_size_t       size,
561 	kmem_guard_t    guard)
562 {
563 	if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
564 		__kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
565 	}
566 }
567 
568 __abortlike
569 static void
570 __kmem_entry_validate_object_panic(
571 	vm_map_t        map,
572 	vm_map_entry_t  entry,
573 	kmem_flags_t    flags)
574 {
575 	const char *what;
576 	const char *verb;
577 
578 	if (entry->is_sub_map) {
579 		panic("kmem(map=%p) entry %p is a submap", map, entry);
580 	}
581 
582 	if (flags & KMEM_KOBJECT) {
583 		what = "kernel";
584 		verb = "isn't";
585 	} else if (flags & KMEM_COMPRESSOR) {
586 		what = "compressor";
587 		verb = "isn't";
588 	} else if (entry->vme_kernel_object) {
589 		what = "kernel";
590 		verb = "is unexpectedly";
591 	} else {
592 		what = "compressor";
593 		verb = "is unexpectedly";
594 	}
595 
596 	panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
597 	    map, flags, entry, verb, what);
598 }
599 
600 static bool
601 __kmem_entry_validate_object(
602 	vm_map_entry_t  entry,
603 	kmem_flags_t    flags)
604 {
605 	if (entry->is_sub_map) {
606 		return false;
607 	}
608 	if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
609 		return false;
610 	}
611 
612 	return (bool)(flags & KMEM_COMPRESSOR) ==
613 	       (VME_OBJECT(entry) == compressor_object);
614 }
615 
616 vm_size_t
617 kmem_size_guard(
618 	vm_map_t        map,
619 	vm_offset_t     addr,
620 	kmem_guard_t    guard)
621 {
622 	kmem_flags_t flags = KMEM_GUESS_SIZE;
623 	vm_map_entry_t entry;
624 	vm_size_t size;
625 
626 	vm_map_lock_read(map);
627 
628 #if KASAN_CLASSIC
629 	addr -= PAGE_SIZE;
630 #endif /* KASAN_CLASSIC */
631 	addr = vm_memtag_canonicalize_kernel(addr);
632 
633 	if (!vm_map_lookup_entry(map, addr, &entry)) {
634 		__kmem_entry_not_found_panic(map, addr);
635 	}
636 
637 	if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
638 		__kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
639 	}
640 
641 	size = __kmem_entry_orig_size(entry);
642 
643 	vm_map_unlock_read(map);
644 
645 	return size;
646 }
647 
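/*
 * Hash the caller's backtrace; used below to derive a pointer range and
 * an allocation direction when no type hash was provided by the caller.
 */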
648 static inline uint16_t
649 kmem_hash_backtrace(
650 	void                     *fp)
651 {
652 	uint64_t  bt_count;
653 	uintptr_t bt[8] = {};
654 
655 	struct backtrace_control ctl = {
656 		.btc_frame_addr = (uintptr_t)fp,
657 	};
658 
659 	bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
660 	return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
661 }
662 
663 static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK,
664     "Insufficient bits to represent ptr ranges");
665 
666 kmem_range_id_t
667 kmem_adjust_range_id(
668 	uint32_t                  hash)
669 {
670 	return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
671 	       (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
672 }
673 
674 static bool
675 kmem_use_sprayqtn(
676 	kma_flags_t               kma_flags,
677 	vm_map_size_t             map_size,
678 	vm_offset_t               mask)
679 {
680 	/*
681 	 * Pointer allocations above the guard-object threshold, or with
682 	 * leading guard pages and non-standard alignment requests, are
683 	 * redirected to the sprayqtn range.
684 	 */
685 #if DEBUG || DEVELOPMENT
686 	btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ?
687 	    BTREF_GET_NOWAIT : 0;
688 
689 	if ((kma_flags & KMA_SPRAYQTN) == 0) {
690 		if (map_size > KMEM_GOBJ_THRESHOLD) {
691 			btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE,
692 			    btref_get(__builtin_frame_address(0), flags));
693 		} else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) {
694 			btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN,
695 			    btref_get(__builtin_frame_address(0), flags));
696 		}
697 	}
698 #endif /* DEBUG || DEVELOPMENT */
699 
700 	return (kma_flags & KMA_SPRAYQTN) ||
701 	       (map_size > KMEM_GOBJ_THRESHOLD) ||
702 	       ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK));
703 }
704 
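/*
 * Choose the kmem range and allocation direction for a kernel_map
 * allocation: data allocations go to the DATA range, outliers to the
 * sprayqtn range, and pointer allocations to one of the pointer ranges
 * selected by the type hash (or by a hash of the caller's backtrace
 * when no type hash is available).
 */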
705 static void
706 kmem_apply_security_policy(
707 	vm_map_t                  map,
708 	kma_flags_t               kma_flags,
709 	kmem_guard_t              guard,
710 	vm_map_size_t             map_size,
711 	vm_offset_t               mask,
712 	vm_map_kernel_flags_t    *vmk_flags,
713 	bool                      assert_dir __unused)
714 {
715 	kmem_range_id_t range_id;
716 	bool from_right;
717 	uint16_t type_hash = guard.kmg_type_hash;
718 
719 	if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
720 		return;
721 	}
722 
723 	/*
724 	 * A non-zero type-hash must be passed by krealloc_type
725 	 */
726 #if (DEBUG || DEVELOPMENT)
727 	if (assert_dir && !(kma_flags & (KMA_DATA | KMA_DATA_SHARED))) {
728 		assert(type_hash != 0);
729 	}
730 #endif
731 
732 	if (kma_flags & (KMA_DATA | KMA_DATA_SHARED)) {
733 		range_id  = KMEM_RANGE_ID_DATA;
734 		/*
735 		 * As an optimization in KMA_DATA to avoid fragmentation,
736 		 * allocate static carveouts at the end of the DATA range.
737 		 */
738 		from_right = (bool)(kma_flags & KMA_PERMANENT);
739 	} else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) {
740 		range_id = KMEM_RANGE_ID_SPRAYQTN;
741 		from_right = (bool)(kma_flags & KMA_PERMANENT);
742 	} else if (type_hash) {
743 		range_id  = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK);
744 		from_right = type_hash & KMEM_DIRECTION_MASK;
745 	} else {
746 		/*
747 		 * Range id needs to correspond to one of the PTR ranges
748 		 */
749 		type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
750 		range_id  = kmem_adjust_range_id(type_hash);
751 		from_right = type_hash & KMEM_DIRECTION_MASK;
752 	}
753 
754 	vmk_flags->vmkf_range_id = range_id;
755 	vmk_flags->vmkf_last_free = from_right;
756 }
757 
758 #pragma mark allocation
759 
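/*
 * Common allocation path behind kmem_alloc_guard() and
 * kmem_alloc_contig_guard(): validate the request, create any guard
 * pages, allocate the backing pages up front for wired requests (via
 * `alloc_pages` when provided, as for contiguous allocations), reserve
 * the virtual range with vm_map_find_space(), then populate the object
 * and the pmap.
 */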
760 static kmem_return_t
761 kmem_alloc_guard_internal(
762 	vm_map_t                map,
763 	vm_size_t               size,
764 	vm_offset_t             mask,
765 	kma_flags_t             flags,
766 	kmem_guard_t            guard,
767 	kern_return_t         (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *))
768 {
769 	vm_object_t             object;
770 	vm_offset_t             delta = 0;
771 	vm_map_entry_t          entry = NULL;
772 	vm_map_offset_t         map_addr, fill_start;
773 	vm_map_size_t           map_size, fill_size;
774 	vm_page_t               guard_left = VM_PAGE_NULL;
775 	vm_page_t               guard_right = VM_PAGE_NULL;
776 	vm_page_t               wired_page_list = VM_PAGE_NULL;
777 	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
778 	bool                    skip_guards;
779 	kmem_return_t           kmr = { };
780 
781 	assert(kernel_map && map->pmap == kernel_pmap);
782 
783 #if DEBUG || DEVELOPMENT
784 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
785 	    size, 0, 0, 0);
786 #endif
787 
788 
789 	if (size == 0 ||
790 	    (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
791 	    (size < __kmem_guard_size(ANYF(flags)))) {
792 		__kmem_invalid_size_panic(map, size, flags);
793 	}
794 
795 	/*
796 	 * Limit the size of a single extent of wired memory, to try to
797 	 * limit the damage to the system if too many pages get wired down.
798 	 * The limit is raised to 2GB with a 128GB max physical limit,
799 	 * but scaled by installed memory above this
800 	 * (i.e. MAX(2GB, sane_size / 64)).
801 	 *
802 	 * Note: kmem_alloc_contig_guard() is immune to this check.
803 	 */
804 	if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
805 	    alloc_pages == NULL &&
806 	    size > MAX(1ULL << 31, sane_size / 64))) {
807 		kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
808 		goto out_error;
809 	}
810 
811 #if 136275805
812 	/*
813 	 * XXX: Redundantly check the mapping size here so that failure stack traces
814 	 *      are more useful. This has no functional value but is helpful because
815 	 *      telemetry traps can currently only capture the last five calls and
816 	 *      so we want to trap as shallow as possible in a select few cases
817 	 *      where we anticipate issues.
818 	 *
819 	 *      When telemetry collection is complete, this will be removed.
820 	 */
821 	if (__improbable(!vm_map_is_map_size_valid(
822 		    kernel_map, size, flags & KMA_NOSOFTLIMIT))) {
823 		kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
824 		goto out_error;
825 	}
826 #endif /* 136275805 */
827 
828 	/*
829 	 * Guard pages:
830 	 *
831 	 * Guard pages are implemented as fictitious pages.
832 	 *
833 	 * However, some maps, and some objects are known
834 	 * to manage their memory explicitly, and do not need
835 	 * those to be materialized, which saves memory.
836 	 *
837 	 * By placing guard pages on either end of a stack,
838 	 * they can help detect cases where a thread walks
839 	 * off either end of its stack.
840 	 *
841 	 * They are allocated and set up here and attempts
842 	 * to access those pages are trapped in vm_fault_page().
843 	 *
844 	 * The map_size we were passed may include extra space for
845 	 * guard pages. fill_size represents the actual size to populate.
846 	 * Similarly, fill_start indicates where the actual pages
847 	 * will begin in the range.
848 	 */
849 
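	/*
	 * Illustrative layout: a request with both KMA_GUARD_FIRST and
	 * KMA_GUARD_LAST and size == 3 * PAGE_SIZE ends up with
	 * fill_start == PAGE_SIZE and fill_size == PAGE_SIZE:
	 *
	 *	[ guard ][ data ][ guard ]
	 */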
850 	map_size   = round_page(size);
851 	fill_start = 0;
852 	fill_size  = map_size - __kmem_guard_size(ANYF(flags));
853 
854 #if KASAN_CLASSIC
855 	if (flags & KMA_KASAN_GUARD) {
856 		assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0);
857 		flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
858 		delta     = ptoa(2);
859 		map_size += delta;
860 	}
861 #else
862 	(void)delta;
863 #endif /* KASAN_CLASSIC */
864 
865 	skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) ||
866 	    map->never_faults;
867 
868 	if (flags & KMA_GUARD_FIRST) {
869 		vmk_flags.vmkf_guard_before = true;
870 		fill_start += PAGE_SIZE;
871 	}
872 	if (flags & KMA_NOSOFTLIMIT) {
873 		vmk_flags.vmkf_no_soft_limit = true;
874 	}
875 	if ((flags & KMA_GUARD_FIRST) && !skip_guards) {
876 		guard_left = vm_page_create_guard((flags & KMA_NOPAGEWAIT) == 0);
877 		if (__improbable(guard_left == VM_PAGE_NULL)) {
878 			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
879 			goto out_error;
880 		}
881 	}
882 	if ((flags & KMA_GUARD_LAST) && !skip_guards) {
883 		guard_right = vm_page_create_guard((flags & KMA_NOPAGEWAIT) == 0);
884 		if (__improbable(guard_right == VM_PAGE_NULL)) {
885 			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
886 			goto out_error;
887 		}
888 	}
889 
890 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
891 		if (alloc_pages) {
892 			kmr.kmr_return = alloc_pages(fill_size, flags,
893 			    &wired_page_list);
894 		} else {
895 			kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
896 			    &wired_page_list);
897 		}
898 		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
899 			goto out_error;
900 		}
901 	}
902 
903 	/*
904 	 *	Allocate a new object (if necessary).  We must do this before
905 	 *	locking the map, or risk deadlock with the default pager.
906 	 */
907 	if (flags & KMA_KOBJECT) {
908 		{
909 			object = kernel_object_default;
910 		}
911 		vm_object_reference(object);
912 	} else if (flags & KMA_COMPRESSOR) {
913 		object = compressor_object;
914 		vm_object_reference(object);
915 	} else {
916 		object = vm_object_allocate(map_size);
917 		vm_object_lock(object);
918 		vm_object_set_size(object, map_size, size);
919 		/* stabilize the object to prevent shadowing */
920 		object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
921 		VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
922 		vm_object_unlock(object);
923 	}
924 
925 	if (flags & KMA_LAST_FREE) {
926 		vmk_flags.vmkf_last_free = true;
927 	}
928 	if (flags & KMA_PERMANENT) {
929 		vmk_flags.vmf_permanent = true;
930 	}
931 	kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
932 	    false);
933 
934 	kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
935 	    vmk_flags, &entry);
936 	if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
937 		vm_object_deallocate(object);
938 		goto out_error;
939 	}
940 
941 	map_addr = entry->vme_start;
942 	VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
943 	VME_ALIAS_SET(entry, guard.kmg_tag);
944 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
945 		VME_OFFSET_SET(entry, map_addr);
946 	}
947 
948 #if KASAN
949 	if ((flags & KMA_KOBJECT) && guard.kmg_atomic) {
950 		entry->vme_object_or_delta = (-size & PAGE_MASK) + delta;
951 	}
952 #endif /* KASAN */
953 
954 	if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
955 		entry->wired_count = 1;
956 		vme_btref_consider_and_set(entry, __builtin_frame_address(0));
957 	}
958 
959 	if (guard_left || guard_right || wired_page_list) {
960 		vm_object_offset_t offset = 0ull;
961 
962 		vm_object_lock(object);
963 		vm_map_unlock(map);
964 
965 		if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
966 			offset = map_addr;
967 		}
968 
969 		if (guard_left) {
970 			vm_page_insert(guard_left, object, offset);
971 			guard_left->vmp_busy = FALSE;
972 			guard_left = VM_PAGE_NULL;
973 		}
974 
975 		if (guard_right) {
976 			vm_page_insert(guard_right, object,
977 			    offset + fill_start + fill_size);
978 			guard_right->vmp_busy = FALSE;
979 			guard_right = VM_PAGE_NULL;
980 		}
981 
982 		if (wired_page_list) {
983 			kernel_memory_populate_object_and_unlock(object,
984 			    map_addr + fill_start, offset + fill_start, fill_size,
985 			    wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
986 			    __kmem_mapping_type(ANYF(flags)));
987 		} else {
988 			vm_object_unlock(object);
989 		}
990 	} else {
991 		vm_map_unlock(map);
992 	}
993 
994 	/*
995 	 * now that the pages are wired, we no longer have to fear coalescing
996 	 */
997 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
998 		vm_map_simplify(map, map_addr);
999 	}
1000 
1001 #if DEBUG || DEVELOPMENT
1002 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1003 	    atop(fill_size), 0, 0, 0);
1004 #endif /* DEBUG || DEVELOPMENT */
1005 	kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);
1006 
1007 #if KASAN
1008 	if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) {
1009 		/*
1010 		 * We need to allow the range for pageable memory,
1011 		 * or faulting will not be allowed.
1012 		 */
1013 		kasan_notify_address(map_addr, map_size);
1014 	}
1015 #endif /* KASAN */
1016 #if KASAN_CLASSIC
1017 	if (flags & KMA_KASAN_GUARD) {
1018 		kmr.kmr_address += PAGE_SIZE;
1019 		kasan_alloc_large(kmr.kmr_address, size);
1020 	}
1021 #endif /* KASAN_CLASSIC */
1022 #if CONFIG_KERNEL_TAGGING
1023 	if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) {
1024 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag((caddr_t)kmr.kmr_address + fill_start, fill_size);
1025 		kmr.kmr_ptr = (caddr_t)kmr.kmr_ptr - fill_start;
1026 #if KASAN_TBI
1027 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, map_size, size);
1028 #endif /* KASAN_TBI */
1029 	}
1030 #endif /* CONFIG_KERNEL_TAGGING */
1031 	return kmr;
1032 
1033 out_error:
1034 	if (flags & KMA_NOFAIL) {
1035 		__kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
1036 	}
1037 	if (guard_left) {
1038 		guard_left->vmp_snext = wired_page_list;
1039 		wired_page_list = guard_left;
1040 	}
1041 	if (guard_right) {
1042 		guard_right->vmp_snext = wired_page_list;
1043 		wired_page_list = guard_right;
1044 	}
1045 	if (wired_page_list) {
1046 		vm_page_free_list(wired_page_list, FALSE);
1047 	}
1048 
1049 #if DEBUG || DEVELOPMENT
1050 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1051 	    0, 0, 0, 0);
1052 #endif /* DEBUG || DEVELOPMENT */
1053 
1054 	return kmr;
1055 }
1056 
1057 kmem_return_t
1058 kmem_alloc_guard(
1059 	vm_map_t        map,
1060 	vm_size_t       size,
1061 	vm_offset_t     mask,
1062 	kma_flags_t     flags,
1063 	kmem_guard_t    guard)
1064 {
1065 	return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL);
1066 }
1067 
1068 kmem_return_t
1069 kmem_alloc_contig_guard(
1070 	vm_map_t                map,
1071 	vm_size_t               size,
1072 	vm_offset_t             mask,
1073 	ppnum_t                 max_pnum,
1074 	ppnum_t                 pnum_mask,
1075 	kma_flags_t             flags,
1076 	kmem_guard_t            guard)
1077 {
1078 	__auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) {
1079 		return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
1080 	};
1081 
1082 	return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages);
1083 }
1084 
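/*
 *	kmem_suballoc:
 *
 *	Carve a submap of `size` bytes out of `parent` (typically the
 *	kernel_map) and return it along with its address.  The parent
 *	entry is made atomic unless KMS_DATA is set, so that kernel
 *	submaps are never split.
 */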
1085 kmem_return_t
1086 kmem_suballoc(
1087 	vm_map_t                parent,
1088 	mach_vm_offset_t       *addr,
1089 	vm_size_t               size,
1090 	vm_map_create_options_t vmc_options,
1091 	int                     vm_flags,
1092 	kms_flags_t             flags,
1093 	vm_tag_t                tag)
1094 {
1095 	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1096 	vm_map_offset_t map_addr = 0;
1097 	kmem_return_t kmr = { };
1098 	vm_map_t map;
1099 
1100 	assert(page_aligned(size));
1101 	assert(parent->pmap == kernel_pmap);
1102 
1103 	vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);
1104 
1105 	if (parent == kernel_map) {
1106 		assert(vmk_flags.vmf_overwrite || (flags & KMS_DATA));
1107 	}
1108 
1109 	if (vmk_flags.vmf_fixed) {
1110 		map_addr = trunc_page(*addr);
1111 	}
1112 
1113 	pmap_reference(vm_map_pmap(parent));
1114 	map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);
1115 
1116 	/*
1117 	 * 1. vm_map_enter() will consume one ref on success.
1118 	 *
1119 	 * 2. make the entry atomic as kernel submaps should never be split.
1120 	 *
1121 	 * 3. instruct vm_map_enter() that it is a fresh submap
1122 	 *    that needs to be taught its bounds as it is inserted.
1123 	 */
1124 	vm_map_reference(map);
1125 
1126 	vmk_flags.vmkf_submap = true;
1127 	if ((flags & KMS_DATA) == 0) {
1128 		/* FIXME: IOKit submaps get fragmented and can't be atomic */
1129 		vmk_flags.vmkf_submap_atomic = true;
1130 	}
1131 	vmk_flags.vmkf_submap_adjust = true;
1132 	if (flags & KMS_LAST_FREE) {
1133 		vmk_flags.vmkf_last_free = true;
1134 	}
1135 	if (flags & KMS_PERMANENT) {
1136 		vmk_flags.vmf_permanent = true;
1137 	}
1138 	if (flags & KMS_DATA) {
1139 		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1140 	}
1141 	if (flags & KMS_NOSOFTLIMIT) {
1142 		vmk_flags.vmkf_no_soft_limit = true;
1143 	}
1144 
1145 	kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
1146 	    vmk_flags, (vm_object_t)map, 0, FALSE,
1147 	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1148 
1149 	if (kmr.kmr_return != KERN_SUCCESS) {
1150 		if (flags & KMS_NOFAIL) {
1151 			panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
1152 			    parent, size, kmr.kmr_return);
1153 		}
1154 		assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
1155 		vm_map_deallocate(map);
1156 		vm_map_deallocate(map); /* also removes ref to pmap */
1157 		return kmr;
1158 	}
1159 
1160 	/*
1161 	 * For kmem_suballocs that register a claim and are assigned a range, ensure
1162 	 * that the exact same range is returned.
1163 	 */
1164 	if (*addr != 0 && parent == kernel_map &&
1165 	    startup_phase > STARTUP_SUB_KMEM) {
1166 		assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
1167 	} else {
1168 		*addr = map_addr;
1169 	}
1170 
1171 	kmr.kmr_submap = map;
1172 	return kmr;
1173 }
1174 
1175 /*
1176  *	kmem_alloc:
1177  *
1178  *	Allocate wired-down memory in the kernel's address map
1179  *	or a submap.  The memory is not zero-filled.
1180  */
1181 
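/*
 *	Typical call shape (illustrative):
 *
 *		vm_offset_t   addr;
 *		kern_return_t kr = kmem_alloc_external(kernel_map, &addr, PAGE_SIZE);
 */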
1182 __exported kern_return_t
1183 kmem_alloc_external(
1184 	vm_map_t        map,
1185 	vm_offset_t     *addrp,
1186 	vm_size_t       size);
1187 kern_return_t
1188 kmem_alloc_external(
1189 	vm_map_t        map,
1190 	vm_offset_t     *addrp,
1191 	vm_size_t       size)
1192 {
1193 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1194 		return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
1195 	}
1196 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1197 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1198 }
1199 
1200 
1201 /*
1202  *	kmem_alloc_kobject:
1203  *
1204  *	Allocate wired-down memory in the kernel's address map
1205  *	or a submap.  The memory is not zero-filled.
1206  *
1207  *	The memory is allocated in the kernel_object.
1208  *	It may not be copied with vm_map_copy, and
1209  *	it may not be reallocated with kmem_realloc.
1210  */
1211 
1212 __exported kern_return_t
1213 kmem_alloc_kobject_external(
1214 	vm_map_t        map,
1215 	vm_offset_t     *addrp,
1216 	vm_size_t       size);
1217 kern_return_t
1218 kmem_alloc_kobject_external(
1219 	vm_map_t        map,
1220 	vm_offset_t     *addrp,
1221 	vm_size_t       size)
1222 {
1223 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1224 		return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
1225 	}
1226 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1227 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1228 }
1229 
1230 /*
1231  *	kmem_alloc_pageable:
1232  *
1233  *	Allocate pageable memory in the kernel's address map.
1234  */
1235 
1236 __exported kern_return_t
1237 kmem_alloc_pageable_external(
1238 	vm_map_t        map,
1239 	vm_offset_t     *addrp,
1240 	vm_size_t       size);
1241 kern_return_t
1242 kmem_alloc_pageable_external(
1243 	vm_map_t        map,
1244 	vm_offset_t     *addrp,
1245 	vm_size_t       size)
1246 {
1247 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1248 		return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA, vm_tag_bt());
1249 	}
1250 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1251 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1252 }
1253 
1254 static __attribute__((always_inline, warn_unused_result))
1255 kern_return_t
1256 mach_vm_allocate_kernel_sanitize(
1257 	vm_map_t                map,
1258 	mach_vm_offset_ut       addr_u,
1259 	mach_vm_size_ut         size_u,
1260 	vm_map_kernel_flags_t   vmk_flags,
1261 	vm_map_offset_t        *map_addr,
1262 	vm_map_size_t          *map_size)
1263 {
1264 	kern_return_t   result;
1265 	vm_map_offset_t map_end;
1266 
1267 	if (vmk_flags.vmf_fixed) {
1268 		result = vm_sanitize_addr_size(addr_u, size_u,
1269 		    VM_SANITIZE_CALLER_VM_ALLOCATE_FIXED,
1270 		    map,
1271 		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS | VM_SANITIZE_FLAGS_REALIGN_START,
1272 		    map_addr, &map_end, map_size);
1273 		if (__improbable(result != KERN_SUCCESS)) {
1274 			return result;
1275 		}
1276 	} else {
1277 		*map_addr = 0;
1278 		result = vm_sanitize_size(0, size_u,
1279 		    VM_SANITIZE_CALLER_VM_ALLOCATE_ANYWHERE, map,
1280 		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS,
1281 		    map_size);
1282 		if (__improbable(result != KERN_SUCCESS)) {
1283 			return result;
1284 		}
1285 	}
1286 
1287 	return KERN_SUCCESS;
1288 }
1289 
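/*
 * Kernel-internal entry point for mach_vm_allocate(): sanitize the
 * (address, size) pair, pick a kmem range for the request, and enter
 * the mapping; failures are recorded with ktriage for later diagnosis.
 */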
1290 kern_return_t
1291 mach_vm_allocate_kernel(
1292 	vm_map_t                map,
1293 	mach_vm_offset_ut      *addr_u,
1294 	mach_vm_size_ut         size_u,
1295 	vm_map_kernel_flags_t   vmk_flags)
1296 {
1297 	vm_map_offset_t map_addr;
1298 	vm_map_size_t   map_size;
1299 	kern_return_t   result;
1300 
1301 	if (map == VM_MAP_NULL) {
1302 		ktriage_record(thread_tid(current_thread()),
1303 		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1304 		    KDBG_TRIAGE_RESERVED,
1305 		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADMAP_ERROR),
1306 		    KERN_INVALID_ARGUMENT /* arg */);
1307 		return KERN_INVALID_ARGUMENT;
1308 	}
1309 
1310 	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
1311 	    VM_FLAGS_USER_ALLOCATE)) {
1312 		return KERN_INVALID_ARGUMENT;
1313 	}
1314 
1315 	result = mach_vm_allocate_kernel_sanitize(map,
1316 	    *addr_u,
1317 	    size_u,
1318 	    vmk_flags,
1319 	    &map_addr,
1320 	    &map_size);
1321 	if (__improbable(result != KERN_SUCCESS)) {
1322 		result = vm_sanitize_get_kr(result);
1323 		if (result == KERN_SUCCESS) {
1324 			*addr_u = vm_sanitize_wrap_addr(0);
1325 		} else {
1326 			ktriage_record(thread_tid(current_thread()),
1327 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1328 			    KDBG_TRIAGE_RESERVED,
1329 			    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADSIZE_ERROR),
1330 			    KERN_INVALID_ARGUMENT /* arg */);
1331 		}
1332 		return result;
1333 	}
1334 
1335 	vm_map_kernel_flags_update_range_id(&vmk_flags, map, map_size);
1336 
1337 	result = vm_map_enter(
1338 		map,
1339 		&map_addr,
1340 		map_size,
1341 		(vm_map_offset_t)0,
1342 		vmk_flags,
1343 		VM_OBJECT_NULL,
1344 		(vm_object_offset_t)0,
1345 		FALSE,
1346 		VM_PROT_DEFAULT,
1347 		VM_PROT_ALL,
1348 		VM_INHERIT_DEFAULT);
1349 
1350 	if (result == KERN_SUCCESS) {
1351 #if KASAN
1352 		if (map->pmap == kernel_pmap) {
1353 			kasan_notify_address(map_addr, map_size);
1354 		}
1355 #endif
1356 		*addr_u = vm_sanitize_wrap_addr(map_addr);
1357 	} else {
1358 		ktriage_record(thread_tid(current_thread()),
1359 		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1360 		    KDBG_TRIAGE_RESERVED,
1361 		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_VMMAPENTER_ERROR),
1362 		    result /* arg */);
1363 	}
1364 	return result;
1365 }
1366 
1367 #pragma mark population
1368 
1369 static void
1370 kernel_memory_populate_pmap_enter(
1371 	vm_object_t             object,
1372 	vm_address_t            addr,
1373 	vm_object_offset_t      offset,
1374 	vm_page_t               mem,
1375 	vm_prot_t               prot,
1376 	int                     pe_flags,
1377 	pmap_mapping_type_t     mapping_type)
1378 {
1379 	kern_return_t   pe_result;
1380 	int             pe_options;
1381 
1382 	if (VMP_ERROR_GET(mem)) {
1383 		panic("VM page %p should not have an error", mem);
1384 	}
1385 
1386 	pe_options = PMAP_OPTIONS_NOWAIT;
1387 	if (object->internal) {
1388 		pe_options |= PMAP_OPTIONS_INTERNAL;
1389 	}
1390 	if (mem->vmp_reusable || object->all_reusable) {
1391 		pe_options |= PMAP_OPTIONS_REUSABLE;
1392 	}
1393 
1394 	pe_result = pmap_enter_options(kernel_pmap, addr + offset,
1395 	    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
1396 	    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1397 
1398 	if (pe_result == KERN_RESOURCE_SHORTAGE) {
1399 		vm_object_unlock(object);
1400 
1401 		pe_options &= ~PMAP_OPTIONS_NOWAIT;
1402 
1403 		pe_result = pmap_enter_options(kernel_pmap, addr + offset,
1404 		    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
1405 		    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1406 
1407 		vm_object_lock(object);
1408 	}
1409 
1410 	assert(pe_result == KERN_SUCCESS);
1411 }
1412 
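/*
 * Insert `page_list` into `object` starting at `offset`, enter the
 * pages into the kernel pmap at `addr`, update the wired/gobbled page
 * accounting, and drop the object lock before returning.
 */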
1413 void
1414 kernel_memory_populate_object_and_unlock(
1415 	vm_object_t             object, /* must be locked */
1416 	vm_address_t            addr,
1417 	vm_offset_t             offset,
1418 	vm_size_t               size,
1419 	vm_page_t               page_list,
1420 	kma_flags_t             flags,
1421 	vm_tag_t                tag,
1422 	vm_prot_t               prot,
1423 	pmap_mapping_type_t     mapping_type)
1424 {
1425 	vm_page_t       mem;
1426 	int             pe_flags;
1427 	bool            gobbled_list = page_list && page_list->vmp_gobbled;
1428 
1429 	assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0));
1430 	assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);
1431 
1432 
1433 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
1434 		assert3u(offset, ==, addr);
1435 	} else {
1436 		/*
1437 		 * kernel_memory_populate_pmap_enter() might drop the object
1438 		 * lock, and the caller might not own a reference anymore
1439 		 * and rely on holding the vm object lock for liveness.
1440 		 */
1441 		vm_object_reference_locked(object);
1442 	}
1443 
1444 	if (flags & KMA_KSTACK) {
1445 		pe_flags = VM_MEM_STACK;
1446 	} else {
1447 		pe_flags = 0;
1448 	}
1449 
1450 
1451 	for (vm_object_offset_t pg_offset = 0;
1452 	    pg_offset < size;
1453 	    pg_offset += PAGE_SIZE_64) {
1454 		if (page_list == NULL) {
1455 			panic("%s: page_list too short", __func__);
1456 		}
1457 
1458 		mem = page_list;
1459 		page_list = mem->vmp_snext;
1460 		mem->vmp_snext = NULL;
1461 
1462 		assert(mem->vmp_wire_count == 0);
1463 		assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
1464 		assert(vm_page_is_canonical(mem));
1465 
1466 		if (flags & KMA_COMPRESSOR) {
1467 			mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
1468 			/*
1469 			 * Background processes doing I/O accounting can call
1470 			 * into the NVMe driver to do some work, which results
1471 			 * in an allocation here; so we want to make sure that
1472 			 * the pages used by the compressor, regardless of
1473 			 * process context, are never on the special Q.
1474 			 */
1475 			mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
1476 
1477 			vm_page_insert(mem, object, offset + pg_offset);
1478 		} else {
1479 			mem->vmp_q_state = VM_PAGE_IS_WIRED;
1480 			mem->vmp_wire_count = 1;
1481 
1482 
1483 			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
1484 		}
1485 
1486 		mem->vmp_gobbled = false;
1487 		mem->vmp_busy = false;
1488 		mem->vmp_pmapped = true;
1489 		mem->vmp_wpmapped = true;
1490 
1491 		/*
1492 		 * Manual PMAP_ENTER_OPTIONS() with shortcuts
1493 		 * for the kernel and compressor objects.
1494 		 */
1495 		kernel_memory_populate_pmap_enter(object, addr, pg_offset,
1496 		    mem, prot, pe_flags, mapping_type);
1497 
1498 		if (flags & KMA_NOENCRYPT) {
1499 			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
1500 		}
1501 	}
1502 
1503 	if (page_list) {
1504 		panic("%s: page_list too long", __func__);
1505 	}
1506 
1507 	vm_object_unlock(object);
1508 	if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) {
1509 		vm_object_deallocate(object);
1510 	}
1511 
1512 	/*
1513 	 * Update the accounting:
1514 	 * - the compressor "wired" pages don't really count as wired
1515 	 * - kmem_alloc_contig_guard() gives gobbled pages,
1516 	 *   which already count as wired but need to be ungobbled.
1517 	 */
1518 	if (gobbled_list) {
1519 		vm_page_lockspin_queues();
1520 		if (flags & KMA_COMPRESSOR) {
1521 			vm_page_wire_count -= atop(size);
1522 		}
1523 		vm_page_gobble_count -= atop(size);
1524 		vm_page_unlock_queues();
1525 	} else if ((flags & KMA_COMPRESSOR) == 0) {
1526 		vm_page_lockspin_queues();
1527 		vm_page_wire_count += atop(size);
1528 		vm_page_unlock_queues();
1529 	}
1530 
1531 	if (flags & KMA_KOBJECT) {
1532 		/* vm_page_insert_wired() handles regular objects already */
1533 		vm_tag_update_size(tag, size, NULL);
1534 	}
1535 
1536 #if KASAN
1537 	if (flags & KMA_COMPRESSOR) {
1538 		kasan_notify_address_nopoison(addr, size);
1539 	} else {
1540 		kasan_notify_address(addr, size);
1541 	}
1542 #endif /* KASAN */
1543 }
1544 
1545 
1546 kern_return_t
1547 kernel_memory_populate(
1548 	vm_offset_t     addr,
1549 	vm_size_t       size,
1550 	kma_flags_t     flags,
1551 	vm_tag_t        tag)
1552 {
1553 	kern_return_t   kr = KERN_SUCCESS;
1554 	vm_page_t       page_list = NULL;
1555 	vm_size_t       page_count = atop_64(size);
1556 	vm_object_t     object = __kmem_object(ANYF(flags));
1557 
1558 #if DEBUG || DEVELOPMENT
1559 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
1560 	    size, 0, 0, 0);
1561 #endif /* DEBUG || DEVELOPMENT */
1562 
1563 
1564 	kr = vm_page_alloc_list(page_count, flags, &page_list);
1565 	if (kr == KERN_SUCCESS) {
1566 		vm_object_lock(object);
1567 		kernel_memory_populate_object_and_unlock(object, addr,
1568 		    addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
1569 		    __kmem_mapping_type(ANYF(flags)));
1570 	}
1571 
1572 #if DEBUG || DEVELOPMENT
1573 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1574 	    page_count, 0, 0, 0);
1575 #endif /* DEBUG || DEVELOPMENT */
1576 	return kr;
1577 }
1578 
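/*
 * Undo kernel_memory_populate(): remove the pages backing
 * [addr, addr + size) from the kernel or compressor object, unwire
 * them as needed, and free them.
 */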
1579 void
1580 kernel_memory_depopulate(
1581 	vm_offset_t        addr,
1582 	vm_size_t          size,
1583 	kma_flags_t        flags,
1584 	vm_tag_t           tag)
1585 {
1586 	vm_object_t        object = __kmem_object(ANYF(flags));
1587 	vm_object_offset_t offset = addr;
1588 	vm_page_t          mem;
1589 	vm_page_t          local_freeq = NULL;
1590 	unsigned int       pages_unwired = 0;
1591 
1592 	vm_object_lock(object);
1593 
1594 	pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);
1595 
1596 	for (vm_object_offset_t pg_offset = 0;
1597 	    pg_offset < size;
1598 	    pg_offset += PAGE_SIZE_64) {
1599 		mem = vm_page_lookup(object, offset + pg_offset);
1600 
1601 		assert(mem);
1602 
1603 		if (flags & KMA_COMPRESSOR) {
1604 			assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
1605 		} else {
1606 			assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
1607 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
1608 			pages_unwired++;
1609 		}
1610 
1611 		mem->vmp_busy = TRUE;
1612 
1613 		assert(mem->vmp_tabled);
1614 		vm_page_remove(mem, TRUE);
1615 		assert(mem->vmp_busy);
1616 
1617 		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
1618 
1619 		mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
1620 		mem->vmp_snext = local_freeq;
1621 		local_freeq = mem;
1622 	}
1623 
1624 	vm_object_unlock(object);
1625 
1626 	vm_page_free_list(local_freeq, TRUE);
1627 
1628 	if (!(flags & KMA_COMPRESSOR)) {
1629 		vm_page_lockspin_queues();
1630 		vm_page_wire_count -= pages_unwired;
1631 		vm_page_unlock_queues();
1632 	}
1633 
1634 	if (flags & KMA_KOBJECT) {
1635 		/* vm_page_remove() handles regular objects already */
1636 		vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
1637 	}
1638 }
1639 
1640 #pragma mark reallocation
1641 
1642 __abortlike
1643 static void
1644 __kmem_realloc_invalid_object_size_panic(
1645 	vm_map_t                map,
1646 	vm_address_t            address,
1647 	vm_size_t               size,
1648 	vm_map_entry_t          entry)
1649 {
1650 	vm_object_t object  = VME_OBJECT(entry);
1651 	vm_size_t   objsize = __kmem_entry_orig_size(entry);
1652 
1653 	panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1654 	    "object %p has unexpected size %ld",
1655 	    map, (void *)address, (size_t)size, entry, object, objsize);
1656 }
1657 
1658 __abortlike
1659 static void
1660 __kmem_realloc_invalid_pager_panic(
1661 	vm_map_t                map,
1662 	vm_address_t            address,
1663 	vm_size_t               size,
1664 	vm_map_entry_t          entry)
1665 {
1666 	vm_object_t object     = VME_OBJECT(entry);
1667 	memory_object_t pager  = object->pager;
1668 	bool pager_created     = object->pager_created;
1669 	bool pager_initialized = object->pager_initialized;
1670 	bool pager_ready       = object->pager_ready;
1671 
1672 	panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1673 	    "object %p has unexpected pager %p (%d,%d,%d)",
1674 	    map, (void *)address, (size_t)size, entry, object,
1675 	    pager, pager_created, pager_initialized, pager_ready);
1676 }
1677 
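/*
 * Shrink path of kmem_realloc_guard(): clip the (atomic) entry at the
 * new size, remove and unwire the tail of the old allocation, and
 * re-establish the trailing guard page and KASAN state as needed.
 */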
1678 static kmem_return_t
1679 kmem_realloc_shrink_guard(
1680 	vm_map_t                map,
1681 	vm_offset_t             req_oldaddr,
1682 	vm_size_t               req_oldsize,
1683 	vm_size_t               req_newsize,
1684 	kmr_flags_t             flags,
1685 	kmem_guard_t            guard,
1686 	vm_map_entry_t          entry)
1687 {
1688 	vmr_flags_t             vmr_flags = VM_MAP_REMOVE_KUNWIRE;
1689 	vm_object_t             object;
1690 	vm_offset_t             delta = 0;
1691 	kmem_return_t           kmr;
1692 	bool                    was_atomic;
1693 	vm_size_t               oldsize = round_page(req_oldsize);
1694 	vm_size_t               newsize = round_page(req_newsize);
1695 	vm_address_t            oldaddr = req_oldaddr;
1696 
1697 #if KASAN_CLASSIC
1698 	if (flags & KMR_KASAN_GUARD) {
1699 		assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0);
1700 		flags   |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1701 		oldaddr -= PAGE_SIZE;
1702 		delta    = ptoa(2);
1703 		oldsize += delta;
1704 		newsize += delta;
1705 	}
1706 #endif /* KASAN_CLASSIC */
1707 
1708 	if (flags & KMR_TAG) {
1709 		oldaddr = vm_memtag_canonicalize_kernel(req_oldaddr);
1710 	}
1711 
1712 	vm_map_lock_assert_exclusive(map);
1713 
1714 	if ((flags & KMR_KOBJECT) == 0) {
1715 		object = VME_OBJECT(entry);
1716 		vm_object_reference(object);
1717 	}
1718 
1719 	/*
1720 	 *	Shrinking an atomic entry starts with splitting it,
1721 	 *	and removing the second half.
1722 	 */
1723 	was_atomic = entry->vme_atomic;
1724 	entry->vme_atomic = false;
1725 	vm_map_clip_end(map, entry, entry->vme_start + newsize);
1726 	entry->vme_atomic = was_atomic;
1727 
1728 #if KASAN
1729 	if (entry->vme_kernel_object && was_atomic) {
1730 		entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta;
1731 	}
1732 #if KASAN_CLASSIC
1733 	if (flags & KMR_KASAN_GUARD) {
1734 		kasan_poison_range(oldaddr + newsize, oldsize - newsize,
1735 		    ASAN_VALID);
1736 	}
1737 #endif
1738 #if KASAN_TBI
1739 	if (flags & KMR_TAG) {
1740 		kasan_tbi_mark_free_space((caddr_t)req_oldaddr + newsize, oldsize - newsize);
1741 	}
1742 #endif /* KASAN_TBI */
1743 #endif /* KASAN */
1744 	(void)vm_map_remove_and_unlock(map,
1745 	    oldaddr + newsize, oldaddr + oldsize,
1746 	    vmr_flags, KMEM_GUARD_NONE);
1747 
1748 
1749 	/*
1750 	 *	Lastly, if there are guard pages, deal with them.
1751 	 *
1752 	 *	The kernel object just needs to depopulate,
1753 	 *	regular objects require freeing the last page
1754 	 *	and replacing it with a guard.
1755 	 */
1756 	if (flags & KMR_KOBJECT) {
1757 		if (flags & KMR_GUARD_LAST) {
1758 			kma_flags_t dflags = KMA_KOBJECT;
1759 			kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
1760 			    PAGE_SIZE, dflags, guard.kmg_tag);
1761 		}
1762 	} else {
1763 		vm_page_t guard_right = VM_PAGE_NULL;
1764 		vm_offset_t remove_start = newsize;
1765 
1766 		if (flags & KMR_GUARD_LAST) {
1767 			if (!map->never_faults) {
1768 				guard_right = vm_page_create_guard(true);
1769 			}
1770 			remove_start -= PAGE_SIZE;
1771 		}
1772 
1773 		vm_object_lock(object);
1774 
1775 		if (object->vo_size != oldsize) {
1776 			__kmem_realloc_invalid_object_size_panic(map,
1777 			    req_oldaddr, req_oldsize + delta, entry);
1778 		}
1779 		vm_object_set_size(object, newsize, req_newsize);
1780 
1781 		vm_object_page_remove(object, remove_start, oldsize);
1782 
1783 		if (guard_right) {
1784 			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
1785 			guard_right->vmp_busy = false;
1786 		}
1787 		vm_object_unlock(object);
1788 		vm_object_deallocate(object);
1789 	}
1790 
1791 	kmr.kmr_address = req_oldaddr;
1792 	kmr.kmr_return  = 0;
1793 #if KASAN_CLASSIC
1794 	if (flags & KMA_KASAN_GUARD) {
1795 		kasan_alloc_large(kmr.kmr_address, req_newsize);
1796 	}
1797 #endif /* KASAN_CLASSIC */
1798 #if KASAN_TBI
1799 	if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1800 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
1801 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
1802 	}
1803 #endif /* KASAN_TBI */
1804 
1805 	return kmr;
1806 }
1807 
1808 kmem_return_t
1809 kmem_realloc_guard(
1810 	vm_map_t                map,
1811 	vm_offset_t             req_oldaddr,
1812 	vm_size_t               req_oldsize,
1813 	vm_size_t               req_newsize,
1814 	kmr_flags_t             flags,
1815 	kmem_guard_t            guard)
1816 {
1817 	vm_object_t             object;
1818 	vm_size_t               oldsize;
1819 	vm_size_t               newsize;
1820 	vm_offset_t             delta = 0;
1821 	vm_map_offset_t         oldaddr;
1822 	vm_map_offset_t         newaddr;
1823 	vm_object_offset_t      newoffs;
1824 	vm_map_entry_t          oldentry;
1825 	vm_map_entry_t          newentry;
1826 	vm_page_t               page_list = NULL;
1827 	bool                    needs_wakeup = false;
1828 	kmem_return_t           kmr = { };
1829 	unsigned int            last_timestamp;
1830 	vm_map_kernel_flags_t   vmk_flags = {
1831 		.vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
1832 	};
1833 
1834 	assert(KMEM_REALLOC_FLAGS_VALID(flags));
1835 
1836 	if (!guard.kmg_atomic) {
1837 		if (!(flags & (KMR_DATA | KMR_DATA_SHARED))) {
1838 			__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
1839 			    req_oldsize, flags);
1840 		}
1841 
1842 		if (flags & KMR_KOBJECT) {
1843 			__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
1844 			    req_oldsize, flags);
1845 		}
1846 	}
1847 
1848 	if (req_oldaddr == 0ul) {
1849 		return kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
1850 	}
1851 
1852 	if (req_newsize == 0ul) {
1853 		kmem_free_guard(map, req_oldaddr, req_oldsize,
1854 		    (kmf_flags_t)flags, guard);
1855 		return kmr;
1856 	}
1857 
1858 	if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
1859 		__kmem_invalid_size_panic(map, req_newsize, flags);
1860 	}
1861 	if (req_newsize < __kmem_guard_size(ANYF(flags))) {
1862 		__kmem_invalid_size_panic(map, req_newsize, flags);
1863 	}
1864 
1865 	oldsize = round_page(req_oldsize);
1866 	newsize = round_page(req_newsize);
1867 	oldaddr = req_oldaddr;
1868 #if KASAN_CLASSIC
1869 	if (flags & KMR_KASAN_GUARD) {
1870 		flags   |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1871 		oldaddr -= PAGE_SIZE;
1872 		delta    = ptoa(2);
1873 		oldsize += delta;
1874 		newsize += delta;
1875 	}
1876 #endif /* KASAN_CLASSIC */
1877 #if CONFIG_KERNEL_TAGGING
1878 	if (flags & KMR_TAG) {
1879 		vm_memtag_verify_tag(req_oldaddr + __kmem_guard_left(ANYF(flags)));
1880 		oldaddr = vm_memtag_canonicalize_kernel(req_oldaddr);
1881 	}
1882 #endif /* CONFIG_KERNEL_TAGGING */
1883 
1884 #if !KASAN
1885 	/*
1886 	 *	If we are not on a KASAN variant and there is no difference in
1887 	 *	the rounded size, just return.
1888 	 *
1889 	 *	Otherwise we want to validate the size and re-tag for KASAN_TBI.
1890 	 */
1891 	if (oldsize == newsize) {
1892 		kmr.kmr_address = req_oldaddr;
1893 		return kmr;
1894 	}
1895 #endif /* !KASAN */
1896 
1897 	/*
1898 	 *	If we're growing the allocation,
1899 	 *	then reserve the pages we'll need,
1900 	 *	and find a spot for its new place.
1901 	 */
1902 	if (oldsize < newsize) {
1903 #if DEBUG || DEVELOPMENT
1904 		VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1905 		    DBG_VM_KERN_REQUEST, DBG_FUNC_START,
1906 		    newsize - oldsize, 0, 0, 0);
1907 #endif /* DEBUG || DEVELOPMENT */
1908 		kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
1909 		    (kma_flags_t)flags, &page_list);
1910 		if (kmr.kmr_return == KERN_SUCCESS) {
1911 			kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
1912 			    newsize, 0, &vmk_flags, true);
1913 			kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
1914 			    vmk_flags, &newentry);
1915 		}
1916 		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
1917 			if (flags & KMR_REALLOCF) {
1918 				kmem_free_guard(map, req_oldaddr, req_oldsize,
1919 				    flags & (KMF_TAG | KMF_GUARD_FIRST |
1920 				    KMF_GUARD_LAST | KMF_KASAN_GUARD), guard);
1921 			}
1922 			if (page_list) {
1923 				vm_page_free_list(page_list, FALSE);
1924 			}
1925 #if DEBUG || DEVELOPMENT
1926 			VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1927 			    DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1928 			    0, 0, 0, 0);
1929 #endif /* DEBUG || DEVELOPMENT */
1930 			return kmr;
1931 		}
1932 
1933 		/* map is locked */
1934 	} else {
1935 		vm_map_lock(map);
1936 	}
1937 
1938 
1939 	/*
1940 	 *	Locate the entry:
1941 	 *	- wait for it to quiesce.
1942 	 *	- validate its guard,
1943 	 *	- learn its correct tag,
1944 	 */
1945 again:
1946 	if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
1947 		__kmem_entry_not_found_panic(map, req_oldaddr);
1948 	}
1949 	if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
1950 		oldentry->needs_wakeup = true;
1951 		vm_map_entry_wait(map, THREAD_UNINT);
1952 		goto again;
1953 	}
1954 	kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
1955 	if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
1956 		__kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
1957 	}
1958 	/*
1959 	 *	TODO: We should validate for non-atomic entries that the range
1960 	 *	      we are acting on is what we expect here.
1961 	 */
1962 #if KASAN
1963 	if (__kmem_entry_orig_size(oldentry) != req_oldsize) {
1964 		__kmem_realloc_invalid_object_size_panic(map,
1965 		    req_oldaddr, req_oldsize + delta, oldentry);
1966 	}
1967 
1968 	if (oldsize == newsize) {
1969 		kmr.kmr_address = req_oldaddr;
1970 		if (oldentry->vme_kernel_object) {
1971 			oldentry->vme_object_or_delta = delta +
1972 			    (-req_newsize & PAGE_MASK);
1973 		} else {
1974 			object = VME_OBJECT(oldentry);
1975 			vm_object_lock(object);
1976 			vm_object_set_size(object, newsize, req_newsize);
1977 			vm_object_unlock(object);
1978 		}
1979 		vm_map_unlock(map);
1980 
1981 #if KASAN_CLASSIC
1982 		if (flags & KMA_KASAN_GUARD) {
1983 			kasan_alloc_large(kmr.kmr_address, req_newsize);
1984 		}
1985 #endif /* KASAN_CLASSIC */
1986 #if KASAN_TBI
1987 		if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1988 			kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
1989 			kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
1990 		}
1991 #endif /* KASAN_TBI */
1992 		return kmr;
1993 	}
1994 #endif /* KASAN */
1995 
1996 	guard.kmg_tag = VME_ALIAS(oldentry);
1997 
1998 	if (newsize < oldsize) {
1999 		return kmem_realloc_shrink_guard(map, req_oldaddr,
2000 		           req_oldsize, req_newsize, flags, guard, oldentry);
2001 	}
2002 
2003 
2004 	/*
2005 	 *	We are growing the entry
2006 	 *
2007 	 *	For regular objects we use the object `vo_size` updates
2008 	 *	as a guarantee that no two kmem_realloc() calls can happen
2009 	 *	concurrently (by doing it before the map is unlocked).
2010 	 *
2011 	 *	For the kernel object, prevent the entry from being
2012 	 *	reallocated or changed by marking it "in_transition".
2013 	 */
2014 
2015 	object = VME_OBJECT(oldentry);
2016 	vm_object_lock(object);
2017 	vm_object_reference_locked(object);
2018 
2019 	newaddr = newentry->vme_start;
2020 	newoffs = oldsize;
2021 
2022 	VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
2023 	VME_ALIAS_SET(newentry, guard.kmg_tag);
2024 	if (flags & KMR_KOBJECT) {
2025 		oldentry->in_transition = true;
2026 		VME_OFFSET_SET(newentry, newaddr);
2027 		newentry->wired_count = 1;
2028 		vme_btref_consider_and_set(newentry, __builtin_frame_address(0));
2029 		newoffs = newaddr + oldsize;
2030 #if KASAN
2031 		newentry->vme_object_or_delta = delta +
2032 		    (-req_newsize & PAGE_MASK);
2033 #endif /* KASAN */
2034 	} else {
2035 		if (object->pager_created || object->pager) {
2036 			/*
2037 			 * We can't "realloc/grow" the pager, so pageable
2038 			 * allocations should not go through this path.
2039 			 */
2040 			__kmem_realloc_invalid_pager_panic(map,
2041 			    req_oldaddr, req_oldsize + delta, oldentry);
2042 		}
2043 		if (object->vo_size != oldsize) {
2044 			__kmem_realloc_invalid_object_size_panic(map,
2045 			    req_oldaddr, req_oldsize + delta, oldentry);
2046 		}
2047 		vm_object_set_size(object, newsize, req_newsize);
2048 	}
2049 
2050 	last_timestamp = map->timestamp;
2051 	vm_map_unlock(map);
2052 
2053 
2054 	/*
2055 	 *	Now proceed with the population of pages.
2056 	 *
2057 	 *	Kernel objects can use the kmem population helpers.
2058 	 *
2059 	 *	Regular objects will insert pages manually,
2060 	 *	then wire the memory into the new range.
2061 	 */
2062 
2063 	vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));
2064 
2065 	if (flags & KMR_KOBJECT) {
2066 		pmap_mapping_type_t mapping_type = __kmem_mapping_type(ANYF(flags));
2067 
2068 		pmap_protect(kernel_pmap,
2069 		    oldaddr, oldaddr + oldsize - guard_right_size,
2070 		    VM_PROT_NONE);
2071 
2072 		for (vm_object_offset_t offset = 0;
2073 		    offset < oldsize - guard_right_size;
2074 		    offset += PAGE_SIZE_64) {
2075 			vm_page_t mem;
2076 
2077 			mem = vm_page_lookup(object, oldaddr + offset);
2078 			if (mem == VM_PAGE_NULL) {
2079 				continue;
2080 			}
2081 
2082 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
2083 
2084 			mem->vmp_busy = true;
2085 			vm_page_remove(mem, true);
2086 			vm_page_insert_wired(mem, object, newaddr + offset,
2087 			    guard.kmg_tag);
2088 			mem->vmp_busy = false;
2089 
2090 			kernel_memory_populate_pmap_enter(object, newaddr,
2091 			    offset, mem, VM_PROT_DEFAULT, 0, mapping_type);
2092 		}
2093 
2094 		kernel_memory_populate_object_and_unlock(object,
2095 		    newaddr + oldsize - guard_right_size,
2096 		    newoffs - guard_right_size,
2097 		    newsize - oldsize,
2098 		    page_list, (kma_flags_t)flags,
2099 		    guard.kmg_tag, VM_PROT_DEFAULT, mapping_type);
2100 	} else {
2101 		vm_page_t guard_right = VM_PAGE_NULL;
2102 
2103 		/*
2104 		 *	Note: we are borrowing the new entry reference
2105 		 *	on the object for the duration of this code,
2106 		 *	which works because we keep the object locked
2107 		 *	throughout.
2108 		 */
2109 		if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
2110 			guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
2111 			assert(vm_page_is_guard(guard_right));
2112 			guard_right->vmp_busy = true;
2113 			vm_page_remove(guard_right, true);
2114 		}
2115 
2116 		if (flags & KMR_FREEOLD) {
2117 			/*
2118 			 * Freeing the old mapping will make
2119 			 * the old pages become pageable until
2120 			 * the new mapping makes them wired again.
2121 			 * Let's take an extra "wire_count" to
2122 			 * prevent any accidental "page out".
2123 			 * We'll have to undo that after wiring
2124 			 * the new mapping.
2125 			 */
2126 			vm_object_reference_locked(object); /* keep object alive */
2127 			for (vm_object_offset_t offset = 0;
2128 			    offset < oldsize - guard_right_size;
2129 			    offset += PAGE_SIZE_64) {
2130 				vm_page_t mem;
2131 
2132 				mem = vm_page_lookup(object, offset);
2133 				assert(mem != VM_PAGE_NULL);
2134 				assertf(!VM_PAGE_PAGEABLE(mem),
2135 				    "mem %p qstate %d",
2136 				    mem, mem->vmp_q_state);
2137 				if (vm_page_is_guard(mem)) {
2138 					/* guard pages are not wired */
2139 				} else {
2140 					assertf(VM_PAGE_WIRED(mem),
2141 					    "mem %p qstate %d wirecount %d",
2142 					    mem,
2143 					    mem->vmp_q_state,
2144 					    mem->vmp_wire_count);
2145 					assertf(mem->vmp_wire_count >= 1,
2146 					    "mem %p wirecount %d",
2147 					    mem, mem->vmp_wire_count);
2148 					mem->vmp_wire_count++;
2149 				}
2150 			}
2151 		}
2152 
2153 		for (vm_object_offset_t offset = oldsize - guard_right_size;
2154 		    offset < newsize - guard_right_size;
2155 		    offset += PAGE_SIZE_64) {
2156 			vm_page_t mem = page_list;
2157 
2158 			page_list = mem->vmp_snext;
2159 			mem->vmp_snext = VM_PAGE_NULL;
2160 			assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
2161 			assert(!VM_PAGE_PAGEABLE(mem));
2162 
2163 			vm_page_insert(mem, object, offset);
2164 			mem->vmp_busy = false;
2165 		}
2166 
2167 		if (guard_right) {
2168 			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
2169 			guard_right->vmp_busy = false;
2170 		}
2171 
2172 		vm_object_unlock(object);
2173 	}
2174 
2175 	/*
2176 	 *	Mark the entry as idle again,
2177 	 *	and honor KMR_FREEOLD if needed.
2178 	 */
2179 
2180 	vm_map_lock(map);
2181 	if (last_timestamp + 1 != map->timestamp &&
2182 	    !vm_map_lookup_entry(map, oldaddr, &oldentry)) {
2183 		__kmem_entry_not_found_panic(map, req_oldaddr);
2184 	}
2185 
2186 	if (flags & KMR_KOBJECT) {
2187 		assert(oldentry->in_transition);
2188 		oldentry->in_transition = false;
2189 		if (oldentry->needs_wakeup) {
2190 			needs_wakeup = true;
2191 			oldentry->needs_wakeup = false;
2192 		}
2193 	}
2194 
2195 	if (flags & KMR_FREEOLD) {
2196 		vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2197 
2198 #if KASAN_CLASSIC
2199 		if (flags & KMR_KASAN_GUARD) {
2200 			kasan_poison_range(oldaddr, oldsize, ASAN_VALID);
2201 		}
2202 #endif
2203 #if KASAN_TBI
2204 		if (flags & KMR_TAG) {
2205 			kasan_tbi_mark_free_space((caddr_t)req_oldaddr, oldsize);
2206 		}
2207 #endif /* KASAN_TBI */
2208 		if (flags & KMR_GUARD_LAST) {
2209 			vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST;
2210 		}
2211 		(void)vm_map_remove_and_unlock(map,
2212 		    oldaddr, oldaddr + oldsize,
2213 		    vmr_flags, guard);
2214 	} else {
2215 		vm_map_unlock(map);
2216 	}
2217 
2218 	if ((flags & KMR_KOBJECT) == 0) {
2219 		kern_return_t kr;
2220 		/*
2221 		 * This must happen _after_ we do the KMR_FREEOLD,
2222 		 * because wiring the pages will call into the pmap,
2223 		 * and if the pages are typed XNU_KERNEL_RESTRICTED,
2224 		 * this would cause a second mapping of the page and panic.
2225 		 */
2226 		kr = vm_map_wire_kernel(map,
2227 		    vm_sanitize_wrap_addr(newaddr),
2228 		    vm_sanitize_wrap_addr(newaddr + newsize),
2229 		    vm_sanitize_wrap_prot(VM_PROT_DEFAULT),
2230 		    guard.kmg_tag, FALSE);
2231 		assert(kr == KERN_SUCCESS);
2232 
2233 		if (flags & KMR_FREEOLD) {
2234 			/*
2235 			 * Undo the extra "wiring" we made above
2236 			 * and release the extra reference we took
2237 			 * on the object.
2238 			 */
2239 			vm_object_lock(object);
2240 			for (vm_object_offset_t offset = 0;
2241 			    offset < oldsize - guard_right_size;
2242 			    offset += PAGE_SIZE_64) {
2243 				vm_page_t mem;
2244 
2245 				mem = vm_page_lookup(object, offset);
2246 				assert(mem != VM_PAGE_NULL);
2247 				assertf(!VM_PAGE_PAGEABLE(mem),
2248 				    "mem %p qstate %d",
2249 				    mem, mem->vmp_q_state);
2250 				if (vm_page_is_guard(mem)) {
2251 					/* guard pages are not wired */
2252 				} else {
2253 					assertf(VM_PAGE_WIRED(mem),
2254 					    "mem %p qstate %d wirecount %d",
2255 					    mem,
2256 					    mem->vmp_q_state,
2257 					    mem->vmp_wire_count);
2258 					assertf(mem->vmp_wire_count >= 2,
2259 					    "mem %p wirecount %d",
2260 					    mem, mem->vmp_wire_count);
2261 					mem->vmp_wire_count--;
2262 					assert(VM_PAGE_WIRED(mem));
2263 					assert(mem->vmp_wire_count >= 1);
2264 				}
2265 			}
2266 			vm_object_unlock(object);
2267 			vm_object_deallocate(object); /* release extra ref */
2268 		}
2269 	}
2270 
2271 	if (needs_wakeup) {
2272 		vm_map_entry_wakeup(map);
2273 	}
2274 
2275 #if DEBUG || DEVELOPMENT
2276 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
2277 	    atop(newsize - oldsize), 0, 0, 0);
2278 #endif /* DEBUG || DEVELOPMENT */
2279 	kmr.kmr_address = newaddr;
2280 
2281 #if KASAN
2282 	kasan_notify_address(kmr.kmr_address, newsize);
2283 #endif /* KASAN */
2284 #if KASAN_CLASSIC
2285 	if (flags & KMR_KASAN_GUARD) {
2286 		kmr.kmr_address += PAGE_SIZE;
2287 		kasan_alloc_large(kmr.kmr_address, req_newsize);
2288 	}
2289 #endif /* KASAN_CLASSIC */
2290 #if CONFIG_KERNEL_TAGGING
2291 	if (flags & KMR_TAG) {
2292 #if   KASAN_TBI
2293 		/*
2294 		 * Validate the current buffer, then generate a new tag,
2295 		 * even if the address is stable, it's a "new" allocation.
2296 		 */
2297 		__asan_loadN((vm_offset_t)kmr.kmr_address, oldsize);
2298 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
2299 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
2300 #endif /* KASAN_TBI */
2301 	}
2302 #endif /* CONFIG_KERNEL_TAGGING */
2303 
2304 	return kmr;
2305 }
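#if 0
/*
 * Editor's illustrative sketch (not part of the original vm_kern.c, not
 * compiled): a minimal caller of kmem_realloc_guard() for a wired, atomic
 * kernel_map allocation, assuming it was created with matching guard
 * parameters. Flag and field names are those used above; the VM tag and
 * the exact flag combination accepted by KMEM_REALLOC_FLAGS_VALID() are
 * assumptions.
 */
static void *
grow_buffer(void *old_ptr, vm_size_t old_size, vm_size_t new_size)
{
	kmem_guard_t guard = {
		.kmg_atomic  = true,
		.kmg_tag     = VM_KERN_MEMORY_OSFMK,    /* hypothetical tag */
		.kmg_context = 0,
	};
	kmem_return_t kmr;

	/* KMR_FREEOLD releases the old mapping once the new one is wired */
	kmr = kmem_realloc_guard(kernel_map, (vm_offset_t)old_ptr,
	    old_size, new_size, KMR_FREEOLD, guard);

	return kmr.kmr_return == KERN_SUCCESS ? (void *)kmr.kmr_address : NULL;
}
#endif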
2306 
2307 #pragma mark map/remap/wire
2308 
2309 kern_return_t
2310 mach_vm_map_kernel(
2311 	vm_map_t                target_map,
2312 	mach_vm_offset_ut      *address,
2313 	mach_vm_size_ut         initial_size,
2314 	mach_vm_offset_ut       mask,
2315 	vm_map_kernel_flags_t   vmk_flags,
2316 	ipc_port_t              port,
2317 	memory_object_offset_ut offset,
2318 	boolean_t               copy,
2319 	vm_prot_ut              cur_protection,
2320 	vm_prot_ut              max_protection,
2321 	vm_inherit_ut           inheritance)
2322 {
2323 	/* range_id is set by vm_map_enter_mem_object */
2324 	return vm_map_enter_mem_object(target_map,
2325 	           address,
2326 	           initial_size,
2327 	           mask,
2328 	           vmk_flags,
2329 	           port,
2330 	           offset,
2331 	           copy,
2332 	           cur_protection,
2333 	           max_protection,
2334 	           inheritance,
2335 	           NULL,
2336 	           0);
2337 }
2338 
2339 kern_return_t
2340 mach_vm_remap_new_kernel(
2341 	vm_map_t                target_map,
2342 	mach_vm_offset_ut      *address,
2343 	mach_vm_size_ut         size,
2344 	mach_vm_offset_ut       mask,
2345 	vm_map_kernel_flags_t   vmk_flags,
2346 	vm_map_t                src_map,
2347 	mach_vm_offset_ut       memory_address,
2348 	boolean_t               copy,
2349 	vm_prot_ut             *cur_protection,   /* IN/OUT */
2350 	vm_prot_ut             *max_protection,   /* IN/OUT */
2351 	vm_inherit_ut           inheritance)
2352 {
2353 	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
2354 	    VM_FLAGS_USER_REMAP)) {
2355 		return KERN_INVALID_ARGUMENT;
2356 	}
2357 
2358 
2359 	vmk_flags.vmf_return_data_addr = true;
2360 
2361 	/* range_id is set by vm_map_remap */
2362 	return vm_map_remap(target_map,
2363 	           address,
2364 	           size,
2365 	           mask,
2366 	           vmk_flags,
2367 	           src_map,
2368 	           memory_address,
2369 	           copy,
2370 	           cur_protection,
2371 	           max_protection,
2372 	           inheritance);
2373 }
2374 
2375 #pragma mark free
2376 
2377 #if KASAN
2378 
2379 __abortlike
2380 static void
2381 __kmem_free_invalid_object_size_panic(
2382 	vm_map_t                map,
2383 	vm_address_t            address,
2384 	vm_size_t               size,
2385 	vm_map_entry_t          entry)
2386 {
2387 	vm_object_t object  = VME_OBJECT(entry);
2388 	vm_size_t   objsize = __kmem_entry_orig_size(entry);
2389 
2390 	panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): "
2391 	    "object %p has unexpected size %ld",
2392 	    map, (void *)address, (size_t)size, entry, object, objsize);
2393 }
2394 
2395 #endif /* KASAN */
2396 
2397 vm_size_t
2398 kmem_free_guard(
2399 	vm_map_t        map,
2400 	vm_offset_t     req_addr,
2401 	vm_size_t       req_size,
2402 	kmf_flags_t     flags,
2403 	kmem_guard_t    guard)
2404 {
2405 	vmr_flags_t     vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2406 	vm_address_t    addr      = req_addr;
2407 	vm_offset_t     delta     = 0;
2408 	vm_size_t       size;
2409 #if KASAN
2410 	vm_map_entry_t  entry;
2411 #endif /* KASAN */
2412 
2413 	assert(map->pmap == kernel_pmap);
2414 
2415 #if KASAN_CLASSIC
2416 	if (flags & KMF_KASAN_GUARD) {
2417 		addr  -= PAGE_SIZE;
2418 		delta  = ptoa(2);
2419 	}
2420 #endif /* KASAN_CLASSIC */
2421 #if CONFIG_KERNEL_TAGGING
2422 	if (flags & KMF_TAG) {
2423 		vm_memtag_verify_tag(req_addr + __kmem_guard_left(ANYF(flags)));
2424 		addr = vm_memtag_canonicalize_kernel(req_addr);
2425 	}
2426 #endif /* CONFIG_KERNEL_TAGGING */
2427 
2428 	if (flags & KMF_GUESS_SIZE) {
2429 		vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
2430 		size = PAGE_SIZE;
2431 	} else if (req_size == 0) {
2432 		__kmem_invalid_size_panic(map, req_size, flags);
2433 	} else {
2434 		size = round_page(req_size) + delta;
2435 	}
2436 
2437 	vm_map_lock(map);
2438 
2439 #if KASAN
2440 	if (!vm_map_lookup_entry(map, addr, &entry)) {
2441 		__kmem_entry_not_found_panic(map, req_addr);
2442 	}
2443 	if (flags & KMF_GUESS_SIZE) {
2444 		vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
2445 		req_size = __kmem_entry_orig_size(entry);
2446 		size = round_page(req_size + delta);
2447 	} else if (guard.kmg_atomic && entry->vme_kernel_object &&
2448 	    __kmem_entry_orig_size(entry) != req_size) {
2449 		/*
2450 		 * We can't make a strict check for regular
2451 		 * VM objects because it could be:
2452 		 *
2453 		 * - the kmem_free_guard() of a kmem_realloc_guard() without
2454 		 *   KMR_FREEOLD, and in that case the object size won't match.
2455 		 *
2456 		 * - a submap, in which case there is no "orig size".
2457 		 */
2458 		__kmem_free_invalid_object_size_panic(map,
2459 		    req_addr, req_size + delta, entry);
2460 	}
2461 #endif /* KASAN */
2462 #if KASAN_CLASSIC
2463 	if (flags & KMR_KASAN_GUARD) {
2464 		kasan_poison_range(addr, size, ASAN_VALID);
2465 	}
2466 #endif
2467 #if KASAN_TBI
2468 	if (flags & KMF_TAG) {
2469 		kasan_tbi_mark_free_space((caddr_t)req_addr, size);
2470 	}
2471 #endif /* KASAN_TBI */
2472 
2473 	/*
2474 	 * vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which
2475 	 * unwires the kernel mapping. The page won't be mapped any longer so
2476 	 * there is no extra step required for memory tagging to "clear"
2477 	 * it -- the page will be laundered later when it is reused.
2478 	 */
2479 	return vm_map_remove_and_unlock(map, addr, addr + size,
2480 	           vmr_flags, guard).kmr_size - delta;
2481 }
2482 
2483 __exported void
2484 kmem_free_external(
2485 	vm_map_t        map,
2486 	vm_offset_t     addr,
2487 	vm_size_t       size);
2488 void
2489 kmem_free_external(
2490 	vm_map_t        map,
2491 	vm_offset_t     addr,
2492 	vm_size_t       size)
2493 {
2494 	if (size) {
2495 		kmem_free(map, trunc_page(addr), size);
2496 #if MACH_ASSERT
2497 	} else {
2498 		printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
2499 		    map, (void *)addr, __builtin_return_address(0));
2500 #endif
2501 	}
2502 }
2503 
2504 #pragma mark kmem metadata
2505 
2506 /*
2507  * Guard objects for kmem pointer allocation:
2508  *
2509  * Guard objects introduce size slabs to kmem pointer allocations that are
2510  * allocated in chunks of n * sizeclass. When an allocation of a specific
2511  * sizeclass is requested, a random slot from [0, n) is returned.
2512  * Allocations are returned from that chunk until m slots are left. The
2513  * remaining m slots are referred to as guard objects. They don't get
2514  * allocated and the chunk is now considered full. When an allocation is
2515  * freed back to the chunk, one of the m + 1 now-free slots becomes
2516  * available for the next allocation of that sizeclass.
2517  *
2518  * Guard objects are intended to make exploitation of use after frees harder
2519  * as allocations that are freed can no longer be reliably reallocated.
2520  * They also make exploitation of OOBs harder as overflowing out of an
2521  * allocation can no longer be safe even with sufficient spraying.
2522  */
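/*
 * Editor's note (illustrative addition, not part of the original source):
 * a small worked example of the scheme above, assuming a sizeclass whose
 * chunk holds n = 8 elements and m = KMEM_NUM_GUARDS = 2 guard slots.
 *
 *	- A fresh chunk starts with all 8 slots free (km_bitmap = 0xff).
 *	- Each allocation picks a random free slot and clears its bit.
 *	- Once only 2 free slots remain, those act as the guard objects and
 *	  the chunk is moved to the "full" list; they are not handed out.
 *	- Freeing one element leaves 3 free slots, and the next allocation of
 *	  that sizeclass again picks one of them at random, so a freed slot
 *	  cannot be reallocated deterministically.
 */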
2523 
2524 #define KMEM_META_PRIMARY    UINT8_MAX
2525 #define KMEM_META_START     (UINT8_MAX - 1)
2526 #define KMEM_META_FREE      (UINT8_MAX - 2)
2527 #if __ARM_16K_PG__
2528 #define KMEM_MIN_SIZE        PAGE_SIZE
2529 #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 16)
2530 #else /* __ARM_16K_PG__ */
2531 /*
2532  * PAGE_SIZE isn't a compile time constant on some arm64 devices. Those
2533  * PAGE_SIZE isn't a compile-time constant on some arm64 devices. Those
2534  * Therefore populate sizeclasses from 4k for those devices.
2535  */
2536 #define KMEM_MIN_SIZE       (4 * 1024)
2537 #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 32)
2538 #endif /* __ARM_16K_PG__ */
2539 #define KMEM_MAX_SIZE       (32ULL << 20)
2540 #define KMEM_START_IDX      (kmem_log2down(KMEM_MIN_SIZE))
2541 #define KMEM_LAST_IDX       (kmem_log2down(KMEM_MAX_SIZE))
2542 #define KMEM_NUM_SIZECLASS  (KMEM_LAST_IDX - KMEM_START_IDX + 1)
2543 #define KMEM_FRONTS         (KMEM_RANGE_ID_NUM_PTR * 2)
2544 #define KMEM_NUM_GUARDS      2
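/*
 * Editor's note (illustrative addition, not part of the original source):
 * with a 16 KB PAGE_SIZE the constants above work out to
 * KMEM_MIN_SIZE = 16 KB, KMEM_CHUNK_SIZE_MIN = 256 KB, KMEM_START_IDX = 14
 * and KMEM_LAST_IDX = 25 (32 MB), so KMEM_NUM_SIZECLASS = 25 - 14 + 1 = 12
 * sizeclasses covering 16 KB, 32 KB, ..., 32 MB. On the 4 KB configuration
 * the ladder starts at 4 KB (KMEM_START_IDX = 12) for 14 sizeclasses, with
 * 128 KB chunks.
 */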
2545 
2546 struct kmem_page_meta {
2547 	union {
2548 		/*
2549 		 * On primary allocated chunk with KMEM_META_PRIMARY marker
2550 		 */
2551 		uint32_t km_bitmap;
2552 		/*
2553 		 * On start and end of free chunk with KMEM_META_FREE marker
2554 		 */
2555 		uint32_t km_free_chunks;
2556 	};
2557 	/*
2558 	 * KMEM_META_PRIMARY: Start meta of allocated chunk
2559 	 * KMEM_META_FREE   : Start and end meta of free chunk
2560 	 * KMEM_META_START  : Meta region start and end
2561 	 */
2562 	uint8_t  km_page_marker;
2563 	uint8_t  km_sizeclass;
2564 	union {
2565 		/*
2566 		 * On primary allocated chunk with KMEM_META_PRIMARY marker
2567 		 */
2568 		uint16_t km_chunk_len;
2569 		/*
2570 		 * On secondary allocated chunks
2571 		 */
2572 		uint16_t km_page_idx;
2573 	};
2574 	LIST_ENTRY(kmem_page_meta) km_link;
2575 } kmem_page_meta_t;
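/*
 * Editor's note (illustrative addition, not part of the original source):
 * metadata layout for a hypothetical run of 3 allocated chunks followed by
 * a 2-chunk free run, as set up by kmem_init_allocated_chunk() and
 * kmem_init_free_chunk() below:
 *
 *	meta[0]  km_page_marker = KMEM_META_PRIMARY, km_chunk_len = 3,
 *	         km_sizeclass = idx, km_bitmap = mask of free slots in the run
 *	meta[1]  km_page_marker = 0, km_page_idx = 1
 *	meta[2]  km_page_marker = 0, km_page_idx = 2
 *	meta[3]  km_page_marker = KMEM_META_FREE, km_free_chunks = 2
 *	meta[4]  km_page_marker = KMEM_META_FREE, km_free_chunks = 2
 *
 * kmem_addr_to_meta_start() uses km_page_idx to step back from any chunk
 * of a run to its primary metadata.
 */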
2576 
2577 typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t;
2578 struct kmem_sizeclass {
2579 	vm_map_size_t                   ks_size;
2580 	uint32_t                        ks_num_chunk;
2581 	uint32_t                        ks_num_elem;
2582 	crypto_random_ctx_t __zpercpu   ks_rng_ctx;
2583 	kmem_list_head_t                ks_allfree_head[KMEM_FRONTS];
2584 	kmem_list_head_t                ks_partial_head[KMEM_FRONTS];
2585 	kmem_list_head_t                ks_full_head[KMEM_FRONTS];
2586 };
2587 
2588 static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS];
2589 
2590 /*
2591  * Locks to synchronize metadata population
2592  */
2593 static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks");
2594 static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp);
2595 #define kmem_meta_lock()   lck_mtx_lock(&kmem_meta_region_lck)
2596 #define kmem_meta_unlock() lck_mtx_unlock(&kmem_meta_region_lck)
2597 
2598 static SECURITY_READ_ONLY_LATE(struct mach_vm_range)
2599 kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1];
2600 static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *)
2601 kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1];
2602 /*
2603  * Keeps track of metadata high water mark for each front
2604  */
2605 static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS];
2606 static SECURITY_READ_ONLY_LATE(vm_map_t)
2607 kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1];
2608 static vm_map_size_t kmem_meta_size;
2609 
2610 static uint32_t
2611 kmem_get_front(
2612 	kmem_range_id_t         range_id,
2613 	bool                    from_right)
2614 {
2615 	assert((range_id >= KMEM_RANGE_ID_FIRST) &&
2616 	    (range_id <= KMEM_RANGE_ID_NUM_PTR));
2617 	return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right;
2618 }
2619 
2620 static inline uint32_t
2621 kmem_slot_idx_to_bit(
2622 	uint32_t                slot_idx,
2623 	uint32_t                size_idx __unused)
2624 {
2625 	assert(slot_idx < kmem_size_array[size_idx].ks_num_elem);
2626 	return 1ull << slot_idx;
2627 }
2628 
2629 static uint32_t
2630 kmem_get_idx_from_size(vm_map_size_t size)
2631 {
2632 	assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE);
2633 	return kmem_log2down(size - 1) - KMEM_START_IDX + 1;
2634 }
2635 
2636 __abortlike
2637 static void
2638 kmem_invalid_size_idx(uint32_t idx)
2639 {
2640 	panic("Invalid sizeclass idx %u", idx);
2641 }
2642 
2643 static vm_map_size_t
2644 kmem_get_size_from_idx(uint32_t idx)
2645 {
2646 	if (__improbable(idx >= KMEM_NUM_SIZECLASS)) {
2647 		kmem_invalid_size_idx(idx);
2648 	}
2649 	return 1ul << (idx + KMEM_START_IDX);
2650 }
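#if 0
/*
 * Editor's illustrative sketch (not part of the original vm_kern.c, not
 * compiled): how a request size maps to a sizeclass index and back,
 * mirroring kmem_get_idx_from_size()/kmem_get_size_from_idx() above.
 * The value 14 stands in for KMEM_START_IDX on a 16 KB KMEM_MIN_SIZE
 * configuration; kmem_log2down() is assumed to be floor(log2(x)).
 */
#include <stdio.h>

int
main(void)
{
	unsigned long sizes[] = { 16384, 16385, 32768, 40000 };

	for (int i = 0; i < 4; i++) {
		/* round the request up to the next power of two >= 16 KB */
		int idx = (63 - __builtin_clzl(sizes[i] - 1)) - 14 + 1;
		unsigned long slab = 1ul << (idx + 14);

		/* 16384 -> 0 (16 KB), 16385 and 32768 -> 1 (32 KB), 40000 -> 2 (64 KB) */
		printf("size %-6lu -> sizeclass %d (%lu bytes)\n", sizes[i], idx, slab);
	}
	return 0;
}
#endif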
2651 
2652 static inline uint16_t
2653 kmem_get_page_idx(struct kmem_page_meta *meta)
2654 {
2655 	uint8_t page_marker = meta->km_page_marker;
2656 
2657 	return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx;
2658 }
2659 
2660 __abortlike
2661 static void
2662 kmem_invalid_chunk_len(struct kmem_page_meta *meta)
2663 {
2664 	panic("Reading free chunks for meta %p where marker != KMEM_META_PRIMARY",
2665 	    meta);
2666 }
2667 
2668 static inline uint16_t
2669 kmem_get_chunk_len(struct kmem_page_meta *meta)
2670 {
2671 	if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) {
2672 		kmem_invalid_chunk_len(meta);
2673 	}
2674 
2675 	return meta->km_chunk_len;
2676 }
2677 
2678 __abortlike
2679 static void
2680 kmem_invalid_free_chunk_len(struct kmem_page_meta *meta)
2681 {
2682 	panic("Reading free chunks for meta %p where marker != KMEM_META_FREE",
2683 	    meta);
2684 }
2685 
2686 static inline uint32_t
2687 kmem_get_free_chunk_len(struct kmem_page_meta *meta)
2688 {
2689 	if (__improbable(meta->km_page_marker != KMEM_META_FREE)) {
2690 		kmem_invalid_free_chunk_len(meta);
2691 	}
2692 
2693 	return meta->km_free_chunks;
2694 }
2695 
2696 /*
2697  * Return the metadata corresponding to the specified address
2698  */
2699 static struct kmem_page_meta *
2700 kmem_addr_to_meta(
2701 	vm_map_offset_t         addr,
2702 	vm_map_range_id_t       range_id,
2703 	vm_map_offset_t        *range_start,
2704 	uint64_t               *meta_idx)
2705 {
2706 	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
2707 
2708 	*range_start = kmem_ranges[range_id].min_address;
2709 	*meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN;
2710 	return VM_FAR_ADD_PTR_UNBOUNDED(meta_base, *meta_idx);
2711 }
2712 
2713 /*
2714  * Return the metadata start of the chunk that the address belongs to
2715  */
2716 static struct kmem_page_meta *
2717 kmem_addr_to_meta_start(
2718 	vm_address_t            addr,
2719 	vm_map_range_id_t       range_id,
2720 	vm_map_offset_t        *chunk_start)
2721 {
2722 	vm_map_offset_t range_start;
2723 	uint64_t meta_idx;
2724 	struct kmem_page_meta *meta;
2725 
2726 	meta = kmem_addr_to_meta(addr, range_id, &range_start, &meta_idx);
2727 	meta_idx -= kmem_get_page_idx(meta);
2728 	meta = VM_FAR_ADD_PTR_UNBOUNDED(meta, -(ptrdiff_t)kmem_get_page_idx(meta));
2729 	assert(meta->km_page_marker == KMEM_META_PRIMARY);
2730 	*chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN);
2731 	return meta;
2732 }
2733 
2734 __startup_func
2735 static void
2736 kmem_init_meta_front(
2737 	struct kmem_page_meta  *meta,
2738 	kmem_range_id_t         range_id,
2739 	bool                    from_right)
2740 {
2741 	kernel_memory_populate(trunc_page((vm_map_offset_t) meta), PAGE_SIZE,
2742 	    KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK);
2743 	meta->km_page_marker = KMEM_META_START;
2744 	if (!from_right) {
2745 		meta++;
2746 		kmem_meta_base[range_id] = meta;
2747 	}
2748 	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta;
2749 }
2750 
2751 __startup_func
2752 static void
2753 kmem_metadata_init(void)
2754 {
2755 	for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) {
2756 		vm_map_offset_t addr = kmem_meta_range[i].min_address;
2757 		struct kmem_page_meta *meta;
2758 		uint64_t meta_idx;
2759 
2760 		vm_map_will_allocate_early_map(&kmem_meta_map[i]);
2761 		kmem_meta_map[i] = kmem_suballoc(kernel_map, &addr, kmem_meta_size,
2762 		    VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST,
2763 		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
2764 		    KMS_PERMANENT | KMS_NOFAIL | KMS_NOSOFTLIMIT,
2765 		    VM_KERN_MEMORY_OSFMK).kmr_submap;
2766 
2767 		kmem_meta_range[i].min_address = addr;
2768 		kmem_meta_range[i].max_address = addr + kmem_meta_size;
2769 
2770 		meta = (struct kmem_page_meta *) kmem_meta_range[i].min_address;
2771 		kmem_init_meta_front(meta, i, 0);
2772 
2773 		meta = kmem_addr_to_meta(kmem_ranges[i].max_address, i, &addr,
2774 		    &meta_idx);
2775 		kmem_init_meta_front(meta, i, 1);
2776 	}
2777 }
2778 
2779 __startup_func
2780 static void
2781 kmem_init_front_head(
2782 	struct kmem_sizeclass  *ks,
2783 	uint32_t                front)
2784 {
2785 	LIST_INIT(&ks->ks_allfree_head[front]);
2786 	LIST_INIT(&ks->ks_partial_head[front]);
2787 	LIST_INIT(&ks->ks_full_head[front]);
2788 }
2789 
2790 __startup_func
2791 static void
2792 kmem_sizeclass_init(void)
2793 {
2794 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2795 		struct kmem_sizeclass *ks = &kmem_size_array[i];
2796 		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
2797 
2798 		ks->ks_size = kmem_get_size_from_idx(i);
2799 		ks->ks_num_chunk = roundup(8 * ks->ks_size, KMEM_CHUNK_SIZE_MIN) /
2800 		    KMEM_CHUNK_SIZE_MIN;
2801 		ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size;
2802 		assert(ks->ks_num_elem <=
2803 		    (sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8));
2804 		for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) {
2805 			kmem_init_front_head(ks, kmem_get_front(range_id, 0));
2806 			kmem_init_front_head(ks, kmem_get_front(range_id, 1));
2807 		}
2808 	}
2809 }
2810 
2811 /*
2812  * This is done during EARLY_BOOT as it needs the corecrypto module to be
2813  * set up.
2814  */
2815 __startup_func
2816 static void
2817 kmem_crypto_init(void)
2818 {
2819 	vm_size_t ctx_size = crypto_random_kmem_ctx_size();
2820 
2821 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2822 		struct kmem_sizeclass *ks = &kmem_size_array[i];
2823 
2824 		ks->ks_rng_ctx = zalloc_percpu_permanent(ctx_size, ZALIGN_PTR);
2825 		zpercpu_foreach(ctx, ks->ks_rng_ctx) {
2826 			crypto_random_kmem_init(ctx);
2827 		}
2828 	}
2829 }
2830 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init);
2831 
2832 __abortlike
2833 static void
2834 kmem_validate_slot_panic(
2835 	vm_map_offset_t         addr,
2836 	struct kmem_page_meta  *meta,
2837 	uint32_t                slot_idx,
2838 	uint32_t                size_idx)
2839 {
2840 	if (meta->km_page_marker != KMEM_META_PRIMARY) {
2841 		panic("Metadata (%p) for addr (%p) not primary", meta, (void *)addr);
2842 	}
2843 	if (meta->km_sizeclass != size_idx) {
2844 		panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion",
2845 		    meta, meta->km_sizeclass, size_idx);
2846 	}
2847 	panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free",
2848 	    slot_idx, meta, (void *)addr);
2849 }
2850 
2851 __abortlike
2852 static void
2853 kmem_invalid_slot_for_addr(
2854 	mach_vm_range_t         slot,
2855 	vm_map_offset_t         start,
2856 	vm_map_offset_t         end)
2857 {
2858 	panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]",
2859 	    (void *)slot->min_address, (void *)slot->max_address,
2860 	    (void *)start, (void *)end);
2861 }
2862 
2863 void
2864 kmem_validate_slot(
2865 	vm_map_offset_t         addr,
2866 	struct kmem_page_meta  *meta,
2867 	uint32_t                size_idx,
2868 	uint32_t                slot_idx)
2869 {
2870 	if ((meta->km_page_marker != KMEM_META_PRIMARY) ||
2871 	    (meta->km_sizeclass != size_idx) ||
2872 	    ((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) {
2873 		kmem_validate_slot_panic(addr, meta, size_idx, slot_idx);
2874 	}
2875 }
2876 
2877 static void
2878 kmem_validate_slot_initial(
2879 	mach_vm_range_t         slot,
2880 	vm_map_offset_t         start,
2881 	vm_map_offset_t         end,
2882 	struct kmem_page_meta  *meta,
2883 	uint32_t                size_idx,
2884 	uint32_t                slot_idx)
2885 {
2886 	if ((slot->min_address == 0) || (slot->max_address == 0) ||
2887 	    (start < slot->min_address) || (start >= slot->max_address) ||
2888 	    (end > slot->max_address)) {
2889 		kmem_invalid_slot_for_addr(slot, start, end);
2890 	}
2891 
2892 	kmem_validate_slot(start, meta, size_idx, slot_idx);
2893 }
2894 
2895 uint32_t
2896 kmem_addr_get_slot_idx(
2897 	vm_map_offset_t         start,
2898 	vm_map_offset_t         end,
2899 	vm_map_range_id_t       range_id,
2900 	struct kmem_page_meta **meta,
2901 	uint32_t               *size_idx,
2902 	mach_vm_range_t         slot)
2903 {
2904 	vm_map_offset_t chunk_start;
2905 	vm_map_size_t slot_size;
2906 	uint32_t slot_idx;
2907 
2908 	*meta = kmem_addr_to_meta_start(start, range_id, &chunk_start);
2909 	*size_idx = (*meta)->km_sizeclass;
2910 	slot_size = kmem_get_size_from_idx(*size_idx);
2911 	slot_idx = (start - chunk_start) / slot_size;
2912 	slot->min_address = chunk_start + slot_idx * slot_size;
2913 	slot->max_address = slot->min_address + slot_size;
2914 
2915 	kmem_validate_slot_initial(slot, start, end, *meta, *size_idx, slot_idx);
2916 
2917 	return slot_idx;
2918 }
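#if 0
/*
 * Editor's illustrative sketch (not part of the original vm_kern.c, not
 * compiled): the address arithmetic used by kmem_addr_get_slot_idx(),
 * stripped of the metadata lookups and assuming the address falls in a
 * single-chunk run. The range base, chunk size (256 KB) and slot size
 * (16 KB) are hypothetical example values.
 */
#include <stdio.h>

int
main(void)
{
	unsigned long long range_start = 0xffffff8000000000ull;
	unsigned long long chunk_size  = 256 * 1024;
	unsigned long long slot_size   = 16 * 1024;
	unsigned long long addr        = range_start + 5 * chunk_size + 3 * slot_size + 0x40;

	unsigned long long meta_idx    = (addr - range_start) / chunk_size;   /* chunk 5 */
	unsigned long long chunk_start = range_start + meta_idx * chunk_size;
	unsigned long long slot_idx    = (addr - chunk_start) / slot_size;    /* slot 3 */
	unsigned long long slot_min    = chunk_start + slot_idx * slot_size;

	printf("meta_idx %llu, slot_idx %llu, slot [%#llx, %#llx)\n",
	    meta_idx, slot_idx, slot_min, slot_min + slot_size);
	return 0;
}
#endif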
2919 
2920 static bool
2921 kmem_populate_needed(vm_offset_t from, vm_offset_t to)
2922 {
2923 #if KASAN
2924 #pragma unused(from, to)
2925 	return true;
2926 #else
2927 	vm_offset_t page_addr = trunc_page(from);
2928 
2929 	for (; page_addr < to; page_addr += PAGE_SIZE) {
2930 		/*
2931 		 * This can race with another thread doing a populate on the same metadata
2932 		 * page, where we see an updated pmap but unmapped KASan shadow, causing a
2933 		 * fault in the shadow when we first access the metadata page. Avoid this
2934 		 * by always synchronizing on the kmem_meta_lock with KASan.
2935 		 */
2936 		if (!pmap_find_phys(kernel_pmap, page_addr)) {
2937 			return true;
2938 		}
2939 	}
2940 
2941 	return false;
2942 #endif /* !KASAN */
2943 }
2944 
2945 static void
2946 kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to)
2947 {
2948 	vm_offset_t page_addr = trunc_page(from);
2949 
2950 	vm_map_unlock(kernel_map);
2951 
2952 	for (; page_addr < to; page_addr += PAGE_SIZE) {
2953 		for (;;) {
2954 			kern_return_t ret = KERN_SUCCESS;
2955 
2956 			/*
2957 			 * All updates to kmem metadata are done under the kmem_meta_lock
2958 			 */
2959 			kmem_meta_lock();
2960 			if (0 == pmap_find_phys(kernel_pmap, page_addr)) {
2961 				ret = kernel_memory_populate(page_addr,
2962 				    PAGE_SIZE, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
2963 				    VM_KERN_MEMORY_OSFMK);
2964 			}
2965 			kmem_meta_unlock();
2966 
2967 			if (ret == KERN_SUCCESS) {
2968 				break;
2969 			}
2970 
2971 			/*
2972 			 * We can't pass KMA_NOPAGEWAIT under a global lock as it leads
2973 			 * to bad system deadlocks, so if the allocation failed,
2974 			 * we need to do the VM_PAGE_WAIT() outside of the lock.
2975 			 */
2976 			VM_PAGE_WAIT();
2977 		}
2978 	}
2979 
2980 	vm_map_lock(kernel_map);
2981 }
2982 
2983 __abortlike
2984 static void
2985 kmem_invalid_meta_panic(
2986 	struct kmem_page_meta  *meta,
2987 	uint32_t                slot_idx,
2988 	struct kmem_sizeclass   sizeclass)
2989 {
2990 	uint32_t size_idx = kmem_get_idx_from_size(sizeclass.ks_size);
2991 
2992 	if (slot_idx >= sizeclass.ks_num_elem) {
2993 		panic("Invalid slot idx %u [0:%u] for meta %p", slot_idx,
2994 		    sizeclass.ks_num_elem, meta);
2995 	}
2996 	if (meta->km_sizeclass != size_idx) {
2997 		panic("Invalid size_idx (%u != %u) in meta %p", size_idx,
2998 		    meta->km_sizeclass, meta);
2999 	}
3000 	panic("page_marker %u not primary in meta %p", meta->km_page_marker, meta);
3001 }
3002 
3003 __abortlike
3004 static void
3005 kmem_slot_has_entry_panic(
3006 	vm_map_entry_t          entry,
3007 	vm_map_offset_t         addr)
3008 {
3009 	panic("Entry (%p) already exists for addr (%p) being returned",
3010 	    entry, (void *)addr);
3011 }
3012 
3013 __abortlike
3014 static void
3015 kmem_slot_not_found(
3016 	struct kmem_page_meta  *meta,
3017 	uint32_t                slot_idx)
3018 {
3019 	panic("%uth free slot not found for meta %p bitmap %u", slot_idx, meta,
3020 	    meta->km_bitmap);
3021 }
3022 
3023 /*
3024  * Returns a 16-bit random number between 0 and
3025  * upper_limit (inclusive)
3026  */
3027 __startup_func
3028 uint16_t
3029 kmem_get_random16(
3030 	uint16_t                upper_limit)
3031 {
3032 	static uint64_t random_entropy;
3033 	assert(upper_limit < UINT16_MAX);
3034 	if (random_entropy == 0) {
3035 		random_entropy = early_random();
3036 	}
3037 	uint32_t result = random_entropy & UINT32_MAX;
3038 	random_entropy >>= 32;
3039 	return (uint16_t)(result % (upper_limit + 1));
3040 }
3041 
3042 static uint32_t
3043 kmem_get_nth_free_slot(
3044 	struct kmem_page_meta  *meta,
3045 	uint32_t                n,
3046 	uint32_t                bitmap)
3047 {
3048 	uint32_t zeros_seen = 0, ones_seen = 0;
3049 
3050 	while (bitmap) {
3051 		uint32_t count = __builtin_ctz(bitmap);
3052 
3053 		zeros_seen += count;
3054 		bitmap >>= count;
3055 		if (__probable(~bitmap)) {
3056 			count = __builtin_ctz(~bitmap);
3057 		} else {
3058 			count = 32;
3059 		}
3060 		if (count + ones_seen > n) {
3061 			return zeros_seen + n;
3062 		}
3063 		ones_seen += count;
3064 		bitmap >>= count;
3065 	}
3066 
3067 	kmem_slot_not_found(meta, n);
3068 }
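#if 0
/*
 * Editor's illustrative sketch (not part of the original vm_kern.c, not
 * compiled): a plain reimplementation of the "nth set bit" walk above,
 * where set bits in the bitmap denote free slots and n must be smaller
 * than the number of set bits. With bitmap 0xD6 (0b11010110) the free
 * slot indices are 1, 2, 4, 6 and 7, so asking for n = 2 yields slot
 * index 4, matching kmem_get_nth_free_slot().
 */
#include <assert.h>
#include <stdint.h>

static uint32_t
nth_set_bit(uint32_t bitmap, uint32_t n)
{
	uint32_t zeros_seen = 0, ones_seen = 0;

	while (bitmap) {
		uint32_t count = (uint32_t)__builtin_ctz(bitmap);

		zeros_seen += count;            /* skip a run of allocated slots */
		bitmap >>= count;
		count = ~bitmap ? (uint32_t)__builtin_ctz(~bitmap) : 32;
		if (count + ones_seen > n) {    /* nth free slot lies in this run */
			return zeros_seen + n;
		}
		ones_seen += count;             /* consume the whole run of free slots */
		bitmap >>= count;
	}
	assert(0 && "fewer than n free slots in bitmap");
	return UINT32_MAX;
}

int
main(void)
{
	assert(nth_set_bit(0xD6, 0) == 1);
	assert(nth_set_bit(0xD6, 2) == 4);
	assert(nth_set_bit(0xD6, 4) == 7);
	return 0;
}
#endif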
3069 
3070 
3071 static uint32_t
3072 kmem_get_next_slot(
3073 	struct kmem_page_meta  *meta,
3074 	struct kmem_sizeclass   sizeclass,
3075 	uint32_t                bitmap)
3076 {
3077 	uint32_t num_slots = __builtin_popcount(bitmap);
3078 	uint64_t slot_idx = 0;
3079 
3080 	assert(num_slots > 0);
3081 	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
3082 		/*
3083 		 * Use early random prior to early boot as the ks_rng_ctx requires
3084 		 * the corecrypto module to be set up before it is initialized and
3085 		 * used.
3086 		 *
3087 		 * num_slots can't be 0 as we take this path when we have more than
3088 		 * one slot left.
3089 		 */
3090 		slot_idx = kmem_get_random16((uint16_t)num_slots - 1);
3091 	} else {
3092 		crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), num_slots,
3093 		    &slot_idx);
3094 	}
3095 
3096 	return kmem_get_nth_free_slot(meta, slot_idx, bitmap);
3097 }
3098 
3099 /*
3100  * Returns an unallocated slot from the given metadata
3101  */
3102 static vm_map_offset_t
3103 kmem_get_addr_from_meta(
3104 	struct kmem_page_meta  *meta,
3105 	vm_map_range_id_t       range_id,
3106 	struct kmem_sizeclass   sizeclass,
3107 	vm_map_entry_t         *entry)
3108 {
3109 	vm_map_offset_t addr;
3110 	vm_map_size_t size = sizeclass.ks_size;
3111 	uint32_t size_idx = kmem_get_idx_from_size(size);
3112 	uint64_t meta_idx = meta - kmem_meta_base[range_id];
3113 	mach_vm_offset_t range_start = kmem_ranges[range_id].min_address;
3114 	uint32_t slot_bit;
3115 	uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, meta->km_bitmap);
3116 
3117 	if ((slot_idx >= sizeclass.ks_num_elem) ||
3118 	    (meta->km_sizeclass != size_idx) ||
3119 	    (meta->km_page_marker != KMEM_META_PRIMARY)) {
3120 		kmem_invalid_meta_panic(meta, slot_idx, sizeclass);
3121 	}
3122 
3123 	slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx);
3124 	meta->km_bitmap &= ~slot_bit;
3125 
3126 	addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size);
3127 	assert(kmem_range_contains_fully(range_id, addr, size));
3128 	if (vm_map_lookup_entry(kernel_map, addr, entry)) {
3129 		kmem_slot_has_entry_panic(*entry, addr);
3130 	}
3131 	if ((*entry != vm_map_to_entry(kernel_map)) &&
3132 	    ((*entry)->vme_next != vm_map_to_entry(kernel_map)) &&
3133 	    ((*entry)->vme_next->vme_start < (addr + size))) {
3134 		kmem_slot_has_entry_panic(*entry, addr);
3135 	}
3136 	return addr;
3137 }
3138 
3139 __abortlike
3140 static void
3141 kmem_range_out_of_va(
3142 	kmem_range_id_t         range_id,
3143 	uint32_t                num_chunks)
3144 {
3145 	panic("No more VA to allocate %u chunks in range %u", num_chunks, range_id);
3146 }
3147 
3148 static void
3149 kmem_init_allocated_chunk(
3150 	struct kmem_page_meta  *meta,
3151 	struct kmem_sizeclass   sizeclass,
3152 	uint32_t                size_idx)
3153 {
3154 	uint32_t meta_num = sizeclass.ks_num_chunk;
3155 	uint32_t num_elem = sizeclass.ks_num_elem;
3156 
3157 	meta->km_bitmap = (1ull << num_elem) - 1;
3158 	meta->km_chunk_len = (uint16_t)meta_num;
3159 	assert(LIST_NEXT(meta, km_link) == NULL);
3160 	assert(meta->km_link.le_prev == NULL);
3161 	meta->km_sizeclass = (uint8_t)size_idx;
3162 	meta->km_page_marker = KMEM_META_PRIMARY;
3163 	meta++;
3164 	for (uint32_t i = 1; i < meta_num; i++) {
3165 		meta->km_page_idx = (uint16_t)i;
3166 		meta->km_sizeclass = (uint8_t)size_idx;
3167 		meta->km_page_marker = 0;
3168 		meta->km_bitmap = 0;
3169 		meta++;
3170 	}
3171 }
3172 
3173 static uint32_t
3174 kmem_get_additional_meta(
3175 	struct kmem_page_meta  *meta,
3176 	uint32_t                meta_req,
3177 	bool                    from_right,
3178 	struct kmem_page_meta **adj_free_meta)
3179 {
3180 	struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1);
3181 
3182 	if (meta_prev->km_page_marker == KMEM_META_FREE) {
3183 		uint32_t chunk_len = kmem_get_free_chunk_len(meta_prev);
3184 
3185 		*adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1);
3186 		meta_req -= chunk_len;
3187 	} else {
3188 		*adj_free_meta = NULL;
3189 	}
3190 
3191 	return meta_req;
3192 }
3193 
3194 
3195 static struct kmem_page_meta *
3196 kmem_get_new_chunk(
3197 	vm_map_range_id_t       range_id,
3198 	bool                    from_right,
3199 	uint32_t                size_idx)
3200 {
3201 	struct kmem_sizeclass sizeclass = kmem_size_array[size_idx];
3202 	struct kmem_page_meta *start, *end, *meta_update;
3203 	struct kmem_page_meta *adj_free_meta = NULL;
3204 	uint32_t meta_req = sizeclass.ks_num_chunk;
3205 
3206 	for (;;) {
3207 		struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3208 		struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3209 		struct kmem_page_meta *meta;
3210 		vm_offset_t start_addr, end_addr;
3211 		uint32_t meta_num;
3212 
3213 		meta = from_right ? metab : metaf;
3214 		meta_num = kmem_get_additional_meta(meta, meta_req, from_right,
3215 		    &adj_free_meta);
3216 
3217 		if (metaf + meta_num >= metab) {
3218 			kmem_range_out_of_va(range_id, meta_num);
3219 		}
3220 
3221 		start = from_right ? (metab - meta_num) : metaf;
3222 		end = from_right ? metab : (metaf + meta_num);
3223 
3224 		start_addr = (vm_offset_t)start;
3225 		end_addr   = (vm_offset_t)end;
3226 
3227 		/*
3228 		 * If the new high watermark stays on the same page,
3229 		 * there is no need to populate (and hence no need to drop the lock).
3230 		 */
3231 		if (!page_aligned(from_right ? end_addr : start_addr) &&
3232 		    trunc_page(start_addr) == trunc_page(end_addr - 1)) {
3233 			break;
3234 		}
3235 		if (!kmem_populate_needed(start_addr, end_addr)) {
3236 			break;
3237 		}
3238 
3239 		kmem_populate_meta_locked(start_addr, end_addr);
3240 
3241 		/*
3242 		 * Since we dropped the lock, reassess that the conditions still hold:
3243 		 * - the HWM we are changing must not have moved
3244 		 * - the other HWM must not intersect with ours
3245 		 * - in case of coalescing, the adjacent free meta must still
3246 		 *   be free and of the same size.
3247 		 *
3248 		 * If we failed to grow, reevaluate whether freelists have
3249 		 * entries now by returning NULL.
3250 		 */
3251 		metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3252 		metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3253 		if (meta != (from_right ? metab : metaf)) {
3254 			return NULL;
3255 		}
3256 		if (metaf + meta_num >= metab) {
3257 			kmem_range_out_of_va(range_id, meta_num);
3258 		}
3259 		if (adj_free_meta) {
3260 			if (adj_free_meta->km_page_marker != KMEM_META_FREE ||
3261 			    kmem_get_free_chunk_len(adj_free_meta) !=
3262 			    meta_req - meta_num) {
3263 				return NULL;
3264 			}
3265 		}
3266 
3267 		break;
3268 	}
3269 
3270 	/*
3271 	 * If there is an adjacent free chunk remove it from free list
3272 	 */
3273 	if (adj_free_meta) {
3274 		LIST_REMOVE(adj_free_meta, km_link);
3275 		LIST_NEXT(adj_free_meta, km_link) = NULL;
3276 		adj_free_meta->km_link.le_prev = NULL;
3277 	}
3278 
3279 	/*
3280 	 * Update hwm
3281 	 */
3282 	meta_update = from_right ? start : end;
3283 	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update;
3284 
3285 	/*
3286 	 * Initialize metadata
3287 	 */
3288 	start = from_right ? start : (end - meta_req);
3289 	kmem_init_allocated_chunk(start, sizeclass, size_idx);
3290 
3291 	return start;
3292 }
3293 
3294 static void
3295 kmem_requeue_meta(
3296 	struct kmem_page_meta  *meta,
3297 	struct kmem_list_head  *head)
3298 {
3299 	LIST_REMOVE(meta, km_link);
3300 	LIST_INSERT_HEAD(head, meta, km_link);
3301 }
3302 
3303 /*
3304  * Return corresponding sizeclass to stash free chunks in
3305  */
3306 __abortlike
3307 static void
3308 kmem_invalid_chunk_num(uint32_t chunks)
3309 {
3310 	panic("Invalid number of chunks %u\n", chunks);
3311 }
3312 
3313 static uint32_t
3314 kmem_get_size_idx_for_chunks(uint32_t chunks)
3315 {
3316 	for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) {
3317 		if (chunks >= kmem_size_array[i].ks_num_chunk) {
3318 			return i;
3319 		}
3320 	}
3321 	kmem_invalid_chunk_num(chunks);
3322 }
3323 
3324 static void
3325 kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count)
3326 {
3327 	bzero(meta, count * sizeof(struct kmem_page_meta));
3328 }
3329 
3330 static void
3331 kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count)
3332 {
3333 #if MACH_ASSERT
3334 	size_t size = count * sizeof(struct kmem_page_meta);
3335 
3336 	assert(memcmp_zero_ptr_aligned(meta, size) == 0);
3337 #else
3338 #pragma unused(meta, count)
3339 #endif
3340 }
3341 
3342 /*!
3343  * @function kmem_init_free_chunk()
3344  *
3345  * @discussion
3346  * This function prepares a range of chunks to be put on a free list.
3347  * The first and last metadata might be dirty, but the "inner" ones
3348  * must be zero filled by the caller prior to calling this function.
3349  */
3350 static void
3351 kmem_init_free_chunk(
3352 	struct kmem_page_meta  *meta,
3353 	uint32_t                num_chunks,
3354 	uint32_t                front)
3355 {
3356 	struct kmem_sizeclass *sizeclass;
3357 	uint32_t size_idx = kmem_get_size_idx_for_chunks(num_chunks);
3358 
3359 	if (num_chunks > 2) {
3360 		kmem_check_meta_range_is_clear(meta + 1, num_chunks - 2);
3361 	}
3362 
3363 	meta[0] = (struct kmem_page_meta){
3364 		.km_free_chunks = num_chunks,
3365 		.km_page_marker = KMEM_META_FREE,
3366 		.km_sizeclass   = (uint8_t)size_idx,
3367 	};
3368 	if (num_chunks > 1) {
3369 		meta[num_chunks - 1] = (struct kmem_page_meta){
3370 			.km_free_chunks = num_chunks,
3371 			.km_page_marker = KMEM_META_FREE,
3372 			.km_sizeclass   = (uint8_t)size_idx,
3373 		};
3374 	}
3375 
3376 	sizeclass = &kmem_size_array[size_idx];
3377 	LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link);
3378 }
3379 
3380 static struct kmem_page_meta *
3381 kmem_get_free_chunk_from_list(
3382 	struct kmem_sizeclass  *org_sizeclass,
3383 	uint32_t                size_idx,
3384 	uint32_t                front)
3385 {
3386 	struct kmem_sizeclass *sizeclass;
3387 	uint32_t num_chunks = org_sizeclass->ks_num_chunk;
3388 	struct kmem_page_meta *meta;
3389 	uint32_t idx = size_idx;
3390 
3391 	while (idx < KMEM_NUM_SIZECLASS) {
3392 		sizeclass = &kmem_size_array[idx];
3393 		meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]);
3394 		if (meta) {
3395 			break;
3396 		}
3397 		idx++;
3398 	}
3399 
3400 	/*
3401 	 * Trim the chunk if it is larger than the requested size
3402 	 */
3403 	if (meta) {
3404 		uint32_t num_chunks_free = kmem_get_free_chunk_len(meta);
3405 
3406 		assert(meta->km_page_marker == KMEM_META_FREE);
3407 		LIST_REMOVE(meta, km_link);
3408 		LIST_NEXT(meta, km_link) = NULL;
3409 		meta->km_link.le_prev = NULL;
3410 		if (num_chunks_free > num_chunks) {
3411 			num_chunks_free -= num_chunks;
3412 			kmem_init_free_chunk(meta + num_chunks, num_chunks_free, front);
3413 		}
3414 
3415 		kmem_init_allocated_chunk(meta, *org_sizeclass, size_idx);
3416 	}
3417 
3418 	return meta;
3419 }
3420 
3421 kern_return_t
3422 kmem_locate_space(
3423 	vm_map_size_t           size,
3424 	vm_map_range_id_t       range_id,
3425 	bool                    from_right,
3426 	vm_map_offset_t        *start_inout,
3427 	vm_map_entry_t         *entry_out)
3428 {
3429 	vm_map_entry_t entry;
3430 	uint32_t size_idx = kmem_get_idx_from_size(size);
3431 	uint32_t front = kmem_get_front(range_id, from_right);
3432 	struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx];
3433 	struct kmem_page_meta *meta;
3434 
3435 	assert(size <= sizeclass->ks_size);
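	/*
	 * Annotation: allocation proceeds in three steps below. Reuse a partially
	 * used chunk first; then carve from an existing free chunk, returning any
	 * trimmed remainder to a free list; and only then grow the metadata
	 * high-water mark with a brand new chunk.
	 */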
3436 again:
3437 	if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) {
3438 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3439 		/*
3440 		 * Requeue to full if necessary
3441 		 */
3442 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3443 		if (__builtin_popcount(meta->km_bitmap) == KMEM_NUM_GUARDS) {
3444 			kmem_requeue_meta(meta, &sizeclass->ks_full_head[front]);
3445 		}
3446 	} else if ((meta = kmem_get_free_chunk_from_list(sizeclass, size_idx,
3447 	    front)) != NULL) {
3448 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3449 		/*
3450 		 * Queue to partial
3451 		 */
3452 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3453 		assert(__builtin_popcount(meta->km_bitmap) > KMEM_NUM_GUARDS);
3454 		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3455 	} else {
3456 		meta = kmem_get_new_chunk(range_id, from_right, size_idx);
3457 		if (meta == NULL) {
3458 			goto again;
3459 		}
3460 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3461 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3462 		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3463 	}
3464 
3465 	if (entry_out) {
3466 		*entry_out = entry;
3467 	}
3468 
3469 	return KERN_SUCCESS;
3470 }
3471 
3472 /*
3473  * Determine whether the given metadata was allocated from the right
3474  */
3475 static bool
3476 kmem_meta_is_from_right(
3477 	kmem_range_id_t         range_id,
3478 	struct kmem_page_meta  *meta)
3479 {
3480 	struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3481 	__assert_only struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3482 	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
3483 	struct kmem_page_meta *meta_end;
3484 
3485 	meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address;
3486 
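	/*
	 * Annotation: metadata in [meta_base, left hwm) was carved from the left
	 * front; everything else is expected to lie in [right hwm, meta_end) and
	 * therefore came from the right front, which the assert below checks.
	 */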
3487 	if ((meta >= meta_base) && (meta < metaf)) {
3488 		return false;
3489 	}
3490 
3491 	assert(meta >= metab && meta < meta_end);
3492 	return true;
3493 }
3494 
3495 static void
3496 kmem_free_chunk(
3497 	kmem_range_id_t         range_id,
3498 	struct kmem_page_meta  *meta,
3499 	bool                    from_right)
3500 {
3501 	struct kmem_page_meta *meta_coalesce = meta - 1;
3502 	struct kmem_page_meta *meta_start = meta;
3503 	uint32_t num_chunks = kmem_get_chunk_len(meta);
3504 	uint32_t add_chunks;
3505 	struct kmem_page_meta *meta_end = meta + num_chunks;
3506 	struct kmem_page_meta *meta_hwm_l, *meta_hwm_r;
3507 	uint32_t front = kmem_get_front(range_id, from_right);
3508 
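	/*
	 * Annotation: kmem_init_free_chunk() stores the run length in both the
	 * first and last metadata entries of a free run, so inspecting the single
	 * entry immediately to the left or right is enough to locate and merge an
	 * adjacent free neighbor.
	 */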
3509 	meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3510 	meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3511 
3512 	LIST_REMOVE(meta, km_link);
3513 	kmem_clear_meta_range(meta, num_chunks);
3514 
3515 	/*
3516 	 * Coalesce left
3517 	 */
3518 	if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) &&
3519 	    (meta_coalesce->km_page_marker == KMEM_META_FREE)) {
3520 		meta_start = meta_coalesce - kmem_get_free_chunk_len(meta_coalesce) + 1;
3521 		add_chunks = kmem_get_free_chunk_len(meta_start);
3522 		num_chunks += add_chunks;
3523 		LIST_REMOVE(meta_start, km_link);
3524 		kmem_clear_meta_range(meta_start + add_chunks - 1, 1);
3525 	}
3526 
3527 	/*
3528 	 * Coalesce right
3529 	 */
3530 	if (((!from_right && (meta_end < meta_hwm_l)) || from_right) &&
3531 	    (meta_end->km_page_marker == KMEM_META_FREE)) {
3532 		add_chunks = kmem_get_free_chunk_len(meta_end);
3533 		LIST_REMOVE(meta_end, km_link);
3534 		kmem_clear_meta_range(meta_end, 1);
3535 		meta_end = meta_end + add_chunks;
3536 		num_chunks += add_chunks;
3537 	}
3538 
3539 	kmem_init_free_chunk(meta_start, num_chunks, front);
3540 }
3541 
3542 static void
3543 kmem_free_slot(
3544 	kmem_range_id_t         range_id,
3545 	mach_vm_range_t         slot)
3546 {
3547 	struct kmem_page_meta *meta;
3548 	vm_map_offset_t chunk_start;
3549 	uint32_t size_idx, chunk_elem, slot_idx, num_elem;
3550 	struct kmem_sizeclass *sizeclass;
3551 	vm_map_size_t slot_size;
3552 
3553 	meta = kmem_addr_to_meta_start(slot->min_address, range_id, &chunk_start);
3554 	size_idx = meta->km_sizeclass;
3555 	slot_size = kmem_get_size_from_idx(size_idx);
3556 	slot_idx = (slot->min_address - chunk_start) / slot_size;
3557 	assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0);
3558 	meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx);
3559 
3560 	sizeclass = &kmem_size_array[size_idx];
3561 	chunk_elem = sizeclass->ks_num_elem;
3562 	num_elem = __builtin_popcount(meta->km_bitmap);
3563 
3564 	if (num_elem == chunk_elem) {
3565 		/*
3566 		 * If the entire chunk is empty, add it to the empty list
3567 		 */
3568 		bool from_right = kmem_meta_is_from_right(range_id, meta);
3569 
3570 		kmem_free_chunk(range_id, meta, from_right);
3571 	} else if (num_elem == KMEM_NUM_GUARDS + 1) {
3572 		/*
3573 		 * If we freed an element from a full chunk, move it to the partial list
3574 		 */
3575 		uint32_t front = kmem_get_front(range_id,
3576 		    kmem_meta_is_from_right(range_id, meta));
3577 
3578 		kmem_requeue_meta(meta, &sizeclass->ks_partial_head[front]);
3579 	}
3580 }
3581 
3582 void
3583 kmem_free_space(
3584 	vm_map_offset_t         start,
3585 	vm_map_offset_t         end,
3586 	vm_map_range_id_t       range_id,
3587 	mach_vm_range_t         slot)
3588 {
3589 	bool entry_present = false;
3590 	vm_map_entry_t prev_entry;
3591 	vm_map_entry_t next_entry;
3592 
3593 	if ((slot->min_address == start) && (slot->max_address == end)) {
3594 		/*
3595 		 * Entire slot is being freed at once
3596 		 */
3597 		return kmem_free_slot(range_id, slot);
3598 	}
3599 
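	/*
	 * Annotation: only part of the slot was freed. The slot's VA is handed
	 * back to the chunk allocator below only when no map entry still overlaps
	 * it, i.e. the previous entry ends at or before the slot and the next
	 * entry starts at or after it.
	 */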
3600 	entry_present = vm_map_lookup_entry(kernel_map, start, &prev_entry);
3601 	assert(!entry_present);
3602 	next_entry = prev_entry->vme_next;
3603 
3604 	if (((prev_entry == vm_map_to_entry(kernel_map) ||
3605 	    prev_entry->vme_end <= slot->min_address)) &&
3606 	    (next_entry == vm_map_to_entry(kernel_map) ||
3607 	    (next_entry->vme_start >= slot->max_address))) {
3608 		/*
3609 		 * Free entire slot
3610 		 */
3611 		kmem_free_slot(range_id, slot);
3612 	}
3613 }
3614 
3615 #pragma mark kmem init
3616 
3617 /*
3618  * The default percentage of memory that can be mlocked is scaled based on the total
3619  * amount of memory in the system. These percentages are calculated
3620  * offline and stored in this table. We index this table by
3621  * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
3622  * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
3623  *
3624  * Note that these values were picked for the Mac.
3625  * If we ever have very large memory config arm devices, we may want to revisit
3626  * since the kernel overhead is smaller there due to the larger page size.
3627  */
3628 
3629 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
3630 #define VM_USER_WIREABLE_MIN_CONFIG 32
3631 #if CONFIG_JETSAM
3632 /* Systems with jetsam can wire a bit more b/c the system can relieve wired
3633  * pressure.
3634  */
3635 static vm_map_size_t wire_limit_percents[] =
3636 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
3637 #else
3638 static vm_map_size_t wire_limit_percents[] =
3639 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
3640 #endif /* CONFIG_JETSAM */
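/*
 * Illustrative example (annotation, not from the original source): on a
 * 16 GiB configuration, log2(max_mem) == 34, so the table index is
 * 34 - VM_USER_WIREABLE_MIN_CONFIG == 2 and the default wire limit is
 * wire_limit_percents[2] percent of DRAM (80% with CONFIG_JETSAM, 76%
 * without), before the serverperfmode and VM_NOT_USER_WIREABLE_MAX
 * adjustments applied below.
 */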
3641 
3642 /* Set limit to 95% of DRAM if serverperfmode=1 */
3643 #define VM_USER_SERVERPERF_WIRE_LIMIT_PERCENT 95
3644 /* Use special serverperfmode behavior iff DRAM > 2^35 = 32GiB of RAM. */
3645 #define VM_USER_SERVERPERF_WIREABLE_MIN_CONFIG 35
3646 
3647 /*
3648  * Sets the default global user wire limit which limits the amount of
3649  * memory that can be locked via mlock() based on the above algorithm.
3650  * This can be overridden via a sysctl.
3651  */
3652 static void
3653 kmem_set_user_wire_limits(void)
3654 {
3655 	uint64_t available_mem_log;
3656 	uint64_t max_wire_percent;
3657 	size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
3658 	    sizeof(vm_map_size_t);
3659 	vm_map_size_t limit;
3660 	uint64_t config_memsize = max_mem;
3661 #if defined(XNU_TARGET_OS_OSX)
3662 	config_memsize = max_mem_actual;
3663 #endif /* defined(XNU_TARGET_OS_OSX) */
3664 
3665 	available_mem_log = bit_floor(config_memsize);
3666 
3667 	if (serverperfmode &&
3668 	    (available_mem_log >= VM_USER_SERVERPERF_WIREABLE_MIN_CONFIG)) {
3669 		max_wire_percent = VM_USER_SERVERPERF_WIRE_LIMIT_PERCENT;
3670 	} else {
3671 		if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
3672 			available_mem_log = 0;
3673 		} else {
3674 			available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
3675 		}
3676 		if (available_mem_log >= wire_limit_percents_length) {
3677 			available_mem_log = wire_limit_percents_length - 1;
3678 		}
3679 		max_wire_percent = wire_limit_percents[available_mem_log];
3680 	}
3681 
3682 	limit = config_memsize * max_wire_percent / 100;
3683 	/* Cap the number of non-lockable bytes at VM_NOT_USER_WIREABLE_MAX */
3684 	if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
3685 		limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
3686 	}
3687 
3688 	vm_global_user_wire_limit = limit;
3689 	/* the default per task limit is the same as the global limit */
3690 	vm_per_task_user_wire_limit = limit;
3691 	vm_add_wire_count_over_global_limit = 0;
3692 	vm_add_wire_count_over_user_limit = 0;
3693 }
3694 
3695 #define KMEM_MAX_CLAIMS 50
3696 __startup_data
3697 struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
3698 
3699 #if !MACH_ASSERT
3700 __startup_data
3701 #endif /* !MACH_ASSERT */
3702 uint32_t kmem_claim_count = 0;
3703 
3704 #if MACH_ASSERT
3705 /**
3706  * Save off some minimal information about the ranges for consumption by
3707  * post-lockdown tests.
3708  */
3709 static struct mach_vm_range kmem_test_saved_ranges[KMEM_MAX_CLAIMS];
3710 #endif /* MACH_ASSERT */
3711 
3712 /**
3713  * For a requested claim size (i.e. kc_size), get the number of bytes which
3714  * should actually be allocated for a region in order to be able to properly
3715  * provide the requested size (the allocation size).
3716  *
3717  * This allocation size is always greater or equal to the claim size. It can,
3718  * for example, include additional space as required by the kernel memory
3719  * configuration.
3720  *
3721  * @param known_last Is the claim in question known to be the last region after
3722  * all placing has completed? The size for a known_last allocation is always
3723  * less than or equal to a non-known_last allocation of the same size.
3724  */
3725 __startup_func
3726 static vm_map_size_t
3727 kmem_claim_to_allocation_size(vm_map_size_t claim_size, bool known_last)
3728 {
3729 	(void)known_last;
3730 	/*
3731 	 * Allocation size and claim size are identical.
3732 	 */
3733 	return claim_size;
3734 }
3735 
3736 /**
3737  * Compute the largest claim which can be made from a given allocation size.
3738  */
3739 static vm_map_size_t
3740 kmem_allocation_to_claim_size(vm_map_size_t allocation_size)
3741 {
3742 	/*
3743 	 * Allocation size and claim size are identical.
3744 	 */
3745 	return allocation_size;
3746 }
3747 
3748 __startup_func
3749 void
3750 kmem_range_startup_init(
3751 	struct kmem_range_startup_spec *sp)
3752 {
3753 	assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
3754 	if (sp->kc_calculate_sz) {
3755 		sp->kc_size = (sp->kc_calculate_sz)();
3756 	}
3757 	if (sp->kc_size) {
3758 		kmem_claims[kmem_claim_count] = *sp;
3759 		kmem_claim_count++;
3760 	}
3761 }
3762 
3763 static vm_offset_t
3764 kmem_fuzz_start(void)
3765 {
3766 	vm_offset_t kmapoff_kaddr = 0;
3767 	uint32_t kmapoff_pgcnt;
3768 
3769 	kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
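	/* i.e. 1..512 pages, or up to roughly 2 MiB of VA with 4 KiB pages */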
3770 
3771 	vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);
3772 
3773 	kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
3774 	    KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
3775 	    VM_KERN_MEMORY_OSFMK);
3776 
3777 
3778 	return kmapoff_kaddr + kmapoff_size;
3779 }
3780 
3781 /*
3782  * Generate a randomly shuffled array of indices from 0 to count - 1
3783  */
3784 __startup_func
3785 void
3786 kmem_shuffle(
3787 	uint16_t       *shuffle_buf,
3788 	uint16_t        count)
3789 {
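	/*
	 * Annotation: this is the "inside-out" variant of the Fisher-Yates
	 * shuffle. Each new index i is placed at a random slot j in [0, i] with
	 * the previous occupant moved to slot i, yielding a uniform permutation
	 * of 0..count-1 without a separate initialization pass.
	 */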
3790 	for (uint16_t i = 0; i < count; i++) {
3791 		uint16_t j = kmem_get_random16(i);
3792 		if (j != i) {
3793 			shuffle_buf[i] = shuffle_buf[j];
3794 		}
3795 		shuffle_buf[j] = i;
3796 	}
3797 }
3798 
3799 __startup_func
3800 static void
3801 kmem_shuffle_claims(void)
3802 {
3803 	uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
3804 	uint16_t limit = (uint16_t)kmem_claim_count;
3805 
3806 	kmem_shuffle(&shuffle_buf[0], limit);
3807 	for (uint16_t i = 0; i < limit; i++) {
3808 		struct kmem_range_startup_spec tmp = kmem_claims[i];
3809 		kmem_claims[i] = kmem_claims[shuffle_buf[i]];
3810 		kmem_claims[shuffle_buf[i]] = tmp;
3811 	}
3812 }
3813 
3814 __startup_func
3815 static void
3816 kmem_readjust_ranges(
3817 	uint32_t        cur_idx)
3818 {
3819 	assert(cur_idx != 0);
3820 	uint32_t j = cur_idx - 1, random;
3821 	struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
3822 	struct mach_vm_range *sp_range = sp.kc_range;
3823 	/*
3824 	 * Even if sp is currently last, it will never be last after it is moved.
3825 	 * As such, we want to bump other claims over it and include any necessary
3826 	 * padding for a non-last claim.
3827 	 *
3828 	 * While changing which claim is last can impact the total VA usage, since a
3829 	 * known_last allocation size is guaranteed to always be less-than-or-equal
3830 	 * to a non-known_last allocation (which is used for pre-placement sizing),
3831 	 * we will always have enough space so long as the pre-placement sizing had
3832 	 * enough space.
3833 	 */
3834 	vm_map_offset_t sp_allocation_size =
3835 	    kmem_claim_to_allocation_size(sp.kc_size, /* known_last */ false);
3836 
3837 	/*
3838 	 * Find max index where restriction is met
3839 	 */
3840 	for (; j > 0; j--) {
3841 		struct kmem_range_startup_spec spj = kmem_claims[j];
3842 		vm_map_offset_t max_start = spj.kc_range->min_address;
3843 		if (spj.kc_flags & KC_NO_MOVE) {
3844 			panic("kmem_range_init: Can't scramble with multiple constraints");
3845 		}
3846 		if (max_start <= sp_range->min_address) {
3847 			break;
3848 		}
3849 	}
3850 
3851 	/*
3852 	 * Pick a random index from 0 to max index and shift claims to the right
3853 	 * to make room for restricted claim
3854 	 */
3855 	random = kmem_get_random16((uint16_t)j);
3856 	assert(random <= j);
3857 
3858 	sp_range->min_address = kmem_claims[random].kc_range->min_address;
3859 	sp_range->max_address = sp_range->min_address + sp.kc_size;
3860 
3861 	for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
3862 		struct kmem_range_startup_spec spj = kmem_claims[j];
3863 		struct mach_vm_range *range = spj.kc_range;
3864 		range->min_address += sp_allocation_size;
3865 		range->max_address += sp_allocation_size;
3866 		kmem_claims[j + 1] = spj;
3867 	}
3868 
3869 	sp.kc_flags |= KC_NO_MOVE;
3870 	kmem_claims[random] = sp;
3871 }
3872 
3873 __startup_func
3874 static void
3875 kmem_add_ptr_claims(void)
3876 {
3877 	uint64_t kmem_meta_num, kmem_ptr_chunks;
3878 	vm_map_size_t org_ptr_range_size __assert_only;
3879 
3880 	org_ptr_range_size = ptr_range_size;
3881 
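	/*
	 * Annotation: each KMEM_CHUNK_SIZE_MIN chunk of VA requires one
	 * struct kmem_page_meta of metadata, so the usable chunk space is the
	 * original claim (less a page of slack) scaled by
	 * CHUNK / (CHUNK + sizeof(meta)). The remainder, plus two extra metadata
	 * entries and rounded to whole pages, becomes kmem_meta_size below, and
	 * the assert checks that both pieces still fit in the original claim.
	 */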
3882 	ptr_range_size -= PAGE_SIZE;
3883 	ptr_range_size *= KMEM_CHUNK_SIZE_MIN;
3884 	ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta));
3885 
3886 	kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN;
3887 	ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN;
3888 
3889 	kmem_meta_num = kmem_ptr_chunks + 2;
3890 	kmem_meta_size = round_page(kmem_meta_num * sizeof(struct kmem_page_meta));
3891 
3892 	assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size);
3893 	/*
3894 	 * Add claims for kmem's ranges
3895 	 */
3896 	for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
3897 		struct kmem_range_startup_spec kmem_spec = {
3898 			.kc_name = "kmem_ptr_range",
3899 			.kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
3900 			.kc_size = ptr_range_size,
3901 			.kc_flags = KC_NO_ENTRY,
3902 		};
3903 		kmem_claims[kmem_claim_count++] = kmem_spec;
3904 
3905 		struct kmem_range_startup_spec kmem_meta_spec = {
3906 			.kc_name = "kmem_ptr_range_meta",
3907 			.kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i],
3908 			.kc_size = kmem_meta_size,
3909 			.kc_flags = KC_NONE,
3910 		};
3911 		kmem_claims[kmem_claim_count++] = kmem_meta_spec;
3912 	}
3913 }
3914 
3915 __startup_func
3916 static void
3917 kmem_add_extra_claims(void)
3918 {
3919 	vm_map_size_t largest_free_size = 0, total_claims = 0;
3920 	vm_map_size_t sane_sprayqtn_size = 0, sprayqtn_allocation_size = 0;
3921 	vm_map_size_t ptr_total_allocation_size = 0;
3922 
3923 	vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
3924 	largest_free_size = trunc_page(largest_free_size);
3925 
3926 	/*
3927 	 * kasan and configs w/o *TRR need to have just one ptr range due to
3928 	 * resource constraints.
3929 	 */
3930 #if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
3931 	kmem_ptr_ranges = 1;
3932 #endif
3933 	/*
3934 	 * Determine size of data and pointer kmem_ranges
3935 	 */
3936 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
3937 		struct kmem_range_startup_spec sp_i = kmem_claims[i];
3938 
3939 		total_claims += kmem_claim_to_allocation_size(
3940 			sp_i.kc_size, /* known_last */ false);
3941 	}
3942 	assert((total_claims & PAGE_MASK) == 0);
3943 
3944 
3945 	largest_free_size -= total_claims;
3946 
3947 	/*
3948 	 * Use half the total available VA for all pointer allocations (this
3949 	 * includes the kmem_sprayqtn range). Given that we have 4 total
3950 	 * ranges divide the available VA by 8.
3951 	 */
3952 	ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2);
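	/*
	 * Annotation: when kmem_ptr_ranges is 3 (the 4 total ranges mentioned
	 * above, including the spray quarantine), this divides by (3 + 1) * 2 == 8,
	 * giving each of those ranges one eighth of the remaining VA, i.e. half
	 * of it in total.
	 */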
3953 
3954 	sprayqtn_range_size = ptr_range_size;
3955 	sane_sprayqtn_size = kmem_claim_to_allocation_size(
3956 		/* claim_size */ sane_size / 2, /* known_last */ false);
3957 	if (sprayqtn_range_size > sane_sprayqtn_size) {
3958 		vm_map_size_t sprayqtn_extra;
3959 
3960 		/*
3961 		 * Spray quarantine doesn't need that much space.
3962 		 * Shrink it to something reasonable and equally share the leftover VA
3963 		 * with the other pointer ranges.
3964 		 */
3965 		sprayqtn_extra = sprayqtn_range_size - sane_sprayqtn_size;
3966 		sprayqtn_range_size -= sprayqtn_extra;
3967 		ptr_range_size += sprayqtn_extra / kmem_ptr_ranges;
3968 	}
3969 
3970 	ptr_range_size = round_page(ptr_range_size);
3971 	sprayqtn_range_size = round_page(sprayqtn_range_size);
3972 
3973 	iokit_range_size = 0;
3974 
3975 	/* Less any necessary allocation padding... */
3976 	ptr_range_size = kmem_allocation_to_claim_size(ptr_range_size);
3977 	sprayqtn_range_size = kmem_allocation_to_claim_size(sprayqtn_range_size);
3978 
3979 	/*
3980 	 * Add the pointer and metadata claims
3981 	 * Note: this call modifies ptr_range_size and may, depending on the padding
3982 	 * requirements, slightly increase or decrease the overall allocation size
3983 	 * of the pointer+metadata region.
3984 	 */
3985 	kmem_add_ptr_claims();
3986 
3987 	sprayqtn_allocation_size = kmem_claim_to_allocation_size(
3988 		sprayqtn_range_size, /* known_last */ false);
3989 	ptr_total_allocation_size =
3990 	    (kmem_claim_to_allocation_size(ptr_range_size, /* known_last */ false) +
3991 	    kmem_claim_to_allocation_size(kmem_meta_size, /* known_last */ false)) *
3992 	    kmem_ptr_ranges;
3993 
3994 	/*
3995 	 * Check: spray and ptr_range are minimally valid.
3996 	 * This is a useful assert as it should catch us if we were to end up with a
3997 	 * "negative" (or extremely large) data_range_size.
3998 	 */
3999 	assert(sprayqtn_allocation_size + ptr_total_allocation_size < largest_free_size);
4000 
4001 	/*
4002 	 * Finally, give any remaining allocable space to the data region.
4003 	 */
4004 	data_range_size = largest_free_size - sprayqtn_allocation_size -
4005 	    ptr_total_allocation_size;
4006 
4007 #if defined(ARM_LARGE_MEMORY)
4008 	/*
4009 	 * Reserve space for our dedicated IOKit carveout.
4010 	 * Currently, we carve off a quarter of the data region.
4011 	 */
4012 	iokit_range_size = round_page(data_range_size / 4);
4013 	data_range_size -= kmem_claim_to_allocation_size(
4014 		iokit_range_size, /* known_last */ false);
4015 #endif /* defined(ARM_LARGE_MEMORY) */
4016 
4017 	/* Less any necessary allocation padding... */
4018 	data_range_size = kmem_allocation_to_claim_size(data_range_size);
4019 
4020 	/* Check: our allocations should all still fit in the free space */
4021 	assert(sprayqtn_allocation_size + ptr_total_allocation_size +
4022 	    kmem_claim_to_allocation_size(iokit_range_size, /* known_last */ false) +
4023 	    kmem_claim_to_allocation_size(data_range_size, /* known_last */ false) <=
4024 	    largest_free_size);
4025 
4026 	struct kmem_range_startup_spec kmem_spec_sprayqtn = {
4027 		.kc_name = "kmem_sprayqtn_range",
4028 		.kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN],
4029 		.kc_size = sprayqtn_range_size,
4030 		.kc_flags = KC_NO_ENTRY,
4031 	};
4032 	kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn;
4033 
4034 	/*
4035 	 * If !defined(ARM_LARGE_MEMORY), KMEM_RANGE_ID_IOKIT is coalesced into the data range.
4036 	 * This is to minimize wasted translation tables in constrained environments.
4037 	 * The coalescing happens during kmem_scramble_ranges.
4038 	 */
4039 #if defined(ARM_LARGE_MEMORY)
4040 	struct kmem_range_startup_spec kmem_spec_iokit = {
4041 		.kc_name = "kmem_iokit_range",
4042 		.kc_range = &kmem_ranges[KMEM_RANGE_ID_IOKIT],
4043 		.kc_size = iokit_range_size,
4044 		.kc_flags = KC_NO_ENTRY,
4045 	};
4046 	kmem_claims[kmem_claim_count++] = kmem_spec_iokit;
4047 #endif /* defined(ARM_LARGE_MEMORY) */
4048 
4049 	struct kmem_range_startup_spec kmem_spec_data = {
4050 		.kc_name = "kmem_data_range",
4051 		.kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
4052 		.kc_size = data_range_size,
4053 		.kc_flags = KC_NO_ENTRY,
4054 	};
4055 	kmem_claims[kmem_claim_count++] = kmem_spec_data;
4056 }
4057 
4058 __startup_func
4059 static void
4060 kmem_scramble_ranges(void)
4061 {
4062 	vm_map_offset_t va_alloc_head = 0;
4063 
4064 	/*
4065 	 * Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
4066 	 * the vm can find the requested ranges.
4067 	 */
4068 	kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
4069 	    VM_MAP_PAGE_SIZE(kernel_map));
4070 	kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;
4071 
4072 	/*
4073 	 * Allocate the g_kext_map prior to randomizing the remaining submaps, as
4074 	 * this map is 2G in size and starts at the end of kernel_text on x86; it
4075 	 * could otherwise overflow into the heap.
4076 	 */
4077 	kext_alloc_init();
4078 
4079 	/*
4080 	 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
4081 	 * stack addresses. (With a 4K page and 9 bits of randomness, this
4082 	 * eats about 2M of VA from the map)
4083 	 *
4084 	 * Note that we always need to slide by at least one page because the VM
4085 	 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
4086 	 * do not admit this address to be part of any zone submap.
4087 	 */
4088 	va_alloc_head = kmem_fuzz_start();
4089 
4090 	/*
4091 	 * Add claims for ptr and data kmem_ranges
4092 	 */
4093 	kmem_add_extra_claims();
4094 
4095 	/*
4096 	 * Minimally verify that our placer will be able to resolve the constraints
4097 	 * of all claims
4098 	 */
4099 	bool has_min_address = false;
4100 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4101 		struct kmem_range_startup_spec sp_i = kmem_claims[i];
4102 
4103 		/* Verify that we have only one claim with a min address constraint */
4104 		if (sp_i.kc_range->min_address) {
4105 			if (has_min_address) {
4106 				panic("Cannot place with multiple min_address constraints");
4107 			} else {
4108 				has_min_address = true;
4109 			}
4110 		}
4111 
4112 		if (sp_i.kc_range->max_address) {
4113 			panic("Cannot place with a max_address constraint");
4114 		}
4115 	}
4116 
4117 
4118 	/*
4119 	 * Shuffle registered claims
4120 	 */
4121 	assert(kmem_claim_count < UINT16_MAX);
4122 	kmem_shuffle_claims();
4123 
4124 	/*
4125 	 * Apply restrictions and determine range for each claim
4126 	 */
4127 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4128 		struct kmem_range_startup_spec sp = kmem_claims[i];
4129 		struct mach_vm_range *sp_range = sp.kc_range;
4130 
4131 		/*
4132 		 * Find space using the allocation size (rather than the claim size) in
4133 		 * order to ensure we provide any applicable padding.
4134 		 */
4135 		bool is_last = (i == kmem_claim_count - 1);
4136 		vm_map_offset_t sp_allocation_size =
4137 		    kmem_claim_to_allocation_size(sp.kc_size, is_last);
4138 
4139 		if (vm_map_locate_space_anywhere(kernel_map, sp_allocation_size, 0,
4140 		    VM_MAP_KERNEL_FLAGS_ANYWHERE(.vmkf_no_soft_limit = true),
4141 		    &va_alloc_head, NULL) != KERN_SUCCESS) {
4142 			panic("kmem_range_init: vm_map_locate_space failing for claim %s, "
4143 			    "size 0x%llx",
4144 			    sp.kc_name, sp_allocation_size);
4145 		}
4146 
4147 		/*
4148 		 * Re-adjust ranges if restriction not met
4149 		 */
4150 		if (sp_range->min_address && va_alloc_head > sp_range->min_address) {
4151 			kmem_readjust_ranges(i);
4152 		} else {
4153 			/*
4154 			 * Though the actual allocated space may be larger, provide only the
4155 			 * size requested by the original claim.
4156 			 */
4157 			sp_range->min_address = va_alloc_head;
4158 			sp_range->max_address = va_alloc_head + sp.kc_size;
4159 		}
4160 
4161 		va_alloc_head += sp_allocation_size;
4162 	}
4163 
4164 	/*
4165 	 * We have settled on the ranges, now create temporary entries for the
4166 	 * claims
4167 	 */
4168 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4169 		struct kmem_range_startup_spec sp = kmem_claims[i];
4170 		bool is_last = (i == kmem_claim_count - 1);
4171 		vm_map_offset_t sp_allocation_size =
4172 		    kmem_claim_to_allocation_size(sp.kc_size, is_last);
4173 		vm_map_entry_t entry = NULL;
4174 		if (sp.kc_flags & KC_NO_ENTRY) {
4175 			continue;
4176 		}
4177 
4178 
4179 		/*
4180 		 * We reserve the full allocation size (rather than the claim size) so
4181 		 * that nothing ends up placed in the padding space (if applicable).
4182 		 */
4183 		if (vm_map_find_space(kernel_map, sp.kc_range->min_address,
4184 		    sp_allocation_size, 0,
4185 		    VM_MAP_KERNEL_FLAGS_ANYWHERE(.vmkf_no_soft_limit = true),
4186 		    &entry) != KERN_SUCCESS) {
4187 			panic("kmem_range_init: vm_map_find_space failing for claim %s",
4188 			    sp.kc_name);
4189 		}
4190 		vm_object_reference(kernel_object_default);
4191 		VME_OBJECT_SET(entry, kernel_object_default, false, 0);
4192 		VME_OFFSET_SET(entry, entry->vme_start);
4193 		vm_map_unlock(kernel_map);
4194 	}
4195 
4196 	/*
4197 	 * If we're not on a large memory system KMEM_RANGE_ID_IOKIT acts as a synonym for KMEM_RANGE_ID_DATA.
4198 	 * On large memory systems KMEM_RANGE_ID_IOKIT is a dedicated carveout.
4199 	 */
4200 #if !defined(ARM_LARGE_MEMORY)
4201 	kmem_ranges[KMEM_RANGE_ID_IOKIT] = kmem_ranges[KMEM_RANGE_ID_DATA];
4202 #endif /* !defined(ARM_LARGE_MEMORY) */
4203 
4204 	/*
4205 	 * Now that we are done assigning all the ranges, reset
4206 	 * kmem_ranges[KMEM_RANGE_ID_NONE]
4207 	 */
4208 	kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};
4209 
4210 #if DEBUG || DEVELOPMENT
4211 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4212 		struct kmem_range_startup_spec sp = kmem_claims[i];
4213 
4214 		printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
4215 		    (void *)sp.kc_range->min_address,
4216 		    (void *)sp.kc_range->max_address,
4217 		    mach_vm_size_pretty(sp.kc_size),
4218 		    mach_vm_size_unit(sp.kc_size));
4219 	}
4220 #endif /* DEBUG || DEVELOPMENT */
4221 
4222 #if MACH_ASSERT
4223 	/*
4224 	 * Since many parts of the claim infrastructure are marked as startup data
4225 	 * (and are thus unavailable post-lockdown), save off information our tests
4226 	 * need now.
4227 	 */
4228 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4229 		kmem_test_saved_ranges[i] = *(kmem_claims[i].kc_range);
4230 	}
4231 #endif /* MACH_ASSERT */
4232 }
4233 
4234 __startup_func
4235 static void
4236 kmem_range_init(void)
4237 {
4238 	vm_size_t range_adjustment;
4239 
4240 	kmem_scramble_ranges();
4241 
4242 	range_adjustment = sprayqtn_range_size >> 3;
4243 	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address =
4244 	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment;
4245 	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address =
4246 	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address;
4247 
4248 	range_adjustment = iokit_range_size >> 3;
4249 	kmem_large_ranges[KMEM_RANGE_ID_IOKIT].min_address =
4250 	    kmem_ranges[KMEM_RANGE_ID_IOKIT].min_address + range_adjustment;
4251 	kmem_large_ranges[KMEM_RANGE_ID_IOKIT].max_address =
4252 	    kmem_ranges[KMEM_RANGE_ID_IOKIT].max_address;
4253 
4254 	range_adjustment = data_range_size >> 3;
4255 	kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
4256 	    kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
4257 	kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
4258 	    kmem_ranges[KMEM_RANGE_ID_DATA].max_address;
4259 
4260 	pmap_init();
4261 	kmem_metadata_init();
4262 	kmem_sizeclass_init();
4263 
4264 #if DEBUG || DEVELOPMENT
4265 	for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) {
4266 		vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
4267 		printf("kmem_large_ranges[%d]    : %p - %p (%u%c)\n", i,
4268 		    (void *)kmem_large_ranges[i].min_address,
4269 		    (void *)kmem_large_ranges[i].max_address,
4270 		    mach_vm_size_pretty(range_size),
4271 		    mach_vm_size_unit(range_size));
4272 	}
4273 #endif
4274 }
4275 STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
4276 
4277 #if DEBUG || DEVELOPMENT
4278 __startup_func
4279 static void
4280 kmem_log_init(void)
4281 {
4282 	/*
4283 	 * The log can only be created after the kmem subsystem is initialized, as
4284 	 * btlog creation uses kmem
4285 	 */
4286 	kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0);
4287 }
4288 STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init);
4289 
4290 kmem_gobj_stats
4291 kmem_get_gobj_stats(void)
4292 {
4293 	kmem_gobj_stats stats = {};
4294 
4295 	vm_map_lock(kernel_map);
4296 	for (uint8_t i = 0; i < kmem_ptr_ranges; i++) {
4297 		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i;
4298 		struct mach_vm_range range = kmem_ranges[range_id];
4299 		struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)];
4300 		struct kmem_page_meta *meta_end;
4301 		uint64_t meta_idx = meta - kmem_meta_base[range_id];
4302 		vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0;
4303 		vm_map_offset_t addr;
4304 		vm_map_entry_t entry;
4305 
4306 		/*
4307 		 * Left front
4308 		 */
4309 		va = (meta_idx * KMEM_CHUNK_SIZE_MIN);
4310 		meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta));
4311 
4312 		/*
4313 		 * Right front
4314 		 */
4315 		meta = kmem_meta_hwm[kmem_get_front(range_id, 1)];
4316 		meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr,
4317 		    &meta_idx);
4318 		meta_idx = meta_end - meta;
4319 		meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta));
4320 		va += (meta_idx * KMEM_CHUNK_SIZE_MIN);
4321 
4322 		/*
4323 		 * Compute VA allocated in entire range
4324 		 */
4325 		if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) {
4326 			entry = entry->vme_next;
4327 		}
4328 		while (entry != vm_map_to_entry(kernel_map) &&
4329 		    entry->vme_start < range.max_address) {
4330 			used += (entry->vme_end - entry->vme_start);
4331 			entry = entry->vme_next;
4332 		}
4333 
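		/*
		 * Annotation: this appears to estimate page-table overhead at about
		 * 8 bytes of PTE per page of VA that the fronts have consumed but
		 * that no map entry currently covers.
		 */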
4334 		pte_sz = round_page(atop(va - used) * 8);
4335 
4336 		stats.total_used += used;
4337 		stats.total_va += va;
4338 		stats.pte_sz += pte_sz;
4339 		stats.meta_sz += meta_sz;
4340 	}
4341 	vm_map_unlock(kernel_map);
4342 
4343 	return stats;
4344 }
4345 
4346 #endif /* DEBUG || DEVELOPMENT */
4347 
4348 /*
4349  *	kmem_init:
4350  *
4351  *	Initialize the kernel's virtual memory map, taking
4352  *	into account all memory allocated up to this time.
4353  */
4354 __startup_func
4355 void
4356 kmem_init(
4357 	vm_offset_t     start,
4358 	vm_offset_t     end)
4359 {
4360 	vm_map_offset_t map_start;
4361 	vm_map_offset_t map_end;
4362 
4363 	map_start = vm_map_trunc_page(start,
4364 	    VM_MAP_PAGE_MASK(kernel_map));
4365 	map_end = vm_map_round_page(end,
4366 	    VM_MAP_PAGE_MASK(kernel_map));
4367 
4368 	vm_map_will_allocate_early_map(&kernel_map);
4369 #if defined(__arm64__)
4370 	kernel_map = vm_map_create_options(pmap_kernel(),
4371 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS,
4372 	    VM_MAX_KERNEL_ADDRESS,
4373 	    VM_MAP_CREATE_DEFAULT);
4374 	/*
4375 	 *	Reserve virtual memory allocated up to this time.
4376 	 */
4377 	{
4378 		unsigned int    region_select = 0;
4379 		vm_map_offset_t region_start;
4380 		vm_map_size_t   region_size;
4381 		vm_map_offset_t map_addr;
4382 		kern_return_t kr;
4383 
4384 		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
4385 			map_addr = region_start;
4386 			kr = vm_map_enter(kernel_map, &map_addr,
4387 			    vm_map_round_page(region_size,
4388 			    VM_MAP_PAGE_MASK(kernel_map)),
4389 			    (vm_map_offset_t) 0,
4390 			    VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(
4391 				    .vmkf_no_pmap_check = true,
4392 				    .vmkf_no_soft_limit = true),
4393 			    VM_OBJECT_NULL,
4394 			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
4395 			    VM_INHERIT_DEFAULT);
4396 
4397 			if (kr != KERN_SUCCESS) {
4398 				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
4399 				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
4400 				    (uint64_t) region_size, kr);
4401 			}
4402 
4403 			region_select++;
4404 		}
4405 	}
4406 #else
4407 	kernel_map = vm_map_create_options(pmap_kernel(),
4408 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
4409 	    VM_MAP_CREATE_DEFAULT);
4410 	/*
4411 	 *	Reserve virtual memory allocated up to this time.
4412 	 */
4413 	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
4414 		vm_map_offset_t map_addr;
4415 		kern_return_t kr;
4416 
4417 		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
4418 		kr = vm_map_enter(kernel_map,
4419 		    &map_addr,
4420 		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
4421 		    (vm_map_offset_t) 0,
4422 		    VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true),
4423 		    VM_OBJECT_NULL,
4424 		    (vm_object_offset_t) 0, FALSE,
4425 		    VM_PROT_NONE, VM_PROT_NONE,
4426 		    VM_INHERIT_DEFAULT);
4427 
4428 		if (kr != KERN_SUCCESS) {
4429 			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
4430 			    (uint64_t) start, (uint64_t) end,
4431 			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
4432 			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
4433 			    kr);
4434 		}
4435 	}
4436 #endif
4437 
4438 	kmem_set_user_wire_limits();
4439 }
4440 
4441 
4442 #pragma mark map copyio
4443 static inline void
4444 current_thread_set_sec_override(bool val)
4445 {
4446 #pragma unused(val)
4447 }
4448 
4449 /*
4450  * Note: semantic types aren't used as `copyio` already validates.
4451  */
4452 
4453 kern_return_t
4454 copyinmap(
4455 	vm_map_t                map,
4456 	vm_map_offset_t         fromaddr,
4457 	void                   *todata,
4458 	vm_size_t               length)
4459 {
4460 	kern_return_t kr = KERN_SUCCESS;
4461 	vm_map_switch_context_t switch_ctx;
4462 
4463 	if (vm_map_pmap(map) == pmap_kernel()) {
4464 		/* assume a correct copy */
4465 		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
4466 	} else if (current_map() == map) {
4467 		if (copyin(fromaddr, todata, length) != 0) {
4468 			kr = KERN_INVALID_ADDRESS;
4469 		}
4470 	} else {
4471 		vm_map_reference(map);
4472 		current_thread_set_sec_override(true);
4473 		switch_ctx = vm_map_switch_to(map);
4474 		if (copyin(fromaddr, todata, length) != 0) {
4475 			kr = KERN_INVALID_ADDRESS;
4476 		}
4477 		current_thread_set_sec_override(false);
4478 		vm_map_switch_back(switch_ctx);
4479 		vm_map_deallocate(map);
4480 	}
4481 	return kr;
4482 }
4483 
4484 kern_return_t
4485 copyoutmap(
4486 	vm_map_t                map,
4487 	void                   *fromdata,
4488 	vm_map_address_t        toaddr,
4489 	vm_size_t               length)
4490 {
4491 	kern_return_t kr = KERN_SUCCESS;
4492 	vm_map_switch_context_t switch_ctx;
4493 
4494 	if (vm_map_pmap(map) == pmap_kernel()) {
4495 		/* assume a correct copy */
4496 		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
4497 	} else if (current_map() == map) {
4498 		if (copyout(fromdata, toaddr, length) != 0) {
4499 			ktriage_record(thread_tid(current_thread()),
4500 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
4501 			    KDBG_TRIAGE_RESERVED,
4502 			    KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR),
4503 			    KERN_INVALID_ADDRESS /* arg */);
4504 			kr = KERN_INVALID_ADDRESS;
4505 		}
4506 	} else {
4507 		vm_map_reference(map);
4508 		current_thread_set_sec_override(true);
4509 		switch_ctx = vm_map_switch_to(map);
4510 		if (copyout(fromdata, toaddr, length) != 0) {
4511 			ktriage_record(thread_tid(current_thread()),
4512 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
4513 			    KDBG_TRIAGE_RESERVED,
4514 			    KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR),
4515 			    KERN_INVALID_ADDRESS /* arg */);
4516 			kr = KERN_INVALID_ADDRESS;
4517 		}
4518 		current_thread_set_sec_override(false);
4519 		vm_map_switch_back(switch_ctx);
4520 		vm_map_deallocate(map);
4521 	}
4522 	return kr;
4523 }
4524 
4525 kern_return_t
4526 copyoutmap_atomic32(
4527 	vm_map_t                map,
4528 	uint32_t                value,
4529 	vm_map_address_t        toaddr)
4530 {
4531 	kern_return_t kr = KERN_SUCCESS;
4532 	vm_map_switch_context_t switch_ctx;
4533 
4534 	if (vm_map_pmap(map) == pmap_kernel()) {
4535 		/* assume a correct toaddr */
4536 		*(uint32_t *)toaddr = value;
4537 	} else if (current_map() == map) {
4538 		if (copyout_atomic32(value, toaddr) != 0) {
4539 			kr = KERN_INVALID_ADDRESS;
4540 		}
4541 	} else {
4542 		vm_map_reference(map);
4543 		current_thread_set_sec_override(true);
4544 		switch_ctx = vm_map_switch_to(map);
4545 		if (copyout_atomic32(value, toaddr) != 0) {
4546 			kr = KERN_INVALID_ADDRESS;
4547 		}
4548 		current_thread_set_sec_override(false);
4549 		vm_map_switch_back(switch_ctx);
4550 		vm_map_deallocate(map);
4551 	}
4552 	return kr;
4553 }
4554 
4555 kern_return_t
4556 copyoutmap_atomic64(
4557 	vm_map_t                map,
4558 	uint64_t                value,
4559 	vm_map_address_t        toaddr)
4560 {
4561 	kern_return_t kr = KERN_SUCCESS;
4562 	vm_map_switch_context_t switch_ctx;
4563 
4564 	if (vm_map_pmap(map) == pmap_kernel()) {
4565 		/* assume a correct toaddr */
4566 		*(uint64_t *)toaddr = value;
4567 	} else if (current_map() == map) {
4568 		if (copyout_atomic64(value, toaddr) != 0) {
4569 			kr = KERN_INVALID_ADDRESS;
4570 		}
4571 	} else {
4572 		vm_map_reference(map);
4573 		current_thread_set_sec_override(true);
4574 		switch_ctx = vm_map_switch_to(map);
4575 		if (copyout_atomic64(value, toaddr) != 0) {
4576 			kr = KERN_INVALID_ADDRESS;
4577 		}
4578 		current_thread_set_sec_override(false);
4579 		vm_map_switch_back(switch_ctx);
4580 		vm_map_deallocate(map);
4581 	}
4582 	return kr;
4583 }
4584 
4585 
4586 #pragma mark pointer obfuscation / packing
4587 
4588 /*
4589  *
4590  *	The following two functions are to be used when exposing kernel
4591  *	addresses to userspace via any of the various debug or info
4592  *	facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
4593  *	and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
4594  *	are exported to KEXTs.
4595  *
4596  *	NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
4597  */
4598 
4599 vm_offset_t
4600 vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
4601 {
4602 	assert(salt != 0);
4603 
4604 	if (addr == 0) {
4605 		return 0ul;
4606 	}
4607 
4608 	if (VM_KERNEL_IS_SLID(addr)) {
4609 		return VM_KERNEL_UNSLIDE(addr);
4610 	}
4611 
4612 	addr = VM_KERNEL_STRIP_UPTR(addr);
4613 
4614 	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
4615 	SHA256_CTX sha_ctx;
4616 
4617 	SHA256_Init(&sha_ctx);
4618 	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
4619 	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
4620 	SHA256_Final(sha_digest, &sha_ctx);
4621 
4622 	return sha_digest[0];
4623 }
4624 
4625 __exported vm_offset_t
4626 vm_kernel_addrhash_external(vm_offset_t addr);
4627 vm_offset_t
4628 vm_kernel_addrhash_external(vm_offset_t addr)
4629 {
4630 	return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
4631 }
4632 
4633 void
4634 vm_kernel_addrhide(
4635 	vm_offset_t addr,
4636 	vm_offset_t *hide_addr)
4637 {
4638 	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
4639 }
4640 
4641 void
4642 vm_kernel_addrperm_external(
4643 	vm_offset_t addr,
4644 	vm_offset_t *perm_addr)
4645 {
4646 	if (VM_KERNEL_IS_SLID(addr)) {
4647 		*perm_addr = VM_KERNEL_UNSLIDE(addr);
4648 	} else if (VM_KERNEL_ADDRESS(addr)) {
4649 		*perm_addr = addr + vm_kernel_addrperm_ext;
4650 	} else {
4651 		*perm_addr = addr;
4652 	}
4653 }
4654 
4655 void
4656 vm_kernel_unslide_or_perm_external(
4657 	vm_offset_t addr,
4658 	vm_offset_t *up_addr)
4659 {
4660 	vm_kernel_addrperm_external(addr, up_addr);
4661 }
4662 
4663 void
4664 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
4665 {
4666 	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
4667 		panic("pointer %p can't be packed: low %d bits aren't 0",
4668 		    (void *)ptr, params.vmpp_shift);
4669 	} else if (ptr <= params.vmpp_base) {
4670 		panic("pointer %p can't be packed: below base %p",
4671 		    (void *)ptr, (void *)params.vmpp_base);
4672 	} else {
4673 		panic("pointer %p can't be packed: maximum encodable pointer is %p",
4674 		    (void *)ptr, (void *)vm_packing_max_packable(params));
4675 	}
4676 }
4677 
4678 void
4679 vm_packing_verify_range(
4680 	const char *subsystem,
4681 	vm_offset_t min_address,
4682 	vm_offset_t max_address,
4683 	vm_packing_params_t params)
4684 {
4685 	if (min_address > max_address) {
4686 		panic("%s: %s range invalid min:%p > max:%p",
4687 		    __func__, subsystem, (void *)min_address, (void *)max_address);
4688 	}
4689 
4690 	if (!params.vmpp_base_relative) {
4691 		return;
4692 	}
4693 
4694 	if (min_address <= params.vmpp_base) {
4695 		panic("%s: %s range invalid min:%p <= base:%p",
4696 		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
4697 	}
4698 
4699 	if (max_address > vm_packing_max_packable(params)) {
4700 		panic("%s: %s range invalid max:%p >= max packable:%p",
4701 		    __func__, subsystem, (void *)max_address,
4702 		    (void *)vm_packing_max_packable(params));
4703 	}
4704 }
4705 
4706 #pragma mark tests
4707 #if MACH_ASSERT
4708 #include <sys/errno.h>
4709 
4710 static void
4711 kmem_test_for_entry(
4712 	vm_map_t                map,
4713 	vm_offset_t             addr,
4714 	void                  (^block)(vm_map_entry_t))
4715 {
4716 	vm_map_entry_t entry;
4717 
4718 	vm_map_lock(map);
4719 	block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
4720 	vm_map_unlock(map);
4721 }
4722 
4723 #define kmem_test_assert_map(map, pg, entries) ({ \
4724 	assert3u((map)->size, ==, ptoa(pg)); \
4725 	assert3u((map)->hdr.nentries, ==, entries); \
4726 })
4727 
4728 static bool
4729 can_write_at(vm_offset_t offs, uint32_t page)
4730 {
4731 	static const int zero;
4732 
4733 	return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
4734 }
4735 #define assert_writeable(offs, page) \
4736 	assertf(can_write_at(offs, page), \
4737 	    "can write at %p + ptoa(%d)", (void *)offs, page)
4738 
4739 #define assert_faults(offs, page) \
4740 	assertf(!can_write_at(offs, page), \
4741 	    "can write at %p + ptoa(%d)", (void *)offs, page)
4742 
4743 #define peek(offs, page) \
4744 	(*(uint32_t *)((offs) + ptoa(page)))
4745 
4746 #define poke(offs, page, v) \
4747 	(*(uint32_t *)((offs) + ptoa(page)) = (v))
4748 
4749 #if CONFIG_SPTM
4750 __attribute__((noinline))
4751 static void
4752 kmem_test_verify_type_policy(vm_offset_t addr, kmem_flags_t flags)
4753 {
4754 	extern bool use_xnu_restricted;
4755 	pmap_mapping_type_t expected_type = PMAP_MAPPING_TYPE_RESTRICTED;
4756 
4757 	/* Explicitly state the expected policy */
4758 	if (flags & (KMEM_DATA | KMEM_COMPRESSOR | KMEM_DATA_SHARED)) {
4759 		expected_type = PMAP_MAPPING_TYPE_DEFAULT;
4760 	}
4761 
4762 	/* If X_K_R is disabled, DEFAULT is the only possible mapping */
4763 	if (!use_xnu_restricted) {
4764 		expected_type = PMAP_MAPPING_TYPE_DEFAULT;
4765 	}
4766 
4767 	/* Verify if derived correctly */
4768 	assert3u(expected_type, ==, __kmem_mapping_type(flags));
4769 
4770 	pmap_paddr_t pa = kvtophys(addr);
4771 	if (pa == 0) {
4772 		return;
4773 	}
4774 
4775 	/* Verify if the mapped address actually got the expected type */
4776 	assert3u(expected_type, ==, sptm_get_frame_type(pa));
4777 }
4778 #endif /* CONFIG_SPTM */
4779 
4780 __attribute__((noinline))
4781 static void
4782 kmem_alloc_basic_test(vm_map_t map)
4783 {
4784 	kmem_guard_t guard = {
4785 		.kmg_tag = VM_KERN_MEMORY_DIAG,
4786 	};
4787 	vm_offset_t addr;
4788 
4789 	/*
4790 	 * Test wired basics:
4791 	 * - KMA_KOBJECT
4792 	 * - KMA_GUARD_FIRST, KMA_GUARD_LAST
4793 	 * - allocation alignment
4794 	 */
4795 	addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
4796 	    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
4797 	assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)", map);
4798 	assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
4799 	kmem_test_assert_map(map, 10, 1);
4800 
4801 	kmem_test_for_entry(map, addr, ^(__assert_only vm_map_entry_t e){
4802 		assertf(e, "unable to find address %p in map %p", (void *)addr, map);
4803 		assert(e->vme_kernel_object);
4804 		assert(!e->vme_atomic);
4805 		assert3u(e->vme_start, <=, addr);
4806 		assert3u(addr + ptoa(10), <=, e->vme_end);
4807 	});
4808 
4809 	assert_faults(addr, 0);
4810 	for (int i = 1; i < 9; i++) {
4811 		assert_writeable(addr, i);
4812 	}
4813 	assert_faults(addr, 9);
4814 
4815 	kmem_free(map, addr, ptoa(10));
4816 	kmem_test_assert_map(map, 0, 0);
4817 
4818 	/*
4819 	 * Test pageable basics.
4820 	 */
4821 	addr = kmem_alloc_guard(map, ptoa(10), 0,
4822 	    KMA_PAGEABLE, guard).kmr_address;
4823 	assertf(addr != 0ull, "kma(%p, 10p, 0, KO | PG)", map);
4824 	kmem_test_assert_map(map, 10, 1);
4825 
4826 	for (int i = 0; i < 9; i++) {
4827 		assert_faults(addr, i);
4828 		poke(addr, i, 42);
4829 		assert_writeable(addr, i);
4830 	}
4831 
4832 	kmem_free_guard(map, addr, ptoa(10),
4833 	    KMF_GUARD_FIRST | KMF_GUARD_LAST, guard);
4834 	kmem_test_assert_map(map, 0, 0);
4835 }
4836 
4837 __attribute__((noinline))
4838 static void
4839 kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
4840 {
4841 	kmem_guard_t guard = {
4842 		.kmg_atomic  = !(kind & KMR_DATA),
4843 		.kmg_tag     = VM_KERN_MEMORY_DIAG,
4844 		.kmg_context = 0xefface,
4845 	};
4846 	vm_offset_t addr, newaddr;
4847 	const int N = 10;
4848 
4849 	/*
4850 	 *	This isn't something kmem_realloc_guard() _needs_ to do,
4851 	 *	we could conceive an implementation where it grows in place
4852 	 *	if there's space after it.
4853 	 *
4854 	 *	However, this is what the implementation does today.
4855 	 */
4856 	bool realloc_growth_changes_address = true;
4857 	bool GF = (kind & KMR_GUARD_FIRST);
4858 	bool GL = (kind & KMR_GUARD_LAST);
4859 
4860 	/*
4861 	 *	Initial N page allocation
4862 	 */
4863 	addr = kmem_alloc_guard(map, ptoa(N), 0,
4864 	    (kind & ~KMEM_FREEOLD) | KMA_ZERO, guard).kmr_address;
4865 	assert3u(addr, !=, 0);
4866 
4867 	kmem_test_assert_map(map, N, 1);
4868 	for (int pg = GF; pg < N - GL; pg++) {
4869 		poke(addr, pg, 42 + pg);
4870 	}
4871 	for (int pg = N - GL; pg < N; pg++) {
4872 		assert_faults(addr, pg);
4873 	}
4874 
4875 #if CONFIG_SPTM
4876 	kmem_test_verify_type_policy(addr, ANYF(kind));
4877 #endif /* CONFIG_SPTM */
4878 	/*
4879 	 *	Grow to N + 3 pages
4880 	 */
4881 	newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
4882 	    kind | KMR_ZERO, guard).kmr_address;
4883 	assert3u(newaddr, !=, 0);
4884 	if (realloc_growth_changes_address) {
4885 		assert3u(addr, !=, newaddr);
4886 	}
4887 	if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
4888 		kmem_test_assert_map(map, N + 3, 1);
4889 	} else {
4890 		kmem_test_assert_map(map, 2 * N + 3, 2);
4891 	}
4892 	for (int pg = GF; pg < N - GL; pg++) {
4893 		assert3u(peek(newaddr, pg), ==, 42 + pg);
4894 	}
4895 	if ((kind & KMR_FREEOLD) == 0) {
4896 		for (int pg = GF; pg < N - GL; pg++) {
4897 			assert3u(peek(addr, pg), ==, 42 + pg);
4898 		}
4899 		/* check for tru-share */
4900 		/* check for true sharing between the old and new mappings */
4901 		assert3u(peek(newaddr + 16, 0), ==, 1234);
4902 		kmem_free_guard(map, addr, ptoa(N),
4903 		    kind & (KMF_TAG | KMF_GUARD_FIRST | KMF_GUARD_LAST), guard);
4904 		kmem_test_assert_map(map, N + 3, 1);
4905 	}
4906 	if (addr != newaddr) {
4907 		for (int pg = GF; pg < N - GL; pg++) {
4908 			assert_faults(addr, pg);
4909 		}
4910 	}
4911 	for (int pg = N - GL; pg < N + 3 - GL; pg++) {
4912 		assert3u(peek(newaddr, pg), ==, 0);
4913 	}
4914 	for (int pg = N + 3 - GL; pg < N + 3; pg++) {
4915 		assert_faults(newaddr, pg);
4916 	}
4917 	addr = newaddr;
4918 
4919 
4920 	/*
4921 	 *	Shrink to N - 2 pages
4922 	 */
4923 	newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
4924 	    kind | KMR_ZERO, guard).kmr_address;
4925 	assert3u(map->size, ==, ptoa(N - 2));
4926 	assert3u(newaddr, ==, addr);
4927 	kmem_test_assert_map(map, N - 2, 1);
4928 
4929 	for (int pg = GF; pg < N - 2 - GL; pg++) {
4930 		assert3u(peek(addr, pg), ==, 42 + pg);
4931 	}
4932 	for (int pg = N - 2 - GL; pg < N + 3; pg++) {
4933 		assert_faults(addr, pg);
4934 	}
4935 
4936 	kmem_free_guard(map, addr, ptoa(N - 2),
4937 	    kind & (KMF_TAG | KMF_GUARD_FIRST | KMF_GUARD_LAST), guard);
4938 	kmem_test_assert_map(map, 0, 0);
4939 }
4940 
4941 static int
4942 kmem_basic_test(__unused int64_t in, int64_t *out)
4943 {
4944 	mach_vm_offset_t addr;
4945 	vm_map_t map;
4946 
4947 	printf("%s: test running\n", __func__);
4948 
4949 	map = kmem_suballoc(kernel_map, &addr, 64U << 20,
4950 	        VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
4951 	        KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap;
4952 
4953 	printf("%s: kmem_alloc ...\n", __func__);
4954 	kmem_alloc_basic_test(map);
4955 	printf("%s:     PASS\n", __func__);
4956 
4957 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
4958 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
4959 	printf("%s:     PASS\n", __func__);
4960 
4961 	printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
4962 	kmem_realloc_basic_test(map, KMR_FREEOLD);
4963 	printf("%s:     PASS\n", __func__);
4964 
4965 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
4966 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST);
4967 	printf("%s:     PASS\n", __func__);
4968 
4969 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
4970 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
4971 	printf("%s:     PASS\n", __func__);
4972 
4973 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
4974 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
4975 	printf("%s:     PASS\n", __func__);
4976 
4977 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
4978 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST);
4979 	printf("%s:     PASS\n", __func__);
4980 
4981 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
4982 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
4983 	printf("%s:     PASS\n", __func__);
4984 
4985 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
4986 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
4987 	printf("%s:     PASS\n", __func__);
4988 
4989 
4990 	/* using KMR_DATA signals to test the non-atomic realloc path */
4991 	printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
4992 	kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
4993 	printf("%s:     PASS\n", __func__);
4994 
4995 	printf("%s: kmem_realloc (KMR_DATA) ...\n", __func__);
4996 	kmem_realloc_basic_test(map, KMR_DATA);
4997 	printf("%s:     PASS\n", __func__);
4998 
4999 	/* test KMR_DATA_SHARED for the new shared kheap */
5000 	printf("%s: kmem_realloc (KMR_DATA_SHARED) ...\n", __func__);
5001 	kmem_realloc_basic_test(map, KMR_DATA_SHARED);
5002 	printf("%s:     PASS\n", __func__);
5003 
5004 	kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
5005 	vm_map_deallocate(map);
5006 
5007 	printf("%s: test passed\n", __func__);
5008 	*out = 1;
5009 	return 0;
5010 }
5011 SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);
5012 
5013 static void
5014 kmem_test_get_size_idx_for_chunks(uint32_t chunks)
5015 {
5016 	__assert_only uint32_t idx = kmem_get_size_idx_for_chunks(chunks);
5017 
5018 	assert(chunks >= kmem_size_array[idx].ks_num_chunk);
5019 }
5020 
5021 __attribute__((noinline))
5022 static void
5023 kmem_test_get_size_idx_for_all_chunks()
5024 {
5025 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
5026 		uint32_t chunks = kmem_size_array[i].ks_num_chunk;
5027 
5028 		if (chunks != 1) {
5029 			kmem_test_get_size_idx_for_chunks(chunks - 1);
5030 		}
5031 		kmem_test_get_size_idx_for_chunks(chunks);
5032 		kmem_test_get_size_idx_for_chunks(chunks + 1);
5033 	}
5034 }
5035 
5036 static int
5037 kmem_guard_obj_test(__unused int64_t in, int64_t *out)
5038 {
5039 	printf("%s: test running\n", __func__);
5040 
5041 	printf("%s: kmem_get_size_idx_for_chunks\n", __func__);
5042 	kmem_test_get_size_idx_for_all_chunks();
5043 	printf("%s:     PASS\n", __func__);
5044 
5045 	printf("%s: test passed\n", __func__);
5046 	*out = 1;
5047 	return 0;
5048 }
5049 SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test);
5050 
5051 
5052 #endif /* MACH_ASSERT */
5053