xref: /xnu-11417.121.6/osfmk/vm/vm_kern.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_kern.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Kernel memory management.
64  */
65 
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern_internal.h>
71 #include <vm/vm_map_internal.h>
72 #include <vm/vm_object_internal.h>
73 #include <vm/vm_page_internal.h>
74 #include <vm/vm_compressor_xnu.h>
75 #include <vm/vm_pageout_xnu.h>
76 #include <vm/vm_init_xnu.h>
77 #include <vm/vm_fault.h>
78 #include <vm/vm_memtag.h>
79 #include <vm/vm_far.h>
80 #include <kern/misc_protos.h>
81 #include <vm/cpm_internal.h>
82 #include <kern/ledger.h>
83 #include <kern/bits.h>
84 #include <kern/startup.h>
85 #include <kern/telemetry.h>
86 
87 #include <string.h>
88 
89 #include <libkern/OSDebug.h>
90 #include <libkern/crypto/sha2.h>
91 #include <libkern/section_keywords.h>
92 #include <sys/kdebug.h>
93 #include <sys/kdebug_triage.h>
94 
95 #include <san/kasan.h>
96 #include <kern/kext_alloc.h>
97 #include <kern/backtrace.h>
98 #include <os/hash.h>
99 #include <kern/zalloc_internal.h>
100 #include <libkern/crypto/rand.h>
101 
102 /*
103  *	Variables exported by this module.
104  */
105 
106 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
107 SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT];
108 SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT];
109 
110 static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges",
111     KMEM_RANGE_ID_NUM_PTR);
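/*
 * Note: pointer allocations larger than KMEM_GOBJ_THRESHOLD (or with
 * unusual guard/alignment requests) are redirected to the spray-quarantine
 * range, see kmem_use_sprayqtn() below.
 */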
112 #define KMEM_GOBJ_THRESHOLD   (32ULL << 20)
113 #if DEBUG || DEVELOPMENT
114 #define KMEM_OUTLIER_LOG_SIZE (16ULL << 10)
115 #define KMEM_OUTLIER_SIZE      0
116 #define KMEM_OUTLIER_ALIGN     1
117 btlog_t kmem_outlier_log;
118 #endif /* DEBUG || DEVELOPMENT */
119 
120 __startup_data static vm_map_size_t data_range_size;
121 __startup_data static vm_map_size_t ptr_range_size;
122 __startup_data static vm_map_size_t sprayqtn_range_size;
123 
124 #pragma mark helpers
125 
126 __attribute__((overloadable))
127 __header_always_inline kmem_flags_t
128 ANYF(kma_flags_t flags)
129 {
130 	return (kmem_flags_t)flags;
131 }
132 
133 __attribute__((overloadable))
134 __header_always_inline kmem_flags_t
135 ANYF(kmr_flags_t flags)
136 {
137 	return (kmem_flags_t)flags;
138 }
139 
140 __attribute__((overloadable))
141 __header_always_inline kmem_flags_t
142 ANYF(kmf_flags_t flags)
143 {
144 	return (kmem_flags_t)flags;
145 }
146 
147 __abortlike
148 static void
149 __kmem_invalid_size_panic(
150 	vm_map_t        map,
151 	vm_size_t       size,
152 	uint32_t        flags)
153 {
154 	panic("kmem(map=%p, flags=0x%x): invalid size %zd",
155 	    map, flags, (size_t)size);
156 }
157 
158 __abortlike
159 static void
160 __kmem_invalid_arguments_panic(
161 	const char     *what,
162 	vm_map_t        map,
163 	vm_address_t    address,
164 	vm_size_t       size,
165 	uint32_t        flags)
166 {
167 	panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
168 	    "invalid arguments passed",
169 	    what, map, (void *)address, (size_t)size, flags);
170 }
171 
172 __abortlike
173 static void
174 __kmem_failed_panic(
175 	vm_map_t        map,
176 	vm_size_t       size,
177 	uint32_t        flags,
178 	kern_return_t   kr,
179 	const char     *what)
180 {
181 	panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
182 	    what, map, (size_t)size, flags, kr);
183 }
184 
185 __abortlike
186 static void
187 __kmem_entry_not_found_panic(
188 	vm_map_t        map,
189 	vm_offset_t     addr)
190 {
191 	panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
192 }
193 
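/*
 * Select the VM object backing a kmem allocation: exactly one of
 * KMEM_KOBJECT (default kernel object) or KMEM_COMPRESSOR must be set,
 * otherwise this panics.
 */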
194 static inline vm_object_t
195 __kmem_object(kmem_flags_t flags)
196 {
197 	if (flags & KMEM_COMPRESSOR) {
198 		if (flags & KMEM_KOBJECT) {
199 			panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified");
200 		}
201 		return compressor_object;
202 	}
203 	if (!(flags & KMEM_KOBJECT)) {
204 		panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
205 	}
206 	return kernel_object_default;
207 }
208 
209 static inline pmap_mapping_type_t
210 __kmem_mapping_type(kmem_flags_t flags)
211 {
212 	if (flags & (KMEM_DATA | KMEM_COMPRESSOR | KMEM_DATA_SHARED)) {
213 		return PMAP_MAPPING_TYPE_DEFAULT;
214 	} else {
215 		return PMAP_MAPPING_TYPE_RESTRICTED;
216 	}
217 }
218 
219 static inline vm_size_t
220 __kmem_guard_left(kmem_flags_t flags)
221 {
222 	return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
223 }
224 
225 static inline vm_size_t
226 __kmem_guard_right(kmem_flags_t flags)
227 {
228 	return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
229 }
230 
231 static inline vm_size_t
232 __kmem_guard_size(kmem_flags_t flags)
233 {
234 	return __kmem_guard_left(flags) + __kmem_guard_right(flags);
235 }
236 
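/*
 * Recover the caller-requested (unrounded) size of an allocation.
 * Kernel-object entries stash the page-rounding slack (plus any KASAN
 * guard delta, see the KASAN block in kmem_alloc_guard_internal()) in
 * vme_object_or_delta; other entries keep it in the backing object's
 * vo_size_delta.
 */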
237 __pure2
238 static inline vm_size_t
239 __kmem_entry_orig_size(vm_map_entry_t entry)
240 {
241 	vm_object_t object = VME_OBJECT(entry);
242 
243 	if (entry->vme_kernel_object) {
244 		return entry->vme_end - entry->vme_start -
245 		       entry->vme_object_or_delta;
246 	} else {
247 		return object->vo_size - object->vo_size_delta;
248 	}
249 }
250 
251 
252 #pragma mark kmem range methods
253 
254 #define mach_vm_range_load(r, rmin, rmax) \
255 	({ (rmin) = (r)->min_address; (rmax) = (r)->max_address; })
256 
257 __abortlike
258 static void
259 __mach_vm_range_overflow(
260 	mach_vm_offset_t        addr,
261 	mach_vm_offset_t        size)
262 {
263 	panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
264 	    addr, addr, size);
265 }
266 
267 __abortlike
268 static void
269 __mach_vm_range_invalid(
270 	mach_vm_offset_t        min_address,
271 	mach_vm_offset_t        max_address)
272 {
273 	panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
274 	    min_address, max_address);
275 }
276 
277 __header_always_inline mach_vm_size_t
278 mach_vm_range_size(const struct mach_vm_range *r)
279 {
280 	mach_vm_offset_t rmin, rmax;
281 
282 	mach_vm_range_load(r, rmin, rmax);
283 	return rmax - rmin;
284 }
285 
286 __attribute__((overloadable))
287 __header_always_inline bool
288 mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
289 {
290 	mach_vm_offset_t rmin, rmax;
291 
292 #if CONFIG_KERNEL_TAGGING
293 	if (VM_KERNEL_ADDRESS(addr)) {
294 		addr = vm_memtag_canonicalize_kernel(addr);
295 	}
296 #endif /* CONFIG_KERNEL_TAGGING */
297 
298 	/*
299 	 * The `&` is not a typo: we really expect the check to pass,
300 	 * so encourage the compiler to eagerly load and test without branches
301 	 */
302 	mach_vm_range_load(r, rmin, rmax);
303 	return (addr >= rmin) & (addr < rmax);
304 }
305 
306 __attribute__((overloadable))
307 __header_always_inline bool
308 mach_vm_range_contains(
309 	const struct mach_vm_range *r,
310 	mach_vm_offset_t        addr,
311 	mach_vm_offset_t        size)
312 {
313 	mach_vm_offset_t rmin, rmax;
314 
315 #if CONFIG_KERNEL_TAGGING
316 	if (VM_KERNEL_ADDRESS(addr)) {
317 		addr = vm_memtag_canonicalize_kernel(addr);
318 	}
319 #endif /* CONFIG_KERNEL_TAGGING */
320 
321 	mach_vm_offset_t end;
322 	if (__improbable(os_add_overflow(addr, size, &end))) {
323 		return false;
324 	}
325 
326 	/*
327 	 * The `&` is not a typo: we really expect the check to pass,
328 	 * so encourage the compiler to eagerly load and test without branches
329 	 */
330 	mach_vm_range_load(r, rmin, rmax);
331 	return (addr >= rmin) & (end >= rmin) & (end <= rmax);
332 }
333 
334 __attribute__((overloadable))
335 __header_always_inline bool
336 mach_vm_range_intersects(
337 	const struct mach_vm_range *r1,
338 	const struct mach_vm_range *r2)
339 {
340 	mach_vm_offset_t r1_min, r1_max;
341 	mach_vm_offset_t r2_min, r2_max;
342 
343 	mach_vm_range_load(r1, r1_min, r1_max);
344 	r2_min = r2->min_address;
345 	r2_max = r2->max_address;
346 
347 	if (r1_min > r1_max) {
348 		__mach_vm_range_invalid(r1_min, r1_max);
349 	}
350 
351 	if (r2_min > r2_max) {
352 		__mach_vm_range_invalid(r2_min, r2_max);
353 	}
354 
355 	return r1_max > r2_min && r1_min < r2_max;
356 }
357 
358 __attribute__((overloadable))
359 __header_always_inline bool
360 mach_vm_range_intersects(
361 	const struct mach_vm_range *r1,
362 	mach_vm_offset_t        addr,
363 	mach_vm_offset_t        size)
364 {
365 	struct mach_vm_range r2;
366 
367 #if CONFIG_KERNEL_TAGGING
368 	addr = VM_KERNEL_STRIP_UPTR(addr);
369 #endif /* CONFIG_KERNEL_TAGGING */
370 
371 	r2.min_address = addr;
372 	if (os_add_overflow(addr, size, &r2.max_address)) {
373 		__mach_vm_range_overflow(addr, size);
374 	}
375 
376 	return mach_vm_range_intersects(r1, &r2);
377 }
378 
379 bool
380 kmem_range_id_contains(
381 	kmem_range_id_t         range_id,
382 	vm_map_offset_t         addr,
383 	vm_map_size_t           size)
384 {
385 	return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
386 }
387 
388 __abortlike
389 static void
390 kmem_range_invalid_panic(
391 	kmem_range_id_t         range_id,
392 	vm_map_offset_t         addr,
393 	vm_map_size_t           size)
394 {
395 	const struct mach_vm_range *r = &kmem_ranges[range_id];
396 	mach_vm_offset_t rmin, rmax;
397 
398 	mach_vm_range_load(r, rmin, rmax);
399 	if (addr + size < rmin) {
400 		panic("addr %p + size %llu overflows %p", (void *)addr, size,
401 		    (void *)(addr + size));
402 	}
403 	panic("addr %p + size %llu doesn't fit in one range (id: %u min: %p max: %p)",
404 	    (void *)addr, size, range_id, (void *)rmin, (void *)rmax);
405 }
406 
407 /*
408  * Return whether the entire allocation is contained in the given range
409  */
410 static bool
411 kmem_range_contains_fully(
412 	kmem_range_id_t         range_id,
413 	vm_map_offset_t         addr,
414 	vm_map_size_t           size)
415 {
416 	const struct mach_vm_range *r = &kmem_ranges[range_id];
417 	mach_vm_offset_t rmin, rmax;
418 	bool result = false;
419 
420 	if (VM_KERNEL_ADDRESS(addr)) {
421 		addr = vm_memtag_canonicalize_kernel(addr);
422 	}
423 
424 	/*
425 	 * The `&` is not a typo: we really expect the check to pass,
426 	 * so encourage the compiler to eagerly load and test without branches
427 	 */
428 	mach_vm_range_load(r, rmin, rmax);
429 	result = (addr >= rmin) & (addr < rmax);
430 	if (__improbable(result
431 	    && ((addr + size < rmin) || (addr + size > rmax)))) {
432 		kmem_range_invalid_panic(range_id, addr, size);
433 	}
434 	return result;
435 }
436 
437 vm_map_size_t
438 kmem_range_id_size(kmem_range_id_t range_id)
439 {
440 	return mach_vm_range_size(&kmem_ranges[range_id]);
441 }
442 
443 kmem_range_id_t
444 kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
445 {
446 	kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
447 
448 	for (; range_id < KMEM_RANGE_COUNT; range_id++) {
449 		if (kmem_range_contains_fully(range_id, addr, size)) {
450 			return range_id;
451 		}
452 	}
453 	return KMEM_RANGE_ID_NONE;
454 }
455 
456 bool
457 kmem_is_ptr_range(vm_map_range_id_t range_id)
458 {
459 	return (range_id >= KMEM_RANGE_ID_FIRST) &&
460 	       (range_id <= KMEM_RANGE_ID_NUM_PTR);
461 }
462 
463 __abortlike
464 static void
465 kmem_range_invalid_for_overwrite(vm_map_offset_t addr)
466 {
467 	panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges",
468 	    (void *)addr);
469 }
470 
471 mach_vm_range_t
472 kmem_validate_range_for_overwrite(
473 	vm_map_offset_t         addr,
474 	vm_map_size_t           size)
475 {
476 	vm_map_range_id_t range_id = kmem_addr_get_range(addr, size);
477 
478 	if (kmem_is_ptr_range(range_id)) {
479 		kmem_range_invalid_for_overwrite(addr);
480 	}
481 
482 	return &kmem_ranges[range_id];
483 }
484 
485 
486 #pragma mark entry parameters
487 
488 
489 __abortlike
490 static void
491 __kmem_entry_validate_panic(
492 	vm_map_t        map,
493 	vm_map_entry_t  entry,
494 	vm_offset_t     addr,
495 	vm_size_t       size,
496 	uint32_t        flags,
497 	kmem_guard_t    guard)
498 {
499 	const char *what = "???";
500 
501 	if (entry->vme_atomic != guard.kmg_atomic) {
502 		what = "atomicity";
503 	} else if (entry->is_sub_map != guard.kmg_submap) {
504 		what = "objectness";
505 	} else if (addr != entry->vme_start) {
506 		what = "left bound";
507 	} else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
508 		what = "right bound";
509 	} else if (guard.kmg_context != entry->vme_context) {
510 		what = "guard";
511 	}
512 
513 	panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
514 	    "entry:%p %s mismatch guard(0x%08x)",
515 	    map, (void *)addr, size, flags, entry,
516 	    what, guard.kmg_context);
517 }
518 
519 static bool
520 __kmem_entry_validate_guard(
521 	vm_map_entry_t  entry,
522 	vm_offset_t     addr,
523 	vm_size_t       size,
524 	kmem_flags_t    flags,
525 	kmem_guard_t    guard)
526 {
527 	if (entry->vme_atomic != guard.kmg_atomic) {
528 		return false;
529 	}
530 
531 	if (!guard.kmg_atomic) {
532 		return true;
533 	}
534 
535 	if (entry->is_sub_map != guard.kmg_submap) {
536 		return false;
537 	}
538 
539 	if (addr != entry->vme_start) {
540 		return false;
541 	}
542 
543 	if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
544 		return false;
545 	}
546 
547 	if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
548 		return false;
549 	}
550 
551 	return true;
552 }
553 
554 void
555 kmem_entry_validate_guard(
556 	vm_map_t        map,
557 	vm_map_entry_t  entry,
558 	vm_offset_t     addr,
559 	vm_size_t       size,
560 	kmem_guard_t    guard)
561 {
562 	if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
563 		__kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
564 	}
565 }
566 
567 __abortlike
568 static void
569 __kmem_entry_validate_object_panic(
570 	vm_map_t        map,
571 	vm_map_entry_t  entry,
572 	kmem_flags_t    flags)
573 {
574 	const char *what;
575 	const char *verb;
576 
577 	if (entry->is_sub_map) {
578 		panic("kmem(map=%p) entry %p is a submap", map, entry);
579 	}
580 
581 	if (flags & KMEM_KOBJECT) {
582 		what = "kernel";
583 		verb = "isn't";
584 	} else if (flags & KMEM_COMPRESSOR) {
585 		what = "compressor";
586 		verb = "isn't";
587 	} else if (entry->vme_kernel_object) {
588 		what = "kernel";
589 		verb = "is unexpectedly";
590 	} else {
591 		what = "compressor";
592 		verb = "is unexpectedly";
593 	}
594 
595 	panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
596 	    map, flags, entry, verb, what);
597 }
598 
599 static bool
600 __kmem_entry_validate_object(
601 	vm_map_entry_t  entry,
602 	kmem_flags_t    flags)
603 {
604 	if (entry->is_sub_map) {
605 		return false;
606 	}
607 	if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
608 		return false;
609 	}
610 
611 	return (bool)(flags & KMEM_COMPRESSOR) ==
612 	       (VME_OBJECT(entry) == compressor_object);
613 }
614 
615 vm_size_t
616 kmem_size_guard(
617 	vm_map_t        map,
618 	vm_offset_t     addr,
619 	kmem_guard_t    guard)
620 {
621 	kmem_flags_t flags = KMEM_GUESS_SIZE;
622 	vm_map_entry_t entry;
623 	vm_size_t size;
624 
625 	vm_map_lock_read(map);
626 
627 #if KASAN_CLASSIC
628 	addr -= PAGE_SIZE;
629 #endif /* KASAN_CLASSIC */
630 	addr = vm_memtag_canonicalize_kernel(addr);
631 
632 	if (!vm_map_lookup_entry(map, addr, &entry)) {
633 		__kmem_entry_not_found_panic(map, addr);
634 	}
635 
636 	if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
637 		__kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
638 	}
639 
640 	size = __kmem_entry_orig_size(entry);
641 
642 	vm_map_unlock_read(map);
643 
644 	return size;
645 }
646 
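/*
 * Hash a short backtrace of the caller into 16 bits; used by
 * kmem_apply_security_policy() to derive a pseudo-stable per-call-site
 * range id and allocation direction when no type hash was provided.
 */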
647 static inline uint16_t
648 kmem_hash_backtrace(
649 	void                     *fp)
650 {
651 	uint64_t  bt_count;
652 	uintptr_t bt[8] = {};
653 
654 	struct backtrace_control ctl = {
655 		.btc_frame_addr = (uintptr_t)fp,
656 	};
657 
658 	bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
659 	return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
660 }
661 
662 static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK,
663     "Insufficient bits to represent ptr ranges");
664 
665 kmem_range_id_t
666 kmem_adjust_range_id(
667 	uint32_t                  hash)
668 {
669 	return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
670 	       (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
671 }
672 
673 static bool
674 kmem_use_sprayqtn(
675 	kma_flags_t               kma_flags,
676 	vm_map_size_t             map_size,
677 	vm_offset_t               mask)
678 {
679 	/*
680 	 * Pointer allocations that are above the guard-object threshold, or that
681 	 * have leading guard pages with non-standard alignment requests, are
682 	 * redirected to the sprayqtn range.
683 	 */
684 #if DEBUG || DEVELOPMENT
685 	btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ?
686 	    BTREF_GET_NOWAIT : 0;
687 
688 	if ((kma_flags & KMA_SPRAYQTN) == 0) {
689 		if (map_size > KMEM_GOBJ_THRESHOLD) {
690 			btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE,
691 			    btref_get(__builtin_frame_address(0), flags));
692 		} else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) {
693 			btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN,
694 			    btref_get(__builtin_frame_address(0), flags));
695 		}
696 	}
697 #endif /* DEBUG || DEVELOPMENT */
698 
699 	return (kma_flags & KMA_SPRAYQTN) ||
700 	       (map_size > KMEM_GOBJ_THRESHOLD) ||
701 	       ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK));
702 }
703 
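/*
 * Pick the kmem range id and allocation direction for a kernel_map request:
 * data allocations go to the DATA range, oversized or unusually aligned
 * pointer allocations to the spray-quarantine range, and everything else to
 * one of the pointer ranges selected by the type hash (or a backtrace hash
 * as a fallback).
 */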
704 static void
705 kmem_apply_security_policy(
706 	vm_map_t                  map,
707 	kma_flags_t               kma_flags,
708 	kmem_guard_t              guard,
709 	vm_map_size_t             map_size,
710 	vm_offset_t               mask,
711 	vm_map_kernel_flags_t    *vmk_flags,
712 	bool                      assert_dir __unused)
713 {
714 	kmem_range_id_t range_id;
715 	bool from_right;
716 	uint16_t type_hash = guard.kmg_type_hash;
717 
718 	if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
719 		return;
720 	}
721 
722 	/*
723 	 * A non-zero type-hash must be passed by krealloc_type
724 	 */
725 #if (DEBUG || DEVELOPMENT)
726 	if (assert_dir && !(kma_flags & (KMA_DATA | KMA_DATA_SHARED))) {
727 		assert(type_hash != 0);
728 	}
729 #endif
730 
731 	if (kma_flags & (KMA_DATA | KMA_DATA_SHARED)) {
732 		range_id  = KMEM_RANGE_ID_DATA;
733 		/*
734 		 * As an optimization in KMA_DATA to avoid fragmentation,
735 		 * allocate static carveouts at the end of the DATA range.
736 		 */
737 		from_right = (bool)(kma_flags & KMA_PERMANENT);
738 	} else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) {
739 		range_id = KMEM_RANGE_ID_SPRAYQTN;
740 		from_right = (bool)(kma_flags & KMA_PERMANENT);
741 	} else if (type_hash) {
742 		range_id  = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK);
743 		from_right = type_hash & KMEM_DIRECTION_MASK;
744 	} else {
745 		/*
746 		 * Range id needs to correspond to one of the PTR ranges
747 		 */
748 		type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
749 		range_id  = kmem_adjust_range_id(type_hash);
750 		from_right = type_hash & KMEM_DIRECTION_MASK;
751 	}
752 
753 	vmk_flags->vmkf_range_id = range_id;
754 	vmk_flags->vmkf_last_free = from_right;
755 }
756 
757 #pragma mark allocation
758 
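/*
 * Common allocation path for kmem_alloc_guard() and kmem_alloc_contig_guard():
 * validate the size, create any guard pages, grab the wired pages up front
 * (via the alloc_pages block when provided), pick or create the backing
 * object, carve out space under the security policy, then populate the
 * object and pmap.
 */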
759 static kmem_return_t
760 kmem_alloc_guard_internal(
761 	vm_map_t                map,
762 	vm_size_t               size,
763 	vm_offset_t             mask,
764 	kma_flags_t             flags,
765 	kmem_guard_t            guard,
766 	kern_return_t         (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *))
767 {
768 	vm_object_t             object;
769 	vm_offset_t             delta = 0;
770 	vm_map_entry_t          entry = NULL;
771 	vm_map_offset_t         map_addr, fill_start;
772 	vm_map_size_t           map_size, fill_size;
773 	vm_page_t               guard_left = VM_PAGE_NULL;
774 	vm_page_t               guard_right = VM_PAGE_NULL;
775 	vm_page_t               wired_page_list = VM_PAGE_NULL;
776 	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
777 	bool                    skip_guards;
778 	kmem_return_t           kmr = { };
779 
780 	assert(kernel_map && map->pmap == kernel_pmap);
781 
782 #if DEBUG || DEVELOPMENT
783 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
784 	    size, 0, 0, 0);
785 #endif
786 
787 
788 	if (size == 0 ||
789 	    (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
790 	    (size < __kmem_guard_size(ANYF(flags)))) {
791 		__kmem_invalid_size_panic(map, size, flags);
792 	}
793 
794 	/*
795 	 * limit the size of a single extent of wired memory
796 	 * to try and limit the damage to the system if
797 	 * too many pages get wired down.
798 	 * The limit is raised to 2GB with a 128GB max physical limit,
799 	 * but scaled by installed memory above this.
800 	 *
801 	 * Note: kmem_alloc_contig_guard() is immune to this check.
802 	 */
803 	if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
804 	    alloc_pages == NULL &&
805 	    size > MAX(1ULL << 31, sane_size / 64))) {
806 		kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
807 		goto out_error;
808 	}
809 
810 #if 136275805
811 	/*
812 	 * XXX: Redundantly check the mapping size here so that failure stack traces
813 	 *      are more useful. This has no functional value but is helpful because
814 	 *      telemetry traps can currently only capture the last five calls and
815 	 *      so we want to trap as shallow as possible in a select few cases
816 	 *      where we anticipate issues.
817 	 *
818 	 *      When telemetry collection is complete, this will be removed.
819 	 */
820 	if (__improbable(!vm_map_is_map_size_valid(
821 		    kernel_map, size, flags & KMA_NOSOFTLIMIT))) {
822 		kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
823 		goto out_error;
824 	}
825 #endif /* 136275805 */
826 
827 	/*
828 	 * Guard pages:
829 	 *
830 	 * Guard pages are implemented as fictitious pages.
831 	 *
832 	 * However, some maps, and some objects are known
833 	 * to manage their memory explicitly, and do not need
834 	 * those to be materialized, which saves memory.
835 	 *
836 	 * By placing guard pages on either end of a stack,
837 	 * they can help detect cases where a thread walks
838 	 * off either end of its stack.
839 	 *
840 	 * They are allocated and set up here and attempts
841 	 * to access those pages are trapped in vm_fault_page().
842 	 *
843 	 * The map_size we were passed may include extra space for
844 	 * guard pages. fill_size represents the actual size to populate.
845 	 * Similarly, fill_start indicates where the actual pages
846 	 * will begin in the range.
847 	 */
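	/*
	 * Rough layout when both KMA_GUARD_FIRST and KMA_GUARD_LAST are set
	 * (a sketch, not to scale):
	 *
	 *  map_addr                                       map_addr + map_size
	 *  [ guard page ][ fill_size bytes of real pages ][ guard page ]
	 *                 ^ fill_start
	 */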
848 
849 	map_size   = round_page(size);
850 	fill_start = 0;
851 	fill_size  = map_size - __kmem_guard_size(ANYF(flags));
852 
853 #if KASAN_CLASSIC
854 	if (flags & KMA_KASAN_GUARD) {
855 		assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0);
856 		flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
857 		delta     = ptoa(2);
858 		map_size += delta;
859 	}
860 #else
861 	(void)delta;
862 #endif /* KASAN_CLASSIC */
863 
864 	skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) ||
865 	    map->never_faults;
866 
867 	if (flags & KMA_GUARD_FIRST) {
868 		vmk_flags.vmkf_guard_before = true;
869 		fill_start += PAGE_SIZE;
870 	}
871 	if (flags & KMA_NOSOFTLIMIT) {
872 		vmk_flags.vmkf_no_soft_limit = true;
873 	}
874 	if ((flags & KMA_GUARD_FIRST) && !skip_guards) {
875 		guard_left = vm_page_create_guard((flags & KMA_NOPAGEWAIT) == 0);
876 		if (__improbable(guard_left == VM_PAGE_NULL)) {
877 			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
878 			goto out_error;
879 		}
880 	}
881 	if ((flags & KMA_GUARD_LAST) && !skip_guards) {
882 		guard_right = vm_page_create_guard((flags & KMA_NOPAGEWAIT) == 0);
883 		if (__improbable(guard_right == VM_PAGE_NULL)) {
884 			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
885 			goto out_error;
886 		}
887 	}
888 
889 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
890 		if (alloc_pages) {
891 			kmr.kmr_return = alloc_pages(fill_size, flags,
892 			    &wired_page_list);
893 		} else {
894 			kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
895 			    &wired_page_list);
896 		}
897 		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
898 			goto out_error;
899 		}
900 	}
901 
902 	/*
903 	 *	Allocate a new object (if necessary).  We must do this before
904 	 *	locking the map, or risk deadlock with the default pager.
905 	 */
906 	if (flags & KMA_KOBJECT) {
907 		{
908 			object = kernel_object_default;
909 		}
910 		vm_object_reference(object);
911 	} else if (flags & KMA_COMPRESSOR) {
912 		object = compressor_object;
913 		vm_object_reference(object);
914 	} else {
915 		object = vm_object_allocate(map_size, map->serial_id);
916 		vm_object_lock(object);
917 		vm_object_set_size(object, map_size, size);
918 		/* stabilize the object to prevent shadowing */
919 		object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
920 		VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
921 		vm_object_unlock(object);
922 	}
923 
924 	if (flags & KMA_LAST_FREE) {
925 		vmk_flags.vmkf_last_free = true;
926 	}
927 	if (flags & KMA_PERMANENT) {
928 		vmk_flags.vmf_permanent = true;
929 	}
930 	kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
931 	    false);
932 
933 	kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
934 	    vmk_flags, &entry);
935 	if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
936 		vm_object_deallocate(object);
937 		goto out_error;
938 	}
939 
940 	map_addr = entry->vme_start;
941 	VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
942 	VME_ALIAS_SET(entry, guard.kmg_tag);
943 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
944 		VME_OFFSET_SET(entry, map_addr);
945 	}
946 
947 #if KASAN
948 	if ((flags & KMA_KOBJECT) && guard.kmg_atomic) {
949 		entry->vme_object_or_delta = (-size & PAGE_MASK) + delta;
950 	}
951 #endif /* KASAN */
952 
953 	if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
954 		entry->wired_count = 1;
955 		vme_btref_consider_and_set(entry, __builtin_frame_address(0));
956 	}
957 
958 	if (guard_left || guard_right || wired_page_list) {
959 		vm_object_offset_t offset = 0ull;
960 
961 		vm_object_lock(object);
962 		vm_map_unlock(map);
963 
964 		if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
965 			offset = map_addr;
966 		}
967 
968 		if (guard_left) {
969 			vm_page_insert(guard_left, object, offset);
970 			guard_left->vmp_busy = FALSE;
971 			guard_left = VM_PAGE_NULL;
972 		}
973 
974 		if (guard_right) {
975 			vm_page_insert(guard_right, object,
976 			    offset + fill_start + fill_size);
977 			guard_right->vmp_busy = FALSE;
978 			guard_right = VM_PAGE_NULL;
979 		}
980 
981 		if (wired_page_list) {
982 			kernel_memory_populate_object_and_unlock(object,
983 			    map_addr + fill_start, offset + fill_start, fill_size,
984 			    wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
985 			    __kmem_mapping_type(ANYF(flags)));
986 		} else {
987 			vm_object_unlock(object);
988 		}
989 	} else {
990 		vm_map_unlock(map);
991 	}
992 
993 	/*
994 	 * now that the pages are wired, we no longer have to fear coalescing
995 	 */
996 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
997 		vm_map_simplify(map, map_addr);
998 	}
999 
1000 #if DEBUG || DEVELOPMENT
1001 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1002 	    atop(fill_size), 0, 0, 0);
1003 #endif /* DEBUG || DEVELOPMENT */
1004 	kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);
1005 
1006 #if KASAN
1007 	if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) {
1008 		/*
1009 		 * We need to allow the range for pageable memory,
1010 		 * or faulting will not be allowed.
1011 		 */
1012 		kasan_notify_address(map_addr, map_size);
1013 	}
1014 #endif /* KASAN */
1015 #if KASAN_CLASSIC
1016 	if (flags & KMA_KASAN_GUARD) {
1017 		kmr.kmr_address += PAGE_SIZE;
1018 		kasan_alloc_large(kmr.kmr_address, size);
1019 	}
1020 #endif /* KASAN_CLASSIC */
1021 #if CONFIG_KERNEL_TAGGING
1022 	if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) {
1023 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag((caddr_t)kmr.kmr_address + fill_start, fill_size);
1024 		kmr.kmr_ptr = (caddr_t)kmr.kmr_ptr - fill_start;
1025 #if KASAN_TBI
1026 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, map_size, size);
1027 #endif /* KASAN_TBI */
1028 	}
1029 #endif /* CONFIG_KERNEL_TAGGING */
1030 	return kmr;
1031 
1032 out_error:
1033 	if (flags & KMA_NOFAIL) {
1034 		__kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
1035 	}
1036 	if (guard_left) {
1037 		guard_left->vmp_snext = wired_page_list;
1038 		wired_page_list = guard_left;
1039 	}
1040 	if (guard_right) {
1041 		guard_right->vmp_snext = wired_page_list;
1042 		wired_page_list = guard_right;
1043 	}
1044 	if (wired_page_list) {
1045 		vm_page_free_list(wired_page_list, FALSE);
1046 	}
1047 
1048 #if DEBUG || DEVELOPMENT
1049 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1050 	    0, 0, 0, 0);
1051 #endif /* DEBUG || DEVELOPMENT */
1052 
1053 	return kmr;
1054 }
1055 
1056 kmem_return_t
1057 kmem_alloc_guard(
1058 	vm_map_t        map,
1059 	vm_size_t       size,
1060 	vm_offset_t     mask,
1061 	kma_flags_t     flags,
1062 	kmem_guard_t    guard)
1063 {
1064 	return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL);
1065 }
1066 
1067 kmem_return_t
1068 kmem_alloc_contig_guard(
1069 	vm_map_t                map,
1070 	vm_size_t               size,
1071 	vm_offset_t             mask,
1072 	ppnum_t                 max_pnum,
1073 	ppnum_t                 pnum_mask,
1074 	kma_flags_t             flags,
1075 	kmem_guard_t            guard)
1076 {
1077 	__auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) {
1078 		return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
1079 	};
1080 
1081 	return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages);
1082 }
1083 
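/*
 * Carve a kernel submap of `parent` covering `size` bytes: the new map
 * shares the parent's pmap and is entered into the parent as a single
 * entry (atomic unless KMS_DATA).
 */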
1084 kmem_return_t
1085 kmem_suballoc(
1086 	vm_map_t                parent,
1087 	mach_vm_offset_t       *addr,
1088 	vm_size_t               size,
1089 	vm_map_create_options_t vmc_options,
1090 	int                     vm_flags,
1091 	kms_flags_t             flags,
1092 	vm_tag_t                tag)
1093 {
1094 	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1095 	vm_map_offset_t map_addr = 0;
1096 	kmem_return_t kmr = { };
1097 	vm_map_t map;
1098 
1099 	assert(page_aligned(size));
1100 	assert(parent->pmap == kernel_pmap);
1101 
1102 	vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);
1103 
1104 	if (parent == kernel_map) {
1105 		assert(vmk_flags.vmf_overwrite || (flags & KMS_DATA));
1106 	}
1107 
1108 	if (vmk_flags.vmf_fixed) {
1109 		map_addr = trunc_page(*addr);
1110 	}
1111 
1112 	pmap_reference(vm_map_pmap(parent));
1113 	map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);
1114 
1115 	/*
1116 	 * 1. vm_map_enter() will consume one ref on success.
1117 	 *
1118 	 * 2. make the entry atomic as kernel submaps should never be split.
1119 	 *
1120 	 * 3. instruct vm_map_enter() that it is a fresh submap
1121 	 *    that needs to be taught its bounds as it is inserted.
1122 	 */
1123 	vm_map_reference(map);
1124 
1125 	vmk_flags.vmkf_submap = true;
1126 	if ((flags & KMS_DATA) == 0) {
1127 		/* FIXME: IOKit submaps get fragmented and can't be atomic */
1128 		vmk_flags.vmkf_submap_atomic = true;
1129 	}
1130 	vmk_flags.vmkf_submap_adjust = true;
1131 	if (flags & KMS_LAST_FREE) {
1132 		vmk_flags.vmkf_last_free = true;
1133 	}
1134 	if (flags & KMS_PERMANENT) {
1135 		vmk_flags.vmf_permanent = true;
1136 	}
1137 	if (flags & KMS_DATA) {
1138 		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1139 	}
1140 	if (flags & KMS_NOSOFTLIMIT) {
1141 		vmk_flags.vmkf_no_soft_limit = true;
1142 	}
1143 
1144 	kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
1145 	    vmk_flags, (vm_object_t)map, 0, FALSE,
1146 	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1147 
1148 	if (kmr.kmr_return != KERN_SUCCESS) {
1149 		if (flags & KMS_NOFAIL) {
1150 			panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
1151 			    parent, size, kmr.kmr_return);
1152 		}
1153 		assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
1154 		vm_map_deallocate(map);
1155 		vm_map_deallocate(map); /* also removes ref to pmap */
1156 		return kmr;
1157 	}
1158 
1159 	/*
1160 	 * For kmem_suballocs that register a claim and are assigned a range, ensure
1161 	 * that the exact same range is returned.
1162 	 */
1163 	if (*addr != 0 && parent == kernel_map &&
1164 	    startup_phase > STARTUP_SUB_KMEM) {
1165 		assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
1166 	} else {
1167 		*addr = map_addr;
1168 	}
1169 
1170 	kmr.kmr_submap = map;
1171 	return kmr;
1172 }
1173 
1174 /*
1175  *	kmem_alloc:
1176  *
1177  *	Allocate wired-down memory in the kernel's address map
1178  *	or a submap.  The memory is not zero-filled.
1179  */
1180 
1181 __exported kern_return_t
1182 kmem_alloc_external(
1183 	vm_map_t        map,
1184 	vm_offset_t     *addrp,
1185 	vm_size_t       size);
1186 kern_return_t
1187 kmem_alloc_external(
1188 	vm_map_t        map,
1189 	vm_offset_t     *addrp,
1190 	vm_size_t       size)
1191 {
1192 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1193 		return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
1194 	}
1195 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1196 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1197 }
1198 
1199 
1200 /*
1201  *	kmem_alloc_kobject:
1202  *
1203  *	Allocate wired-down memory in the kernel's address map
1204  *	or a submap.  The memory is not zero-filled.
1205  *
1206  *	The memory is allocated in the kernel_object.
1207  *	It may not be copied with vm_map_copy, and
1208  *	it may not be reallocated with kmem_realloc.
1209  */
1210 
1211 __exported kern_return_t
1212 kmem_alloc_kobject_external(
1213 	vm_map_t        map,
1214 	vm_offset_t     *addrp,
1215 	vm_size_t       size);
1216 kern_return_t
1217 kmem_alloc_kobject_external(
1218 	vm_map_t        map,
1219 	vm_offset_t     *addrp,
1220 	vm_size_t       size)
1221 {
1222 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1223 		return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
1224 	}
1225 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1226 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1227 }
1228 
1229 /*
1230  *	kmem_alloc_pageable:
1231  *
1232  *	Allocate pageable memory in the kernel's address map.
1233  */
1234 
1235 __exported kern_return_t
1236 kmem_alloc_pageable_external(
1237 	vm_map_t        map,
1238 	vm_offset_t     *addrp,
1239 	vm_size_t       size);
1240 kern_return_t
1241 kmem_alloc_pageable_external(
1242 	vm_map_t        map,
1243 	vm_offset_t     *addrp,
1244 	vm_size_t       size)
1245 {
1246 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1247 		return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA, vm_tag_bt());
1248 	}
1249 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1250 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1251 }
1252 
1253 static __attribute__((always_inline, warn_unused_result))
1254 kern_return_t
1255 mach_vm_allocate_kernel_sanitize(
1256 	vm_map_t                map,
1257 	mach_vm_offset_ut       addr_u,
1258 	mach_vm_size_ut         size_u,
1259 	vm_map_kernel_flags_t   vmk_flags,
1260 	vm_map_offset_t        *map_addr,
1261 	vm_map_size_t          *map_size)
1262 {
1263 	kern_return_t   result;
1264 	vm_map_offset_t map_end;
1265 
1266 	if (vmk_flags.vmf_fixed) {
1267 		result = vm_sanitize_addr_size(addr_u, size_u,
1268 		    VM_SANITIZE_CALLER_VM_ALLOCATE_FIXED,
1269 		    map,
1270 		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS | VM_SANITIZE_FLAGS_REALIGN_START,
1271 		    map_addr, &map_end, map_size);
1272 		if (__improbable(result != KERN_SUCCESS)) {
1273 			return result;
1274 		}
1275 	} else {
1276 		*map_addr = 0;
1277 		result = vm_sanitize_size(0, size_u,
1278 		    VM_SANITIZE_CALLER_VM_ALLOCATE_ANYWHERE, map,
1279 		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS,
1280 		    map_size);
1281 		if (__improbable(result != KERN_SUCCESS)) {
1282 			return result;
1283 		}
1284 	}
1285 
1286 	return KERN_SUCCESS;
1287 }
1288 
1289 kern_return_t
1290 mach_vm_allocate_kernel(
1291 	vm_map_t                map,
1292 	mach_vm_offset_ut      *addr_u,
1293 	mach_vm_size_ut         size_u,
1294 	vm_map_kernel_flags_t   vmk_flags)
1295 {
1296 	vm_map_offset_t map_addr;
1297 	vm_map_size_t   map_size;
1298 	kern_return_t   result;
1299 
1300 	if (map == VM_MAP_NULL) {
1301 		ktriage_record(thread_tid(current_thread()),
1302 		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1303 		    KDBG_TRIAGE_RESERVED,
1304 		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADMAP_ERROR),
1305 		    KERN_INVALID_ARGUMENT /* arg */);
1306 		return KERN_INVALID_ARGUMENT;
1307 	}
1308 
1309 	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
1310 	    VM_FLAGS_USER_ALLOCATE)) {
1311 		return KERN_INVALID_ARGUMENT;
1312 	}
1313 
1314 	result = mach_vm_allocate_kernel_sanitize(map,
1315 	    *addr_u,
1316 	    size_u,
1317 	    vmk_flags,
1318 	    &map_addr,
1319 	    &map_size);
1320 	if (__improbable(result != KERN_SUCCESS)) {
1321 		result = vm_sanitize_get_kr(result);
1322 		if (result == KERN_SUCCESS) {
1323 			*addr_u = vm_sanitize_wrap_addr(0);
1324 		} else {
1325 			ktriage_record(thread_tid(current_thread()),
1326 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1327 			    KDBG_TRIAGE_RESERVED,
1328 			    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADSIZE_ERROR),
1329 			    KERN_INVALID_ARGUMENT /* arg */);
1330 		}
1331 		return result;
1332 	}
1333 
1334 	vm_map_kernel_flags_update_range_id(&vmk_flags, map, map_size);
1335 
1336 	result = vm_map_enter(
1337 		map,
1338 		&map_addr,
1339 		map_size,
1340 		(vm_map_offset_t)0,
1341 		vmk_flags,
1342 		VM_OBJECT_NULL,
1343 		(vm_object_offset_t)0,
1344 		FALSE,
1345 		VM_PROT_DEFAULT,
1346 		VM_PROT_ALL,
1347 		VM_INHERIT_DEFAULT);
1348 
1349 	if (result == KERN_SUCCESS) {
1350 #if KASAN
1351 		if (map->pmap == kernel_pmap) {
1352 			kasan_notify_address(map_addr, map_size);
1353 		}
1354 #endif
1355 		*addr_u = vm_sanitize_wrap_addr(map_addr);
1356 	} else {
1357 		ktriage_record(thread_tid(current_thread()),
1358 		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1359 		    KDBG_TRIAGE_RESERVED,
1360 		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_VMMAPENTER_ERROR),
1361 		    result /* arg */);
1362 	}
1363 	return result;
1364 }
1365 
1366 #pragma mark population
1367 
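/*
 * Enter a single wired page into the kernel pmap.  The first attempt uses
 * PMAP_OPTIONS_NOWAIT while the object lock is held; on resource shortage
 * the object lock is dropped and the mapping is retried blocking.
 */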
1368 static void
1369 kernel_memory_populate_pmap_enter(
1370 	vm_object_t             object,
1371 	vm_address_t            addr,
1372 	vm_object_offset_t      offset,
1373 	vm_page_t               mem,
1374 	vm_prot_t               prot,
1375 	int                     pe_flags,
1376 	pmap_mapping_type_t     mapping_type)
1377 {
1378 	kern_return_t   pe_result;
1379 	int             pe_options;
1380 
1381 	if (VMP_ERROR_GET(mem)) {
1382 		panic("VM page %p should not have an error", mem);
1383 	}
1384 
1385 	pe_options = PMAP_OPTIONS_NOWAIT;
1386 	if (object->internal) {
1387 		pe_options |= PMAP_OPTIONS_INTERNAL;
1388 	}
1389 	if (mem->vmp_reusable || object->all_reusable) {
1390 		pe_options |= PMAP_OPTIONS_REUSABLE;
1391 	}
1392 
1393 	pe_result = pmap_enter_options(kernel_pmap, addr + offset,
1394 	    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
1395 	    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1396 
1397 	if (pe_result == KERN_RESOURCE_SHORTAGE) {
1398 		vm_object_unlock(object);
1399 
1400 		pe_options &= ~PMAP_OPTIONS_NOWAIT;
1401 
1402 		pe_result = pmap_enter_options(kernel_pmap, addr + offset,
1403 		    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
1404 		    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1405 
1406 		vm_object_lock(object);
1407 	}
1408 
1409 	assert(pe_result == KERN_SUCCESS);
1410 }
1411 
1412 void
1413 kernel_memory_populate_object_and_unlock(
1414 	vm_object_t             object, /* must be locked */
1415 	vm_address_t            addr,
1416 	vm_offset_t             offset,
1417 	vm_size_t               size,
1418 	vm_page_t               page_list,
1419 	kma_flags_t             flags,
1420 	vm_tag_t                tag,
1421 	vm_prot_t               prot,
1422 	pmap_mapping_type_t     mapping_type)
1423 {
1424 	vm_page_t       mem;
1425 	int             pe_flags;
1426 	bool            gobbled_list = page_list && page_list->vmp_gobbled;
1427 
1428 	assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0));
1429 	assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);
1430 
1431 
1432 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
1433 		assert3u(offset, ==, addr);
1434 	} else {
1435 		/*
1436 		 * kernel_memory_populate_pmap_enter() might drop the object
1437 		 * lock, and the caller might not own a reference anymore
1438 		 * and rely on holding the vm object lock for liveness.
1439 		 */
1440 		vm_object_reference_locked(object);
1441 	}
1442 
1443 	if (flags & KMA_KSTACK) {
1444 		pe_flags = VM_MEM_STACK;
1445 	} else {
1446 		pe_flags = 0;
1447 	}
1448 
1449 
1450 	for (vm_object_offset_t pg_offset = 0;
1451 	    pg_offset < size;
1452 	    pg_offset += PAGE_SIZE_64) {
1453 		if (page_list == NULL) {
1454 			panic("%s: page_list too short", __func__);
1455 		}
1456 
1457 		mem = page_list;
1458 		page_list = mem->vmp_snext;
1459 		mem->vmp_snext = NULL;
1460 
1461 		assert(mem->vmp_wire_count == 0);
1462 		assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
1463 		assert(vm_page_is_canonical(mem));
1464 
1465 		if (flags & KMA_COMPRESSOR) {
1466 			mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
1467 			/*
1468 			 * Background processes doing I/O accounting can call
1469 			 * into NVME driver to do some work which results in
1470 			 * an allocation here and so we want to make sure
1471 			 * that the pages used by compressor, regardless of
1472 			 * process context, are never on the special Q.
1473 			 */
1474 			mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
1475 
1476 			vm_page_insert(mem, object, offset + pg_offset);
1477 		} else {
1478 			mem->vmp_q_state = VM_PAGE_IS_WIRED;
1479 			mem->vmp_wire_count = 1;
1480 
1481 
1482 			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
1483 		}
1484 
1485 		mem->vmp_gobbled = false;
1486 		mem->vmp_busy = false;
1487 		mem->vmp_pmapped = true;
1488 		mem->vmp_wpmapped = true;
1489 
1490 		/*
1491 		 * Manual PMAP_ENTER_OPTIONS() with shortcuts
1492 		 * for the kernel and compressor objects.
1493 		 */
1494 		kernel_memory_populate_pmap_enter(object, addr, pg_offset,
1495 		    mem, prot, pe_flags, mapping_type);
1496 
1497 		if (flags & KMA_NOENCRYPT) {
1498 			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
1499 		}
1500 	}
1501 
1502 	if (page_list) {
1503 		panic("%s: page_list too long", __func__);
1504 	}
1505 
1506 	vm_object_unlock(object);
1507 	if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) {
1508 		vm_object_deallocate(object);
1509 	}
1510 
1511 	/*
1512 	 * Update the accounting:
1513 	 * - the compressor "wired" pages don't really count as wired
1514 	 * - kmem_alloc_contig_guard() gives gobbled pages,
1515 	 *   which already count as wired but need to be ungobbled.
1516 	 */
1517 	if (gobbled_list) {
1518 		vm_page_lockspin_queues();
1519 		if (flags & KMA_COMPRESSOR) {
1520 			vm_page_wire_count -= atop(size);
1521 		}
1522 		vm_page_gobble_count -= atop(size);
1523 		vm_page_unlock_queues();
1524 	} else if ((flags & KMA_COMPRESSOR) == 0) {
1525 		vm_page_lockspin_queues();
1526 		vm_page_wire_count += atop(size);
1527 		vm_page_unlock_queues();
1528 	}
1529 
1530 	if (flags & KMA_KOBJECT) {
1531 		/* vm_page_insert_wired() handles regular objects already */
1532 		vm_tag_update_size(tag, size, NULL);
1533 	}
1534 
1535 #if KASAN
1536 	if (flags & KMA_COMPRESSOR) {
1537 		kasan_notify_address_nopoison(addr, size);
1538 	} else {
1539 		kasan_notify_address(addr, size);
1540 	}
1541 #endif /* KASAN */
1542 }
1543 
1544 
1545 kern_return_t
1546 kernel_memory_populate(
1547 	vm_offset_t     addr,
1548 	vm_size_t       size,
1549 	kma_flags_t     flags,
1550 	vm_tag_t        tag)
1551 {
1552 	kern_return_t   kr = KERN_SUCCESS;
1553 	vm_page_t       page_list = NULL;
1554 	vm_size_t       page_count = atop_64(size);
1555 	vm_object_t     object = __kmem_object(ANYF(flags));
1556 
1557 #if DEBUG || DEVELOPMENT
1558 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
1559 	    size, 0, 0, 0);
1560 #endif /* DEBUG || DEVELOPMENT */
1561 
1562 
1563 	kr = vm_page_alloc_list(page_count, flags, &page_list);
1564 	if (kr == KERN_SUCCESS) {
1565 		vm_object_lock(object);
1566 		kernel_memory_populate_object_and_unlock(object, addr,
1567 		    addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
1568 		    __kmem_mapping_type(ANYF(flags)));
1569 	}
1570 
1571 #if DEBUG || DEVELOPMENT
1572 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1573 	    page_count, 0, 0, 0);
1574 #endif /* DEBUG || DEVELOPMENT */
1575 	return kr;
1576 }
1577 
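/*
 * Tear down the mappings and free the pages backing [addr, addr + size)
 * in the kernel or compressor object; non-compressor pages are also
 * unwired from the accounting.
 */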
1578 void
1579 kernel_memory_depopulate(
1580 	vm_offset_t        addr,
1581 	vm_size_t          size,
1582 	kma_flags_t        flags,
1583 	vm_tag_t           tag)
1584 {
1585 	vm_object_t        object = __kmem_object(ANYF(flags));
1586 	vm_object_offset_t offset = addr;
1587 	vm_page_t          mem;
1588 	vm_page_t          local_freeq = NULL;
1589 	unsigned int       pages_unwired = 0;
1590 
1591 	vm_object_lock(object);
1592 
1593 	pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);
1594 
1595 	for (vm_object_offset_t pg_offset = 0;
1596 	    pg_offset < size;
1597 	    pg_offset += PAGE_SIZE_64) {
1598 		mem = vm_page_lookup(object, offset + pg_offset);
1599 
1600 		assert(mem);
1601 
1602 		if (flags & KMA_COMPRESSOR) {
1603 			assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
1604 		} else {
1605 			assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
1606 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
1607 			pages_unwired++;
1608 		}
1609 
1610 		mem->vmp_busy = TRUE;
1611 
1612 		assert(mem->vmp_tabled);
1613 		vm_page_remove(mem, TRUE);
1614 		assert(mem->vmp_busy);
1615 
1616 		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
1617 
1618 		mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
1619 		mem->vmp_snext = local_freeq;
1620 		local_freeq = mem;
1621 	}
1622 
1623 	vm_object_unlock(object);
1624 
1625 	vm_page_free_list(local_freeq, TRUE);
1626 
1627 	if (!(flags & KMA_COMPRESSOR)) {
1628 		vm_page_lockspin_queues();
1629 		vm_page_wire_count -= pages_unwired;
1630 		vm_page_unlock_queues();
1631 	}
1632 
1633 	if (flags & KMA_KOBJECT) {
1634 		/* vm_page_remove() handles regular objects already */
1635 		vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
1636 	}
1637 }
1638 
1639 #pragma mark reallocation
1640 
1641 __abortlike
1642 static void
1643 __kmem_realloc_invalid_object_size_panic(
1644 	vm_map_t                map,
1645 	vm_address_t            address,
1646 	vm_size_t               size,
1647 	vm_map_entry_t          entry)
1648 {
1649 	vm_object_t object  = VME_OBJECT(entry);
1650 	vm_size_t   objsize = __kmem_entry_orig_size(entry);
1651 
1652 	panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1653 	    "object %p has unexpected size %ld",
1654 	    map, (void *)address, (size_t)size, entry, object, objsize);
1655 }
1656 
1657 __abortlike
1658 static void
1659 __kmem_realloc_invalid_pager_panic(
1660 	vm_map_t                map,
1661 	vm_address_t            address,
1662 	vm_size_t               size,
1663 	vm_map_entry_t          entry)
1664 {
1665 	vm_object_t object     = VME_OBJECT(entry);
1666 	memory_object_t pager  = object->pager;
1667 	bool pager_created     = object->pager_created;
1668 	bool pager_initialized = object->pager_initialized;
1669 	bool pager_ready       = object->pager_ready;
1670 
1671 	panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1672 	    "object %p has unexpected pager %p (%d,%d,%d)",
1673 	    map, (void *)address, (size_t)size, entry, object,
1674 	    pager, pager_created, pager_initialized, pager_ready);
1675 }
1676 
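/*
 * Shrinking flavor of kmem_realloc_guard(): clip the (atomic) entry at the
 * new size, remove and unwire the tail of the mapping, then recreate the
 * trailing guard page (or depopulate it for kernel-object allocations).
 */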
1677 static kmem_return_t
1678 kmem_realloc_shrink_guard(
1679 	vm_map_t                map,
1680 	vm_offset_t             req_oldaddr,
1681 	vm_size_t               req_oldsize,
1682 	vm_size_t               req_newsize,
1683 	kmr_flags_t             flags,
1684 	kmem_guard_t            guard,
1685 	vm_map_entry_t          entry)
1686 {
1687 	vmr_flags_t             vmr_flags = VM_MAP_REMOVE_KUNWIRE;
1688 	vm_object_t             object;
1689 	vm_offset_t             delta = 0;
1690 	kmem_return_t           kmr;
1691 	bool                    was_atomic;
1692 	vm_size_t               oldsize = round_page(req_oldsize);
1693 	vm_size_t               newsize = round_page(req_newsize);
1694 	vm_address_t            oldaddr = req_oldaddr;
1695 
1696 #if KASAN_CLASSIC
1697 	if (flags & KMR_KASAN_GUARD) {
1698 		assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0);
1699 		flags   |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1700 		oldaddr -= PAGE_SIZE;
1701 		delta    = ptoa(2);
1702 		oldsize += delta;
1703 		newsize += delta;
1704 	}
1705 #endif /* KASAN_CLASSIC */
1706 
1707 	if (flags & KMR_TAG) {
1708 		oldaddr = vm_memtag_canonicalize_kernel(req_oldaddr);
1709 	}
1710 
1711 	vm_map_lock_assert_exclusive(map);
1712 
1713 	if ((flags & KMR_KOBJECT) == 0) {
1714 		object = VME_OBJECT(entry);
1715 		vm_object_reference(object);
1716 	}
1717 
1718 	/*
1719 	 *	Shrinking an atomic entry starts with splitting it,
1720 	 *	and removing the second half.
1721 	 */
1722 	was_atomic = entry->vme_atomic;
1723 	entry->vme_atomic = false;
1724 	vm_map_clip_end(map, entry, entry->vme_start + newsize);
1725 	entry->vme_atomic = was_atomic;
1726 
1727 #if KASAN
1728 	if (entry->vme_kernel_object && was_atomic) {
1729 		entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta;
1730 	}
1731 #if KASAN_CLASSIC
1732 	if (flags & KMR_KASAN_GUARD) {
1733 		kasan_poison_range(oldaddr + newsize, oldsize - newsize,
1734 		    ASAN_VALID);
1735 	}
1736 #endif
1737 #if KASAN_TBI
1738 	if (flags & KMR_TAG) {
1739 		kasan_tbi_mark_free_space((caddr_t)req_oldaddr + newsize, oldsize - newsize);
1740 	}
1741 #endif /* KASAN_TBI */
1742 #endif /* KASAN */
1743 	(void)vm_map_remove_and_unlock(map,
1744 	    oldaddr + newsize, oldaddr + oldsize,
1745 	    vmr_flags, KMEM_GUARD_NONE);
1746 
1747 
1748 	/*
1749 	 *	Lastly, if there are guard pages, deal with them.
1750 	 *
1751 	 *	The kernel object just needs to depopulate,
1752 	 *	regular objects require freeing the last page
1753 	 *	and replacing it with a guard.
1754 	 */
1755 	if (flags & KMR_KOBJECT) {
1756 		if (flags & KMR_GUARD_LAST) {
1757 			kma_flags_t dflags = KMA_KOBJECT;
1758 			kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
1759 			    PAGE_SIZE, dflags, guard.kmg_tag);
1760 		}
1761 	} else {
1762 		vm_page_t guard_right = VM_PAGE_NULL;
1763 		vm_offset_t remove_start = newsize;
1764 
1765 		if (flags & KMR_GUARD_LAST) {
1766 			if (!map->never_faults) {
1767 				guard_right = vm_page_create_guard(true);
1768 			}
1769 			remove_start -= PAGE_SIZE;
1770 		}
1771 
1772 		vm_object_lock(object);
1773 
1774 		if (object->vo_size != oldsize) {
1775 			__kmem_realloc_invalid_object_size_panic(map,
1776 			    req_oldaddr, req_oldsize + delta, entry);
1777 		}
1778 		vm_object_set_size(object, newsize, req_newsize);
1779 
1780 		vm_object_page_remove(object, remove_start, oldsize);
1781 
1782 		if (guard_right) {
1783 			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
1784 			guard_right->vmp_busy = false;
1785 		}
1786 		vm_object_unlock(object);
1787 		vm_object_deallocate(object);
1788 	}
1789 
1790 	kmr.kmr_address = req_oldaddr;
1791 	kmr.kmr_return  = 0;
1792 #if KASAN_CLASSIC
1793 	if (flags & KMR_KASAN_GUARD) {
1794 		kasan_alloc_large(kmr.kmr_address, req_newsize);
1795 	}
1796 #endif /* KASAN_CLASSIC */
1797 #if KASAN_TBI
1798 	if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1799 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
1800 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
1801 	}
1802 #endif /* KASAN_TBI */
1803 
1804 	return kmr;
1805 }
1806 
1807 kmem_return_t
1808 kmem_realloc_guard(
1809 	vm_map_t                map,
1810 	vm_offset_t             req_oldaddr,
1811 	vm_size_t               req_oldsize,
1812 	vm_size_t               req_newsize,
1813 	kmr_flags_t             flags,
1814 	kmem_guard_t            guard)
1815 {
1816 	vm_object_t             object;
1817 	vm_size_t               oldsize;
1818 	vm_size_t               newsize;
1819 	vm_offset_t             delta = 0;
1820 	vm_map_offset_t         oldaddr;
1821 	vm_map_offset_t         newaddr;
1822 	vm_object_offset_t      newoffs;
1823 	vm_map_entry_t          oldentry;
1824 	vm_map_entry_t          newentry;
1825 	vm_page_t               page_list = NULL;
1826 	bool                    needs_wakeup = false;
1827 	kmem_return_t           kmr = { };
1828 	unsigned int            last_timestamp;
1829 	vm_map_kernel_flags_t   vmk_flags = {
1830 		.vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
1831 	};
1832 
1833 	assert(KMEM_REALLOC_FLAGS_VALID(flags));
1834 
1835 	if (!guard.kmg_atomic) {
1836 		if (!(flags & (KMR_DATA | KMR_DATA_SHARED))) {
1837 			__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
1838 			    req_oldsize, flags);
1839 		}
1840 
1841 		if (flags & KMR_KOBJECT) {
1842 			__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
1843 			    req_oldsize, flags);
1844 		}
1845 	}
1846 
1847 	if (req_oldaddr == 0ul) {
1848 		return kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
1849 	}
1850 
1851 	if (req_newsize == 0ul) {
1852 		kmem_free_guard(map, req_oldaddr, req_oldsize,
1853 		    (kmf_flags_t)flags, guard);
1854 		return kmr;
1855 	}
1856 
1857 	if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
1858 		__kmem_invalid_size_panic(map, req_newsize, flags);
1859 	}
1860 	if (req_newsize < __kmem_guard_size(ANYF(flags))) {
1861 		__kmem_invalid_size_panic(map, req_newsize, flags);
1862 	}
1863 
1864 	oldsize = round_page(req_oldsize);
1865 	newsize = round_page(req_newsize);
1866 	oldaddr = req_oldaddr;
1867 #if KASAN_CLASSIC
1868 	if (flags & KMR_KASAN_GUARD) {
1869 		flags   |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1870 		oldaddr -= PAGE_SIZE;
1871 		delta    = ptoa(2);
1872 		oldsize += delta;
1873 		newsize += delta;
1874 	}
1875 #endif /* KASAN_CLASSIC */
1876 #if CONFIG_KERNEL_TAGGING
1877 	if (flags & KMR_TAG) {
1878 		vm_memtag_verify_tag(req_oldaddr + __kmem_guard_left(ANYF(flags)));
1879 		oldaddr = vm_memtag_canonicalize_kernel(req_oldaddr);
1880 	}
1881 #endif /* CONFIG_KERNEL_TAGGING */
1882 
1883 #if !KASAN
1884 	/*
1885 	 *	If we're not on a KASAN variant and the requested size is
1886 	 *	unchanged, just return.
1887 	 *
1888 	 *	Otherwise we want to validate the size and re-tag for KASAN_TBI.
1889 	 */
1890 	if (oldsize == newsize) {
1891 		kmr.kmr_address = req_oldaddr;
1892 		return kmr;
1893 	}
1894 #endif /* !KASAN */
1895 
1896 	/*
1897 	 *	If we're growing the allocation,
1898 	 *	then reserve the pages we'll need,
1899 	 *	and find a spot for its new place.
1900 	 */
1901 	if (oldsize < newsize) {
1902 #if DEBUG || DEVELOPMENT
1903 		VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1904 		    DBG_VM_KERN_REQUEST, DBG_FUNC_START,
1905 		    newsize - oldsize, 0, 0, 0);
1906 #endif /* DEBUG || DEVELOPMENT */
1907 		kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
1908 		    (kma_flags_t)flags, &page_list);
1909 		if (kmr.kmr_return == KERN_SUCCESS) {
1910 			kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
1911 			    newsize, 0, &vmk_flags, true);
1912 			kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
1913 			    vmk_flags, &newentry);
1914 		}
1915 		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
1916 			if (flags & KMR_REALLOCF) {
1917 				kmem_free_guard(map, req_oldaddr, req_oldsize,
1918 				    flags & (KMF_TAG | KMF_GUARD_FIRST |
1919 				    KMF_GUARD_LAST | KMF_KASAN_GUARD), guard);
1920 			}
1921 			if (page_list) {
1922 				vm_page_free_list(page_list, FALSE);
1923 			}
1924 #if DEBUG || DEVELOPMENT
1925 			VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1926 			    DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1927 			    0, 0, 0, 0);
1928 #endif /* DEBUG || DEVELOPMENT */
1929 			return kmr;
1930 		}
1931 
1932 		/* map is locked */
1933 	} else {
1934 		vm_map_lock(map);
1935 	}
1936 
1937 
1938 	/*
1939 	 *	Locate the entry:
1940 	 *	- wait for it to quiesce.
1941 	 *	- validate its guard,
1942 	 *	- learn its correct tag,
1943 	 */
1944 again:
1945 	if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
1946 		__kmem_entry_not_found_panic(map, req_oldaddr);
1947 	}
1948 	if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
1949 		oldentry->needs_wakeup = true;
1950 		vm_map_entry_wait(map, THREAD_UNINT);
1951 		goto again;
1952 	}
1953 	kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
1954 	if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
1955 		__kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
1956 	}
1957 	/*
1958 	 *	TODO: We should validate for non-atomic entries that the range
1959 	 *	      we are acting on is what we expect here.
1960 	 */
1961 #if KASAN
1962 	if (__kmem_entry_orig_size(oldentry) != req_oldsize) {
1963 		__kmem_realloc_invalid_object_size_panic(map,
1964 		    req_oldaddr, req_oldsize + delta, oldentry);
1965 	}
1966 
1967 	if (oldsize == newsize) {
1968 		kmr.kmr_address = req_oldaddr;
1969 		if (oldentry->vme_kernel_object) {
1970 			oldentry->vme_object_or_delta = delta +
1971 			    (-req_newsize & PAGE_MASK);
1972 		} else {
1973 			object = VME_OBJECT(oldentry);
1974 			vm_object_lock(object);
1975 			vm_object_set_size(object, newsize, req_newsize);
1976 			vm_object_unlock(object);
1977 		}
1978 		vm_map_unlock(map);
1979 
1980 #if KASAN_CLASSIC
1981 		if (flags & KMR_KASAN_GUARD) {
1982 			kasan_alloc_large(kmr.kmr_address, req_newsize);
1983 		}
1984 #endif /* KASAN_CLASSIC */
1985 #if KASAN_TBI
1986 		if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1987 			kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
1988 			kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
1989 		}
1990 #endif /* KASAN_TBI */
1991 		return kmr;
1992 	}
1993 #endif /* KASAN */
1994 
1995 	guard.kmg_tag = VME_ALIAS(oldentry);
1996 
1997 	if (newsize < oldsize) {
1998 		return kmem_realloc_shrink_guard(map, req_oldaddr,
1999 		           req_oldsize, req_newsize, flags, guard, oldentry);
2000 	}
2001 
2002 
2003 	/*
2004 	 *	We are growing the entry
2005 	 *
2006 	 *	For regular objects we use the object `vo_size` updates
2007 	 *	as a guarantee that no two kmem_realloc() calls can happen
2008 	 *	concurrently (by doing it before the map is unlocked).
2009 	 *
2010 	 *	For the kernel object, prevent the entry from being
2011 	 *	reallocated or changed by marking it "in_transition".
2012 	 */
2013 
2014 	object = VME_OBJECT(oldentry);
2015 	vm_object_lock(object);
2016 	vm_object_reference_locked(object);
2017 
2018 	newaddr = newentry->vme_start;
2019 	newoffs = oldsize;
2020 
2021 	VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
2022 	VME_ALIAS_SET(newentry, guard.kmg_tag);
2023 	if (flags & KMR_KOBJECT) {
2024 		oldentry->in_transition = true;
2025 		VME_OFFSET_SET(newentry, newaddr);
2026 		newentry->wired_count = 1;
2027 		vme_btref_consider_and_set(newentry, __builtin_frame_address(0));
2028 		newoffs = newaddr + oldsize;
2029 #if KASAN
2030 		newentry->vme_object_or_delta = delta +
2031 		    (-req_newsize & PAGE_MASK);
2032 #endif /* KASAN */
2033 	} else {
2034 		if (object->pager_created || object->pager) {
2035 			/*
2036 			 * We can't "realloc/grow" the pager, so pageable
2037 			 * allocations should not go through this path.
2038 			 */
2039 			__kmem_realloc_invalid_pager_panic(map,
2040 			    req_oldaddr, req_oldsize + delta, oldentry);
2041 		}
2042 		if (object->vo_size != oldsize) {
2043 			__kmem_realloc_invalid_object_size_panic(map,
2044 			    req_oldaddr, req_oldsize + delta, oldentry);
2045 		}
2046 		vm_object_set_size(object, newsize, req_newsize);
2047 	}
2048 
2049 	last_timestamp = map->timestamp;
2050 	vm_map_unlock(map);
2051 
2052 
2053 	/*
2054 	 *	Now proceed with the population of pages.
2055 	 *
2056 	 *	Kernel objects can use the kmem population helpers.
2057 	 *
2058 	 *	Regular objects will insert pages manually,
2059 	 *	then wire the memory into the new range.
2060 	 */
2061 
2062 	vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));
2063 
2064 	if (flags & KMR_KOBJECT) {
2065 		pmap_mapping_type_t mapping_type = __kmem_mapping_type(ANYF(flags));
2066 
2067 		pmap_protect(kernel_pmap,
2068 		    oldaddr, oldaddr + oldsize - guard_right_size,
2069 		    VM_PROT_NONE);
2070 
2071 		for (vm_object_offset_t offset = 0;
2072 		    offset < oldsize - guard_right_size;
2073 		    offset += PAGE_SIZE_64) {
2074 			vm_page_t mem;
2075 
2076 			mem = vm_page_lookup(object, oldaddr + offset);
2077 			if (mem == VM_PAGE_NULL) {
2078 				continue;
2079 			}
2080 
2081 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
2082 
2083 			mem->vmp_busy = true;
2084 			vm_page_remove(mem, true);
2085 			vm_page_insert_wired(mem, object, newaddr + offset,
2086 			    guard.kmg_tag);
2087 			mem->vmp_busy = false;
2088 
2089 			kernel_memory_populate_pmap_enter(object, newaddr,
2090 			    offset, mem, VM_PROT_DEFAULT, 0, mapping_type);
2091 		}
2092 
2093 		kernel_memory_populate_object_and_unlock(object,
2094 		    newaddr + oldsize - guard_right_size,
2095 		    newoffs - guard_right_size,
2096 		    newsize - oldsize,
2097 		    page_list, (kma_flags_t)flags,
2098 		    guard.kmg_tag, VM_PROT_DEFAULT, mapping_type);
2099 	} else {
2100 		vm_page_t guard_right = VM_PAGE_NULL;
2101 
2102 		/*
2103 		 *	Note: we are borrowing the new entry reference
2104 		 *	on the object for the duration of this code,
2105 		 *	which works because we keep the object locked
2106 		 *	throughout.
2107 		 */
2108 		if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
2109 			guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
2110 			assert(vm_page_is_guard(guard_right));
2111 			guard_right->vmp_busy = true;
2112 			vm_page_remove(guard_right, true);
2113 		}
2114 
2115 		if (flags & KMR_FREEOLD) {
2116 			/*
2117 			 * Freeing the old mapping will make
2118 			 * the old pages become pageable until
2119 			 * the new mapping makes them wired again.
2120 			 * Let's take an extra "wire_count" to
2121 			 * prevent any accidental "page out".
2122 			 * We'll have to undo that after wiring
2123 			 * the new mapping.
2124 			 */
2125 			vm_object_reference_locked(object); /* keep object alive */
2126 			for (vm_object_offset_t offset = 0;
2127 			    offset < oldsize - guard_right_size;
2128 			    offset += PAGE_SIZE_64) {
2129 				vm_page_t mem;
2130 
2131 				mem = vm_page_lookup(object, offset);
2132 				assert(mem != VM_PAGE_NULL);
2133 				assertf(!VM_PAGE_PAGEABLE(mem),
2134 				    "mem %p qstate %d",
2135 				    mem, mem->vmp_q_state);
2136 				if (vm_page_is_guard(mem)) {
2137 					/* guard pages are not wired */
2138 				} else {
2139 					assertf(VM_PAGE_WIRED(mem),
2140 					    "mem %p qstate %d wirecount %d",
2141 					    mem,
2142 					    mem->vmp_q_state,
2143 					    mem->vmp_wire_count);
2144 					assertf(mem->vmp_wire_count >= 1,
2145 					    "mem %p wirecount %d",
2146 					    mem, mem->vmp_wire_count);
2147 					mem->vmp_wire_count++;
2148 				}
2149 			}
2150 		}
2151 
2152 		for (vm_object_offset_t offset = oldsize - guard_right_size;
2153 		    offset < newsize - guard_right_size;
2154 		    offset += PAGE_SIZE_64) {
2155 			vm_page_t mem = page_list;
2156 
2157 			page_list = mem->vmp_snext;
2158 			mem->vmp_snext = VM_PAGE_NULL;
2159 			assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
2160 			assert(!VM_PAGE_PAGEABLE(mem));
2161 
2162 			vm_page_insert(mem, object, offset);
2163 			mem->vmp_busy = false;
2164 		}
2165 
2166 		if (guard_right) {
2167 			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
2168 			guard_right->vmp_busy = false;
2169 		}
2170 
2171 		vm_object_unlock(object);
2172 	}
2173 
2174 	/*
2175 	 *	Mark the entry as idle again,
2176 	 *	and honor KMR_FREEOLD if needed.
2177 	 */
2178 
2179 	vm_map_lock(map);
2180 	if (last_timestamp + 1 != map->timestamp &&
2181 	    !vm_map_lookup_entry(map, oldaddr, &oldentry)) {
2182 		__kmem_entry_not_found_panic(map, req_oldaddr);
2183 	}
2184 
2185 	if (flags & KMR_KOBJECT) {
2186 		assert(oldentry->in_transition);
2187 		oldentry->in_transition = false;
2188 		if (oldentry->needs_wakeup) {
2189 			needs_wakeup = true;
2190 			oldentry->needs_wakeup = false;
2191 		}
2192 	}
2193 
2194 	if (flags & KMR_FREEOLD) {
2195 		vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2196 
2197 #if KASAN_CLASSIC
2198 		if (flags & KMR_KASAN_GUARD) {
2199 			kasan_poison_range(oldaddr, oldsize, ASAN_VALID);
2200 		}
2201 #endif
2202 #if KASAN_TBI
2203 		if (flags & KMR_TAG) {
2204 			kasan_tbi_mark_free_space((caddr_t)req_oldaddr, oldsize);
2205 		}
2206 #endif /* KASAN_TBI */
2207 		if (flags & KMR_GUARD_LAST) {
2208 			vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST;
2209 		}
2210 		(void)vm_map_remove_and_unlock(map,
2211 		    oldaddr, oldaddr + oldsize,
2212 		    vmr_flags, guard);
2213 	} else {
2214 		vm_map_unlock(map);
2215 	}
2216 
2217 	if ((flags & KMR_KOBJECT) == 0) {
2218 		kern_return_t kr;
2219 		/*
2220 		 * This must happen _after_ we do the KMR_FREEOLD,
2221 		 * because wiring the pages will call into the pmap,
2222 		 * and if the pages are typed XNU_KERNEL_RESTRICTED,
2223 		 * this would cause a second mapping of the page and panic.
2224 		 */
2225 		kr = vm_map_wire_kernel(map,
2226 		    vm_sanitize_wrap_addr(newaddr),
2227 		    vm_sanitize_wrap_addr(newaddr + newsize),
2228 		    vm_sanitize_wrap_prot(VM_PROT_DEFAULT),
2229 		    guard.kmg_tag, FALSE);
2230 		assert(kr == KERN_SUCCESS);
2231 
2232 		if (flags & KMR_FREEOLD) {
2233 			/*
2234 			 * Undo the extra "wiring" we made above
2235 			 * and release the extra reference we took
2236 			 * on the object.
2237 			 */
2238 			vm_object_lock(object);
2239 			for (vm_object_offset_t offset = 0;
2240 			    offset < oldsize - guard_right_size;
2241 			    offset += PAGE_SIZE_64) {
2242 				vm_page_t mem;
2243 
2244 				mem = vm_page_lookup(object, offset);
2245 				assert(mem != VM_PAGE_NULL);
2246 				assertf(!VM_PAGE_PAGEABLE(mem),
2247 				    "mem %p qstate %d",
2248 				    mem, mem->vmp_q_state);
2249 				if (vm_page_is_guard(mem)) {
2250 					/* guard pages are not wired */
2251 				} else {
2252 					assertf(VM_PAGE_WIRED(mem),
2253 					    "mem %p qstate %d wirecount %d",
2254 					    mem,
2255 					    mem->vmp_q_state,
2256 					    mem->vmp_wire_count);
2257 					assertf(mem->vmp_wire_count >= 2,
2258 					    "mem %p wirecount %d",
2259 					    mem, mem->vmp_wire_count);
2260 					mem->vmp_wire_count--;
2261 					assert(VM_PAGE_WIRED(mem));
2262 					assert(mem->vmp_wire_count >= 1);
2263 				}
2264 			}
2265 			vm_object_unlock(object);
2266 			vm_object_deallocate(object); /* release extra ref */
2267 		}
2268 	}
2269 
2270 	if (needs_wakeup) {
2271 		vm_map_entry_wakeup(map);
2272 	}
2273 
2274 #if DEBUG || DEVELOPMENT
2275 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
2276 	    atop(newsize - oldsize), 0, 0, 0);
2277 #endif /* DEBUG || DEVELOPMENT */
2278 	kmr.kmr_address = newaddr;
2279 
2280 #if KASAN
2281 	kasan_notify_address(kmr.kmr_address, newsize);
2282 #endif /* KASAN */
2283 #if KASAN_CLASSIC
2284 	if (flags & KMR_KASAN_GUARD) {
2285 		kmr.kmr_address += PAGE_SIZE;
2286 		kasan_alloc_large(kmr.kmr_address, req_newsize);
2287 	}
2288 #endif /* KASAN_CLASSIC */
2289 #if CONFIG_KERNEL_TAGGING
2290 	if (flags & KMR_TAG) {
2291 #if   KASAN_TBI
2292 		/*
2293 		 * Validate the current buffer, then generate a new tag;
2294 		 * even if the address is stable, it's a "new" allocation.
2295 		 */
2296 		__asan_loadN((vm_offset_t)kmr.kmr_address, oldsize);
2297 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
2298 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
2299 #endif /* KASAN_TBI */
2300 	}
2301 #endif /* CONFIG_KERNEL_TAGGING */
2302 
2303 	return kmr;
2304 }
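/*
 * Editor's summary of the grow path above (an annotation, not original
 * source text): (1) reserve pages and a new VA range up front, (2) look up
 * and validate the old entry under the map lock, (3) pin it against
 * concurrent reallocs, via in_transition for the kernel object or the
 * vo_size update for regular objects, (4) migrate existing pages and
 * populate the tail of the new range, (5) retake the lock, clear
 * in_transition and honor KMR_FREEOLD, and (6) wire the new range for
 * regular objects once the old mapping is gone.
 */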
2305 
2306 #pragma mark map/remap/wire
2307 
2308 kern_return_t
2309 mach_vm_map_kernel(
2310 	vm_map_t                target_map,
2311 	mach_vm_offset_ut      *address,
2312 	mach_vm_size_ut         initial_size,
2313 	mach_vm_offset_ut       mask,
2314 	vm_map_kernel_flags_t   vmk_flags,
2315 	ipc_port_t              port,
2316 	memory_object_offset_ut offset,
2317 	boolean_t               copy,
2318 	vm_prot_ut              cur_protection,
2319 	vm_prot_ut              max_protection,
2320 	vm_inherit_ut           inheritance)
2321 {
2322 	/* range_id is set by vm_map_enter_mem_object */
2323 	return vm_map_enter_mem_object(target_map,
2324 	           address,
2325 	           initial_size,
2326 	           mask,
2327 	           vmk_flags,
2328 	           port,
2329 	           offset,
2330 	           copy,
2331 	           cur_protection,
2332 	           max_protection,
2333 	           inheritance,
2334 	           NULL,
2335 	           0);
2336 }
2337 
2338 kern_return_t
2339 mach_vm_remap_new_kernel(
2340 	vm_map_t                target_map,
2341 	mach_vm_offset_ut      *address,
2342 	mach_vm_size_ut         size,
2343 	mach_vm_offset_ut       mask,
2344 	vm_map_kernel_flags_t   vmk_flags,
2345 	vm_map_t                src_map,
2346 	mach_vm_offset_ut       memory_address,
2347 	boolean_t               copy,
2348 	vm_prot_ut             *cur_protection,   /* IN/OUT */
2349 	vm_prot_ut             *max_protection,   /* IN/OUT */
2350 	vm_inherit_ut           inheritance)
2351 {
2352 	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
2353 	    VM_FLAGS_USER_REMAP)) {
2354 		return KERN_INVALID_ARGUMENT;
2355 	}
2356 
2357 
2358 	vmk_flags.vmf_return_data_addr = true;
2359 
2360 	/* range_id is set by vm_map_remap */
2361 	return vm_map_remap(target_map,
2362 	           address,
2363 	           size,
2364 	           mask,
2365 	           vmk_flags,
2366 	           src_map,
2367 	           memory_address,
2368 	           copy,
2369 	           cur_protection,
2370 	           max_protection,
2371 	           inheritance);
2372 }
2373 
2374 #pragma mark free
2375 
2376 #if KASAN
2377 
2378 __abortlike
2379 static void
2380 __kmem_free_invalid_object_size_panic(
2381 	vm_map_t                map,
2382 	vm_address_t            address,
2383 	vm_size_t               size,
2384 	vm_map_entry_t          entry)
2385 {
2386 	vm_object_t object  = VME_OBJECT(entry);
2387 	vm_size_t   objsize = __kmem_entry_orig_size(entry);
2388 
2389 	panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): "
2390 	    "object %p has unexpected size %ld",
2391 	    map, (void *)address, (size_t)size, entry, object, objsize);
2392 }
2393 
2394 #endif /* KASAN */
2395 
2396 vm_size_t
2397 kmem_free_guard(
2398 	vm_map_t        map,
2399 	vm_offset_t     req_addr,
2400 	vm_size_t       req_size,
2401 	kmf_flags_t     flags,
2402 	kmem_guard_t    guard)
2403 {
2404 	vmr_flags_t     vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2405 	vm_address_t    addr      = req_addr;
2406 	vm_offset_t     delta     = 0;
2407 	vm_size_t       size;
2408 #if KASAN
2409 	vm_map_entry_t  entry;
2410 #endif /* KASAN */
2411 
2412 	assert(map->pmap == kernel_pmap);
2413 
2414 #if KASAN_CLASSIC
2415 	if (flags & KMF_KASAN_GUARD) {
2416 		addr  -= PAGE_SIZE;
2417 		delta  = ptoa(2);
2418 	}
2419 #endif /* KASAN_CLASSIC */
2420 #if CONFIG_KERNEL_TAGGING
2421 	if (flags & KMF_TAG) {
2422 		vm_memtag_verify_tag(req_addr + __kmem_guard_left(ANYF(flags)));
2423 		addr = vm_memtag_canonicalize_kernel(req_addr);
2424 	}
2425 #endif /* CONFIG_KERNEL_TAGGING */
2426 
2427 	if (flags & KMF_GUESS_SIZE) {
2428 		vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
2429 		size = PAGE_SIZE;
2430 	} else if (req_size == 0) {
2431 		__kmem_invalid_size_panic(map, req_size, flags);
2432 	} else {
2433 		size = round_page(req_size) + delta;
2434 	}
2435 
2436 	vm_map_lock(map);
2437 
2438 #if KASAN
2439 	if (!vm_map_lookup_entry(map, addr, &entry)) {
2440 		__kmem_entry_not_found_panic(map, req_addr);
2441 	}
2442 	if (flags & KMF_GUESS_SIZE) {
2443 		vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
2444 		req_size = __kmem_entry_orig_size(entry);
2445 		size = round_page(req_size + delta);
2446 	} else if (guard.kmg_atomic && entry->vme_kernel_object &&
2447 	    __kmem_entry_orig_size(entry) != req_size) {
2448 		/*
2449 		 * We can't make a strict check for regular
2450 		 * VM objects because it could be:
2451 		 *
2452 		 * - the kmem_guard_free() of a kmem_realloc_guard() without
2453 		 *   KMR_FREEOLD, and in that case the object size won't match.
2454 		 *
2455 		 * - a submap, in which case there is no "orig size".
2456 		 */
2457 		__kmem_free_invalid_object_size_panic(map,
2458 		    req_addr, req_size + delta, entry);
2459 	}
2460 #endif /* KASAN */
2461 #if KASAN_CLASSIC
2462 	if (flags & KMF_KASAN_GUARD) {
2463 		kasan_poison_range(addr, size, ASAN_VALID);
2464 	}
2465 #endif
2466 #if KASAN_TBI
2467 	if (flags & KMF_TAG) {
2468 		kasan_tbi_mark_free_space((caddr_t)req_addr, size);
2469 	}
2470 #endif /* KASAN_TBI */
2471 
2472 	/*
2473 	 * vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which
2474 	 * unwires the kernel mapping. The page won't be mapped any longer so
2475 	 * there is no extra step that is required for memory tagging to "clear"
2476 	 * it -- the page will be later laundered when reused.
2477 	 */
2478 	return vm_map_remove_and_unlock(map, addr, addr + size,
2479 	           vmr_flags, guard).kmr_size - delta;
2480 }
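/*
 * Editor's note: with KMF_GUESS_SIZE the caller does not know the exact
 * allocation size, so the removal starts out sized at a single page and
 * vm_map_remove_and_unlock() is told to trust the entry bounds; on KASAN
 * builds the size is instead recovered from the entry's recorded original
 * size. The return value is the size actually freed, minus the
 * KASAN_CLASSIC guard-page delta, so callers can reconcile accounting.
 */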
2481 
2482 __exported void
2483 kmem_free_external(
2484 	vm_map_t        map,
2485 	vm_offset_t     addr,
2486 	vm_size_t       size);
2487 void
2488 kmem_free_external(
2489 	vm_map_t        map,
2490 	vm_offset_t     addr,
2491 	vm_size_t       size)
2492 {
2493 	if (size) {
2494 		kmem_free(map, trunc_page(addr), size);
2495 #if MACH_ASSERT
2496 	} else {
2497 		printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
2498 		    map, (void *)addr, __builtin_return_address(0));
2499 #endif
2500 	}
2501 }
2502 
2503 #pragma mark kmem metadata
2504 
2505 /*
2506  * Guard objects for kmem pointer allocation:
2507  *
2508  * Guard objects introduce size slabs to kmem pointer allocations, which
2509  * are allocated in chunks of n * sizeclass. When an allocation of a
2510  * specific sizeclass is requested, a random slot from [0, n) is returned.
2511  * Allocations are returned from that chunk until m slots are left. The
2512  * remaining m slots are referred to as guard objects. They don't get
2513  * allocated and the chunk is now considered full. When an allocation is
2514  * freed back to the chunk, the free pool grows to m + 1 slots, one of
2515  * which can satisfy the next allocation of that sizeclass.
2516  *
2517  * Guard objects are intended to make exploitation of use-after-frees
2518  * harder, as allocations that are freed can no longer be reliably
2519  * reallocated. They also make exploitation of OOBs harder, as overflowing
2520  * out of an allocation can no longer be safe even with sufficient spraying.
2521  */
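/*
 * A minimal sketch of the accounting described above (editor's
 * illustration, not part of the source; all names are hypothetical).
 * A chunk with a free-slot bitmap hands out uniformly random free slots
 * until only `num_guards` remain, at which point it reads as full:
 */
#if 0 /* illustrative only */
static int
chunk_alloc_slot(uint32_t *bitmap, uint32_t num_guards,
    uint32_t (*rand_below)(uint32_t))
{
	uint32_t nfree = __builtin_popcount(*bitmap);

	if (nfree <= num_guards) {
		return -1;              /* "full": only guard slots are left */
	}
	uint32_t n = rand_below(nfree); /* nth free slot, n in [0, nfree) */
	uint32_t bit;
	for (bit = 0; ; bit++) {
		if ((*bitmap >> bit) & 1) {
			if (n == 0) {
				break;
			}
			n--;
		}
	}
	*bitmap &= ~(1u << bit);        /* mark the slot allocated */
	return (int)bit;
}
#endif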
2522 
2523 #define KMEM_META_PRIMARY    UINT8_MAX
2524 #define KMEM_META_START     (UINT8_MAX - 1)
2525 #define KMEM_META_FREE      (UINT8_MAX - 2)
2526 #if __ARM_16K_PG__
2527 #define KMEM_MIN_SIZE        PAGE_SIZE
2528 #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 16)
2529 #else /* __ARM_16K_PG__ */
2530 /*
2531  * PAGE_SIZE isn't a compile time constant on some arm64 devices. Those
2532  * devices use 4k page size when their RAM is <= 1GB and 16k otherwise.
2533  * Therefore populate sizeclasses from 4k for those devices.
2534  */
2535 #define KMEM_MIN_SIZE       (4 * 1024)
2536 #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 32)
2537 #endif /* __ARM_16K_PG__ */
2538 #define KMEM_MAX_SIZE       (32ULL << 20)
2539 #define KMEM_START_IDX      (kmem_log2down(KMEM_MIN_SIZE))
2540 #define KMEM_LAST_IDX       (kmem_log2down(KMEM_MAX_SIZE))
2541 #define KMEM_NUM_SIZECLASS  (KMEM_LAST_IDX - KMEM_START_IDX + 1)
2542 #define KMEM_FRONTS         (KMEM_RANGE_ID_NUM_PTR * 2)
2543 #define KMEM_NUM_GUARDS      2
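/*
 * Worked numbers for the constants above (editor's note). On a 4k-page
 * configuration, KMEM_MIN_SIZE = 4K gives KMEM_START_IDX = 12 and
 * KMEM_MAX_SIZE = 32M gives KMEM_LAST_IDX = 25, hence
 * KMEM_NUM_SIZECLASS = 25 - 12 + 1 = 14 power-of-two sizeclasses
 * (4K, 8K, ..., 32M) with 32 * 4K = 128K chunks. On __ARM_16K_PG__,
 * KMEM_START_IDX = 14, so there are 12 sizeclasses (16K..32M) and
 * 16 * 16K = 256K chunks.
 */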
2544 
2545 struct kmem_page_meta {
2546 	union {
2547 		/*
2548 		 * On primary allocated chunk with KMEM_META_PRIMARY marker
2549 		 */
2550 		uint32_t km_bitmap;
2551 		/*
2552 		 * On start and end of free chunk with KMEM_META_FREE marker
2553 		 */
2554 		uint32_t km_free_chunks;
2555 	};
2556 	/*
2557 	 * KMEM_META_PRIMARY: Start meta of allocated chunk
2558 	 * KMEM_META_FREE   : Start and end meta of free chunk
2559 	 * KMEM_META_START  : Meta region start and end
2560 	 */
2561 	uint8_t  km_page_marker;
2562 	uint8_t  km_sizeclass;
2563 	union {
2564 		/*
2565 		 * On primary allocated chunk with KMEM_META_PRIMARY marker
2566 		 */
2567 		uint16_t km_chunk_len;
2568 		/*
2569 		 * On secondary allocated chunks
2570 		 */
2571 		uint16_t km_page_idx;
2572 	};
2573 	LIST_ENTRY(kmem_page_meta) km_link;
2574 } kmem_page_meta_t;
2575 
2576 typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t;
2577 struct kmem_sizeclass {
2578 	vm_map_size_t                   ks_size;
2579 	uint32_t                        ks_num_chunk;
2580 	uint32_t                        ks_num_elem;
2581 	crypto_random_ctx_t __zpercpu   ks_rng_ctx;
2582 	kmem_list_head_t                ks_allfree_head[KMEM_FRONTS];
2583 	kmem_list_head_t                ks_partial_head[KMEM_FRONTS];
2584 	kmem_list_head_t                ks_full_head[KMEM_FRONTS];
2585 };
2586 
2587 static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS];
2588 
2589 /*
2590  * Locks to synchronize metadata population
2591  */
2592 static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks");
2593 static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp);
2594 #define kmem_meta_lock()   lck_mtx_lock(&kmem_meta_region_lck)
2595 #define kmem_meta_unlock() lck_mtx_unlock(&kmem_meta_region_lck)
2596 
2597 static SECURITY_READ_ONLY_LATE(struct mach_vm_range)
2598 kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1];
2599 static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *)
2600 kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1];
2601 /*
2602  * Keeps track of metadata high water mark for each front
2603  */
2604 static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS];
2605 static SECURITY_READ_ONLY_LATE(vm_map_t)
2606 kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1];
2607 static vm_map_size_t kmem_meta_size;
2608 
2609 static uint32_t
2610 kmem_get_front(
2611 	kmem_range_id_t         range_id,
2612 	bool                    from_right)
2613 {
2614 	assert((range_id >= KMEM_RANGE_ID_FIRST) &&
2615 	    (range_id <= KMEM_RANGE_ID_NUM_PTR));
2616 	return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right;
2617 }
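/*
 * Editor's note: each pointer range owns two allocation fronts, left
 * (from_right == false) and right. Assuming KMEM_RANGE_ID_FIRST == 1,
 * range 1 maps to fronts 0 and 1, range 2 to fronts 2 and 3, and so on,
 * which is why KMEM_FRONTS == KMEM_RANGE_ID_NUM_PTR * 2.
 */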
2618 
2619 static inline uint32_t
2620 kmem_slot_idx_to_bit(
2621 	uint32_t                slot_idx,
2622 	uint32_t                size_idx __unused)
2623 {
2624 	assert(slot_idx < kmem_size_array[size_idx].ks_num_elem);
2625 	return 1ull << slot_idx;
2626 }
2627 
2628 static uint32_t
2629 kmem_get_idx_from_size(vm_map_size_t size)
2630 {
2631 	assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE);
2632 	return kmem_log2down(size - 1) - KMEM_START_IDX + 1;
2633 }
2634 
2635 __abortlike
2636 static void
2637 kmem_invalid_size_idx(uint32_t idx)
2638 {
2639 	panic("Invalid sizeclass idx %u", idx);
2640 }
2641 
2642 static vm_map_size_t
2643 kmem_get_size_from_idx(uint32_t idx)
2644 {
2645 	if (__improbable(idx >= KMEM_NUM_SIZECLASS)) {
2646 		kmem_invalid_size_idx(idx);
2647 	}
2648 	return 1ul << (idx + KMEM_START_IDX);
2649 }
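/*
 * Editor's worked example of the index math, assuming KMEM_START_IDX == 12
 * (a 4k minimum): kmem_get_idx_from_size() rounds a request up to the next
 * power-of-two sizeclass via kmem_log2down(size - 1) - KMEM_START_IDX + 1:
 *
 *	kmem_get_idx_from_size(4 * 1024) -> log2down(4095) = 11 -> idx 0 (4K)
 *	kmem_get_idx_from_size(6 * 1024) -> log2down(6143) = 12 -> idx 1 (8K)
 *	kmem_get_idx_from_size(8 * 1024) -> log2down(8191) = 12 -> idx 1 (8K)
 *
 * and kmem_get_size_from_idx() inverts it: 1ul << (1 + 12) == 8K.
 */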
2650 
2651 static inline uint16_t
2652 kmem_get_page_idx(struct kmem_page_meta *meta)
2653 {
2654 	uint8_t page_marker = meta->km_page_marker;
2655 
2656 	return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx;
2657 }
2658 
2659 __abortlike
2660 static void
2661 kmem_invalid_chunk_len(struct kmem_page_meta *meta)
2662 {
2663 	panic("Reading free chunks for meta %p where marker != KMEM_META_PRIMARY",
2664 	    meta);
2665 }
2666 
2667 static inline uint16_t
2668 kmem_get_chunk_len(struct kmem_page_meta *meta)
2669 {
2670 	if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) {
2671 		kmem_invalid_chunk_len(meta);
2672 	}
2673 
2674 	return meta->km_chunk_len;
2675 }
2676 
2677 __abortlike
2678 static void
2679 kmem_invalid_free_chunk_len(struct kmem_page_meta *meta)
2680 {
2681 	panic("Reading free chunks for meta %p where marker != KMEM_META_FREE",
2682 	    meta);
2683 }
2684 
2685 static inline uint32_t
2686 kmem_get_free_chunk_len(struct kmem_page_meta *meta)
2687 {
2688 	if (__improbable(meta->km_page_marker != KMEM_META_FREE)) {
2689 		kmem_invalid_free_chunk_len(meta);
2690 	}
2691 
2692 	return meta->km_free_chunks;
2693 }
2694 
2695 /*
2696  * Return the metadata corresponding to the specified address
2697  */
2698 static struct kmem_page_meta *
2699 kmem_addr_to_meta(
2700 	vm_map_offset_t         addr,
2701 	vm_map_range_id_t       range_id,
2702 	vm_map_offset_t        *range_start,
2703 	uint64_t               *meta_idx)
2704 {
2705 	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
2706 
2707 	*range_start = kmem_ranges[range_id].min_address;
2708 	*meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN;
2709 	return VM_FAR_ADD_PTR_UNBOUNDED(meta_base, *meta_idx);
2710 }
2711 
2712 /*
2713  * Return the metadata start of the chunk that the address belongs to
2714  */
2715 static struct kmem_page_meta *
2716 kmem_addr_to_meta_start(
2717 	vm_address_t            addr,
2718 	vm_map_range_id_t       range_id,
2719 	vm_map_offset_t        *chunk_start)
2720 {
2721 	vm_map_offset_t range_start;
2722 	uint64_t meta_idx;
2723 	struct kmem_page_meta *meta;
2724 
2725 	meta = kmem_addr_to_meta(addr, range_id, &range_start, &meta_idx);
2726 	meta_idx -= kmem_get_page_idx(meta);
2727 	meta = VM_FAR_ADD_PTR_UNBOUNDED(meta, -(ptrdiff_t)kmem_get_page_idx(meta));
2728 	assert(meta->km_page_marker == KMEM_META_PRIMARY);
2729 	*chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN);
2730 	return meta;
2731 }
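/*
 * Worked example (editor's illustration, assuming 128K chunks): for an
 * address 320K past the range start, kmem_addr_to_meta() yields
 * meta_idx = 320K / 128K = 2. If that metadata is a secondary entry with
 * km_page_idx == 1, the code steps back one metadata slot to the primary
 * at meta_idx 1, so *chunk_start = range_start + 128K.
 */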
2732 
2733 __startup_func
2734 static void
2735 kmem_init_meta_front(
2736 	struct kmem_page_meta  *meta,
2737 	kmem_range_id_t         range_id,
2738 	bool                    from_right)
2739 {
2740 	kernel_memory_populate(trunc_page((vm_map_offset_t) meta), PAGE_SIZE,
2741 	    KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK);
2742 	meta->km_page_marker = KMEM_META_START;
2743 	if (!from_right) {
2744 		meta++;
2745 		kmem_meta_base[range_id] = meta;
2746 	}
2747 	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta;
2748 }
2749 
2750 __startup_func
2751 static void
2752 kmem_metadata_init(void)
2753 {
2754 	for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) {
2755 		vm_map_offset_t addr = kmem_meta_range[i].min_address;
2756 		struct kmem_page_meta *meta;
2757 		uint64_t meta_idx;
2758 
2759 		vm_map_will_allocate_early_map(&kmem_meta_map[i]);
2760 		kmem_meta_map[i] = kmem_suballoc(kernel_map, &addr, kmem_meta_size,
2761 		    VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST,
2762 		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
2763 		    KMS_PERMANENT | KMS_NOFAIL | KMS_NOSOFTLIMIT,
2764 		    VM_KERN_MEMORY_OSFMK).kmr_submap;
2765 
2766 		kmem_meta_range[i].min_address = addr;
2767 		kmem_meta_range[i].max_address = addr + kmem_meta_size;
2768 
2769 		meta = (struct kmem_page_meta *) kmem_meta_range[i].min_address;
2770 		kmem_init_meta_front(meta, i, 0);
2771 
2772 		meta = kmem_addr_to_meta(kmem_ranges[i].max_address, i, &addr,
2773 		    &meta_idx);
2774 		kmem_init_meta_front(meta, i, 1);
2775 	}
2776 }
2777 
2778 __startup_func
2779 static void
2780 kmem_init_front_head(
2781 	struct kmem_sizeclass  *ks,
2782 	uint32_t                front)
2783 {
2784 	LIST_INIT(&ks->ks_allfree_head[front]);
2785 	LIST_INIT(&ks->ks_partial_head[front]);
2786 	LIST_INIT(&ks->ks_full_head[front]);
2787 }
2788 
2789 __startup_func
2790 static void
2791 kmem_sizeclass_init(void)
2792 {
2793 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2794 		struct kmem_sizeclass *ks = &kmem_size_array[i];
2795 		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
2796 
2797 		ks->ks_size = kmem_get_size_from_idx(i);
2798 		ks->ks_num_chunk = roundup(8 * ks->ks_size, KMEM_CHUNK_SIZE_MIN) /
2799 		    KMEM_CHUNK_SIZE_MIN;
2800 		ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size;
2801 		assert(ks->ks_num_elem <=
2802 		    (sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8));
2803 		for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) {
2804 			kmem_init_front_head(ks, kmem_get_front(range_id, 0));
2805 			kmem_init_front_head(ks, kmem_get_front(range_id, 1));
2806 		}
2807 	}
2808 }
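/*
 * Worked numbers for the sizing loop above (editor's note, assuming 4k
 * pages and 128K chunks): each sizeclass reserves roughly 8 elements worth
 * of VA, rounded up to whole chunks.
 *
 *	4K class:  roundup(32K, 128K)  = 128K -> 1 chunk,   32 elements
 *	64K class: roundup(512K, 128K) = 512K -> 4 chunks,   8 elements
 *	1M class:  roundup(8M, 128K)   = 8M   -> 64 chunks,  8 elements
 *
 * The assert checks that every element of a chunk can be tracked in the
 * 32-bit km_bitmap.
 */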
2809 
2810 /*
2811  * This is done during EARLY_BOOT as it needs the corecrypto module to be
2812  * set up.
2813  */
2814 __startup_func
2815 static void
2816 kmem_crypto_init(void)
2817 {
2818 	vm_size_t ctx_size = crypto_random_kmem_ctx_size();
2819 
2820 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2821 		struct kmem_sizeclass *ks = &kmem_size_array[i];
2822 
2823 		ks->ks_rng_ctx = zalloc_percpu_permanent(ctx_size, ZALIGN_PTR);
2824 		zpercpu_foreach(ctx, ks->ks_rng_ctx) {
2825 			crypto_random_kmem_init(ctx);
2826 		}
2827 	}
2828 }
2829 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init);
2830 
2831 __abortlike
2832 static void
2833 kmem_validate_slot_panic(
2834 	vm_map_offset_t         addr,
2835 	struct kmem_page_meta  *meta,
2836 	uint32_t                slot_idx,
2837 	uint32_t                size_idx)
2838 {
2839 	if (meta->km_page_marker != KMEM_META_PRIMARY) {
2840 		panic("Metadata (%p) for addr (%p) not primary", meta, (void *)addr);
2841 	}
2842 	if (meta->km_sizeclass != size_idx) {
2843 		panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion",
2844 		    meta, meta->km_sizeclass, size_idx);
2845 	}
2846 	panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free",
2847 	    slot_idx, meta, (void *)addr);
2848 }
2849 
2850 __abortlike
2851 static void
2852 kmem_invalid_slot_for_addr(
2853 	mach_vm_range_t         slot,
2854 	vm_map_offset_t         start,
2855 	vm_map_offset_t         end)
2856 {
2857 	panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]",
2858 	    (void *)slot->min_address, (void *)slot->max_address,
2859 	    (void *)start, (void *)end);
2860 }
2861 
2862 void
2863 kmem_validate_slot(
2864 	vm_map_offset_t         addr,
2865 	struct kmem_page_meta  *meta,
2866 	uint32_t                size_idx,
2867 	uint32_t                slot_idx)
2868 {
2869 	if ((meta->km_page_marker != KMEM_META_PRIMARY) ||
2870 	    (meta->km_sizeclass != size_idx) ||
2871 	    ((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) {
2872 		kmem_validate_slot_panic(addr, meta, size_idx, slot_idx);
2873 	}
2874 }
2875 
2876 static void
2877 kmem_validate_slot_initial(
2878 	mach_vm_range_t         slot,
2879 	vm_map_offset_t         start,
2880 	vm_map_offset_t         end,
2881 	struct kmem_page_meta  *meta,
2882 	uint32_t                size_idx,
2883 	uint32_t                slot_idx)
2884 {
2885 	if ((slot->min_address == 0) || (slot->max_address == 0) ||
2886 	    (start < slot->min_address) || (start >= slot->max_address) ||
2887 	    (end > slot->max_address)) {
2888 		kmem_invalid_slot_for_addr(slot, start, end);
2889 	}
2890 
2891 	kmem_validate_slot(start, meta, size_idx, slot_idx);
2892 }
2893 
2894 uint32_t
2895 kmem_addr_get_slot_idx(
2896 	vm_map_offset_t         start,
2897 	vm_map_offset_t         end,
2898 	vm_map_range_id_t       range_id,
2899 	struct kmem_page_meta **meta,
2900 	uint32_t               *size_idx,
2901 	mach_vm_range_t         slot)
2902 {
2903 	vm_map_offset_t chunk_start;
2904 	vm_map_size_t slot_size;
2905 	uint32_t slot_idx;
2906 
2907 	*meta = kmem_addr_to_meta_start(start, range_id, &chunk_start);
2908 	*size_idx = (*meta)->km_sizeclass;
2909 	slot_size = kmem_get_size_from_idx(*size_idx);
2910 	slot_idx = (start - chunk_start) / slot_size;
2911 	slot->min_address = chunk_start + slot_idx * slot_size;
2912 	slot->max_address = slot->min_address + slot_size;
2913 
2914 	kmem_validate_slot_initial(slot, start, end, *meta, *size_idx, slot_idx);
2915 
2916 	return slot_idx;
2917 }
2918 
2919 static bool
2920 kmem_populate_needed(vm_offset_t from, vm_offset_t to)
2921 {
2922 #if KASAN
2923 #pragma unused(from, to)
2924 	return true;
2925 #else
2926 	vm_offset_t page_addr = trunc_page(from);
2927 
2928 	for (; page_addr < to; page_addr += PAGE_SIZE) {
2929 		/*
2930 		 * This can race with another thread doing a populate on the same metadata
2931 		 * page, where we see an updated pmap but unmapped KASan shadow, causing a
2932 		 * fault in the shadow when we first access the metadata page. Avoid this
2933 		 * by always synchronizing on the kmem_meta_lock with KASan.
2934 		 */
2935 		if (!pmap_find_phys(kernel_pmap, page_addr)) {
2936 			return true;
2937 		}
2938 	}
2939 
2940 	return false;
2941 #endif /* !KASAN */
2942 }
2943 
2944 static void
2945 kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to)
2946 {
2947 	vm_offset_t page_addr = trunc_page(from);
2948 
2949 	vm_map_unlock(kernel_map);
2950 
2951 	for (; page_addr < to; page_addr += PAGE_SIZE) {
2952 		for (;;) {
2953 			kern_return_t ret = KERN_SUCCESS;
2954 
2955 			/*
2956 			 * All updates to kmem metadata are done under the kmem_meta_lock
2957 			 */
2958 			kmem_meta_lock();
2959 			if (0 == pmap_find_phys(kernel_pmap, page_addr)) {
2960 				ret = kernel_memory_populate(page_addr,
2961 				    PAGE_SIZE, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
2962 				    VM_KERN_MEMORY_OSFMK);
2963 			}
2964 			kmem_meta_unlock();
2965 
2966 			if (ret == KERN_SUCCESS) {
2967 				break;
2968 			}
2969 
2970 			/*
2971 			 * We can't pass KMA_NOPAGEWAIT under a global lock as it leads
2972 			 * to bad system deadlocks, so if the allocation failed,
2973 			 * we need to do the VM_PAGE_WAIT() outside of the lock.
2974 			 */
2975 			VM_PAGE_WAIT();
2976 		}
2977 	}
2978 
2979 	vm_map_lock(kernel_map);
2980 }
2981 
2982 __abortlike
2983 static void
2984 kmem_invalid_meta_panic(
2985 	struct kmem_page_meta  *meta,
2986 	uint32_t                slot_idx,
2987 	struct kmem_sizeclass   sizeclass)
2988 {
2989 	uint32_t size_idx = kmem_get_idx_from_size(sizeclass.ks_size);
2990 
2991 	if (slot_idx >= sizeclass.ks_num_elem) {
2992 		panic("Invalid slot idx %u [0:%u] for meta %p", slot_idx,
2993 		    sizeclass.ks_num_elem, meta);
2994 	}
2995 	if (meta->km_sizeclass != size_idx) {
2996 		panic("Invalid size_idx (%u != %u) in meta %p", size_idx,
2997 		    meta->km_sizeclass, meta);
2998 	}
2999 	panic("page_marker %u not primary in meta %p", meta->km_page_marker, meta);
3000 }
3001 
3002 __abortlike
3003 static void
3004 kmem_slot_has_entry_panic(
3005 	vm_map_entry_t          entry,
3006 	vm_map_offset_t         addr)
3007 {
3008 	panic("Entry (%p) already exists for addr (%p) being returned",
3009 	    entry, (void *)addr);
3010 }
3011 
3012 __abortlike
3013 static void
3014 kmem_slot_not_found(
3015 	struct kmem_page_meta  *meta,
3016 	uint32_t                slot_idx)
3017 {
3018 	panic("%uth free slot not found for meta %p bitmap %u", slot_idx, meta,
3019 	    meta->km_bitmap);
3020 }
3021 
3022 /*
3023  * Returns a 16bit random number between 0 and
3024  * upper_limit (inclusive)
3025  */
3026 __startup_func
3027 uint16_t
3028 kmem_get_random16(
3029 	uint16_t                upper_limit)
3030 {
3031 	static uint64_t random_entropy;
3032 	assert(upper_limit < UINT16_MAX);
3033 	if (random_entropy == 0) {
3034 		random_entropy = early_random();
3035 	}
3036 	uint32_t result = random_entropy & UINT32_MAX;
3037 	random_entropy >>= 32;
3038 	return (uint16_t)(result % (upper_limit + 1));
3039 }
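/*
 * Editor's note: this consumes the early_random() seed 32 bits at a time,
 * so one 64-bit seed serves two draws before it is refreshed. For example,
 * with upper_limit == 9 and a low word of 0x12345678, the result is
 * 0x12345678 % 10 == 6. The modulo bias is tolerable because this is only
 * used during startup, before the per-CPU corecrypto contexts exist.
 */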
3040 
3041 static uint32_t
3042 kmem_get_nth_free_slot(
3043 	struct kmem_page_meta  *meta,
3044 	uint32_t                n,
3045 	uint32_t                bitmap)
3046 {
3047 	uint32_t zeros_seen = 0, ones_seen = 0;
3048 
3049 	while (bitmap) {
3050 		uint32_t count = __builtin_ctz(bitmap);
3051 
3052 		zeros_seen += count;
3053 		bitmap >>= count;
3054 		if (__probable(~bitmap)) {
3055 			count = __builtin_ctz(~bitmap);
3056 		} else {
3057 			count = 32;
3058 		}
3059 		if (count + ones_seen > n) {
3060 			return zeros_seen + n;
3061 		}
3062 		ones_seen += count;
3063 		bitmap >>= count;
3064 	}
3065 
3066 	kmem_slot_not_found(meta, n);
3067 }
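/*
 * Worked example (editor's illustration): with bitmap 0b110100 (free slots
 * at indices 2, 4 and 5) and n == 1, the scan skips two zeros
 * (zeros_seen = 2), walks the one-bit run at index 2 (ones_seen = 1),
 * skips another zero (zeros_seen = 3), and the next run of two ones makes
 * count + ones_seen > n, returning zeros_seen + n = 4: the index of the
 * second free slot.
 */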
3068 
3069 
3070 static uint32_t
3071 kmem_get_next_slot(
3072 	struct kmem_page_meta  *meta,
3073 	struct kmem_sizeclass   sizeclass,
3074 	uint32_t                bitmap)
3075 {
3076 	uint32_t num_slots = __builtin_popcount(bitmap);
3077 	uint64_t slot_idx = 0;
3078 
3079 	assert(num_slots > 0);
3080 	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
3081 		/*
3082 		 * Use early random prior to early boot as the ks_rng_ctx requires
3083 		 * the corecrypto module to be setup before it is initialized and
3084 		 * used.
3085 		 *
3086 		 * num_slots can't be 0 as we take this path when we have more than
3087 		 * one slot left.
3088 		 */
3089 		slot_idx = kmem_get_random16((uint16_t)num_slots - 1);
3090 	} else {
3091 		crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), num_slots,
3092 		    &slot_idx);
3093 	}
3094 
3095 	return kmem_get_nth_free_slot(meta, slot_idx, bitmap);
3096 }
3097 
3098 /*
3099  * Returns an unallocated slot from the given metadata
3100  */
3101 static vm_map_offset_t
3102 kmem_get_addr_from_meta(
3103 	struct kmem_page_meta  *meta,
3104 	vm_map_range_id_t       range_id,
3105 	struct kmem_sizeclass   sizeclass,
3106 	vm_map_entry_t         *entry)
3107 {
3108 	vm_map_offset_t addr;
3109 	vm_map_size_t size = sizeclass.ks_size;
3110 	uint32_t size_idx = kmem_get_idx_from_size(size);
3111 	uint64_t meta_idx = meta - kmem_meta_base[range_id];
3112 	mach_vm_offset_t range_start = kmem_ranges[range_id].min_address;
3113 	uint32_t slot_bit;
3114 	uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, meta->km_bitmap);
3115 
3116 	if ((slot_idx >= sizeclass.ks_num_elem) ||
3117 	    (meta->km_sizeclass != size_idx) ||
3118 	    (meta->km_page_marker != KMEM_META_PRIMARY)) {
3119 		kmem_invalid_meta_panic(meta, slot_idx, sizeclass);
3120 	}
3121 
3122 	slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx);
3123 	meta->km_bitmap &= ~slot_bit;
3124 
3125 	addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size);
3126 	assert(kmem_range_contains_fully(range_id, addr, size));
3127 	if (vm_map_lookup_entry(kernel_map, addr, entry)) {
3128 		kmem_slot_has_entry_panic(*entry, addr);
3129 	}
3130 	if ((*entry != vm_map_to_entry(kernel_map)) &&
3131 	    ((*entry)->vme_next != vm_map_to_entry(kernel_map)) &&
3132 	    ((*entry)->vme_next->vme_start < (addr + size))) {
3133 		kmem_slot_has_entry_panic(*entry, addr);
3134 	}
3135 	return addr;
3136 }
3137 
3138 __abortlike
3139 static void
3140 kmem_range_out_of_va(
3141 	kmem_range_id_t         range_id,
3142 	uint32_t                num_chunks)
3143 {
3144 	panic("No more VA to allocate %u chunks in range %u", num_chunks, range_id);
3145 }
3146 
3147 static void
3148 kmem_init_allocated_chunk(
3149 	struct kmem_page_meta  *meta,
3150 	struct kmem_sizeclass   sizeclass,
3151 	uint32_t                size_idx)
3152 {
3153 	uint32_t meta_num = sizeclass.ks_num_chunk;
3154 	uint32_t num_elem = sizeclass.ks_num_elem;
3155 
3156 	meta->km_bitmap = (1ull << num_elem) - 1;
3157 	meta->km_chunk_len = (uint16_t)meta_num;
3158 	assert(LIST_NEXT(meta, km_link) == NULL);
3159 	assert(meta->km_link.le_prev == NULL);
3160 	meta->km_sizeclass = (uint8_t)size_idx;
3161 	meta->km_page_marker = KMEM_META_PRIMARY;
3162 	meta++;
3163 	for (uint32_t i = 1; i < meta_num; i++) {
3164 		meta->km_page_idx = (uint16_t)i;
3165 		meta->km_sizeclass = (uint8_t)size_idx;
3166 		meta->km_page_marker = 0;
3167 		meta->km_bitmap = 0;
3168 		meta++;
3169 	}
3170 }
3171 
3172 static uint32_t
3173 kmem_get_additional_meta(
3174 	struct kmem_page_meta  *meta,
3175 	uint32_t                meta_req,
3176 	bool                    from_right,
3177 	struct kmem_page_meta **adj_free_meta)
3178 {
3179 	struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1);
3180 
3181 	if (meta_prev->km_page_marker == KMEM_META_FREE) {
3182 		uint32_t chunk_len = kmem_get_free_chunk_len(meta_prev);
3183 
3184 		*adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1);
3185 		meta_req -= chunk_len;
3186 	} else {
3187 		*adj_free_meta = NULL;
3188 	}
3189 
3190 	return meta_req;
3191 }
3192 
3193 
3194 static struct kmem_page_meta *
3195 kmem_get_new_chunk(
3196 	vm_map_range_id_t       range_id,
3197 	bool                    from_right,
3198 	uint32_t                size_idx)
3199 {
3200 	struct kmem_sizeclass sizeclass = kmem_size_array[size_idx];
3201 	struct kmem_page_meta *start, *end, *meta_update;
3202 	struct kmem_page_meta *adj_free_meta = NULL;
3203 	uint32_t meta_req = sizeclass.ks_num_chunk;
3204 
3205 	for (;;) {
3206 		struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3207 		struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3208 		struct kmem_page_meta *meta;
3209 		vm_offset_t start_addr, end_addr;
3210 		uint32_t meta_num;
3211 
3212 		meta = from_right ? metab : metaf;
3213 		meta_num = kmem_get_additional_meta(meta, meta_req, from_right,
3214 		    &adj_free_meta);
3215 
3216 		if (metaf + meta_num >= metab) {
3217 			kmem_range_out_of_va(range_id, meta_num);
3218 		}
3219 
3220 		start = from_right ? (metab - meta_num) : metaf;
3221 		end = from_right ? metab : (metaf + meta_num);
3222 
3223 		start_addr = (vm_offset_t)start;
3224 		end_addr   = (vm_offset_t)end;
3225 
3226 		/*
3227 		 * If the new high watermark stays on the same page,
3228 		 * no need to populate and drop the lock.
3229 		 */
3230 		if (!page_aligned(from_right ? end_addr : start_addr) &&
3231 		    trunc_page(start_addr) == trunc_page(end_addr - 1)) {
3232 			break;
3233 		}
3234 		if (!kmem_populate_needed(start_addr, end_addr)) {
3235 			break;
3236 		}
3237 
3238 		kmem_populate_meta_locked(start_addr, end_addr);
3239 
3240 		/*
3241 		 * Since we dropped the lock, reassess whether the conditions still hold:
3242 		 * - the HWM we are changing must not have moved
3243 		 * - the other HWM must not intersect with ours
3244 		 * - in case of coalescing, the adjacent free meta must still
3245 		 *   be free and of the same size.
3246 		 *
3247 		 * If we failed to grow, reevaluate whether freelists have
3248 		 * entries now by returning NULL.
3249 		 */
3250 		metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3251 		metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3252 		if (meta != (from_right ? metab : metaf)) {
3253 			return NULL;
3254 		}
3255 		if (metaf + meta_num >= metab) {
3256 			kmem_range_out_of_va(range_id, meta_num);
3257 		}
3258 		if (adj_free_meta) {
3259 			if (adj_free_meta->km_page_marker != KMEM_META_FREE ||
3260 			    kmem_get_free_chunk_len(adj_free_meta) !=
3261 			    meta_req - meta_num) {
3262 				return NULL;
3263 			}
3264 		}
3265 
3266 		break;
3267 	}
3268 
3269 	/*
3270 	 * If there is an adjacent free chunk remove it from free list
3271 	 */
3272 	if (adj_free_meta) {
3273 		LIST_REMOVE(adj_free_meta, km_link);
3274 		LIST_NEXT(adj_free_meta, km_link) = NULL;
3275 		adj_free_meta->km_link.le_prev = NULL;
3276 	}
3277 
3278 	/*
3279 	 * Update hwm
3280 	 */
3281 	meta_update = from_right ? start : end;
3282 	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update;
3283 
3284 	/*
3285 	 * Initialize metadata
3286 	 */
3287 	start = from_right ? start : (end - meta_req);
3288 	kmem_init_allocated_chunk(start, sizeclass, size_idx);
3289 
3290 	return start;
3291 }
3292 
3293 static void
3294 kmem_requeue_meta(
3295 	struct kmem_page_meta  *meta,
3296 	struct kmem_list_head  *head)
3297 {
3298 	LIST_REMOVE(meta, km_link);
3299 	LIST_INSERT_HEAD(head, meta, km_link);
3300 }
3301 
3302 /*
3303  * Return corresponding sizeclass to stash free chunks in
3304  */
3305 __abortlike
3306 static void
3307 kmem_invalid_chunk_num(uint32_t chunks)
3308 {
3309 	panic("Invalid number of chunks %u\n", chunks);
3310 }
3311 
3312 static uint32_t
3313 kmem_get_size_idx_for_chunks(uint32_t chunks)
3314 {
3315 	for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) {
3316 		if (chunks >= kmem_size_array[i].ks_num_chunk) {
3317 			return i;
3318 		}
3319 	}
3320 	kmem_invalid_chunk_num(chunks);
3321 }
3322 
3323 static void
3324 kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count)
3325 {
3326 	bzero(meta, count * sizeof(struct kmem_page_meta));
3327 }
3328 
3329 static void
3330 kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count)
3331 {
3332 #if MACH_ASSERT
3333 	size_t size = count * sizeof(struct kmem_page_meta);
3334 
3335 	assert(memcmp_zero_ptr_aligned(meta, size) == 0);
3336 #else
3337 #pragma unused(meta, count)
3338 #endif
3339 }
3340 
3341 /*!
3342  * @function kmem_init_free_chunk()
3343  *
3344  * @discussion
3345  * This function prepares a range of chunks to be put on a free list.
3346  * The first and last metadata might be dirty, but the "inner" ones
3347  * must be zero filled by the caller prior to calling this function.
3348  */
3349 static void
3350 kmem_init_free_chunk(
3351 	struct kmem_page_meta  *meta,
3352 	uint32_t                num_chunks,
3353 	uint32_t                front)
3354 {
3355 	struct kmem_sizeclass *sizeclass;
3356 	uint32_t size_idx = kmem_get_size_idx_for_chunks(num_chunks);
3357 
3358 	if (num_chunks > 2) {
3359 		kmem_check_meta_range_is_clear(meta + 1, num_chunks - 2);
3360 	}
3361 
3362 	meta[0] = (struct kmem_page_meta){
3363 		.km_free_chunks = num_chunks,
3364 		.km_page_marker = KMEM_META_FREE,
3365 		.km_sizeclass   = (uint8_t)size_idx,
3366 	};
3367 	if (num_chunks > 1) {
3368 		meta[num_chunks - 1] = (struct kmem_page_meta){
3369 			.km_free_chunks = num_chunks,
3370 			.km_page_marker = KMEM_META_FREE,
3371 			.km_sizeclass   = (uint8_t)size_idx,
3372 		};
3373 	}
3374 
3375 	sizeclass = &kmem_size_array[size_idx];
3376 	LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link);
3377 }
3378 
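/*
 * Grab a free run of chunks for the requested sizeclass, searching
 * progressively larger sizeclasses when the exact one is empty.  Any excess
 * chunks in the run are split off and re-queued as a smaller free run before
 * the allocated portion is initialized.
 */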
3379 static struct kmem_page_meta *
3380 kmem_get_free_chunk_from_list(
3381 	struct kmem_sizeclass  *org_sizeclass,
3382 	uint32_t                size_idx,
3383 	uint32_t                front)
3384 {
3385 	struct kmem_sizeclass *sizeclass;
3386 	uint32_t num_chunks = org_sizeclass->ks_num_chunk;
3387 	struct kmem_page_meta *meta;
3388 	uint32_t idx = size_idx;
3389 
3390 	while (idx < KMEM_NUM_SIZECLASS) {
3391 		sizeclass = &kmem_size_array[idx];
3392 		meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]);
3393 		if (meta) {
3394 			break;
3395 		}
3396 		idx++;
3397 	}
3398 
3399 	/*
3400 	 * Trim the run if it is larger than needed
3401 	 */
3402 	if (meta) {
3403 		uint32_t num_chunks_free = kmem_get_free_chunk_len(meta);
3404 
3405 		assert(meta->km_page_marker == KMEM_META_FREE);
3406 		LIST_REMOVE(meta, km_link);
3407 		LIST_NEXT(meta, km_link) = NULL;
3408 		meta->km_link.le_prev = NULL;
3409 		if (num_chunks_free > num_chunks) {
3410 			num_chunks_free -= num_chunks;
3411 			kmem_init_free_chunk(meta + num_chunks, num_chunks_free, front);
3412 		}
3413 
3414 		kmem_init_allocated_chunk(meta, *org_sizeclass, size_idx);
3415 	}
3416 
3417 	return meta;
3418 }
3419 
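/*
 * kmem_locate_space:
 *
 * Pick a slot of `size` bytes from the given range, working from the chosen
 * front.  Allocation order: reuse a partially filled chunk of the matching
 * sizeclass, else carve from an existing free chunk (trimming larger runs),
 * else grow a brand new chunk past the metadata high water mark.
 */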
3420 kern_return_t
3421 kmem_locate_space(
3422 	vm_map_size_t           size,
3423 	vm_map_range_id_t       range_id,
3424 	bool                    from_right,
3425 	vm_map_offset_t        *start_inout,
3426 	vm_map_entry_t         *entry_out)
3427 {
3428 	vm_map_entry_t entry;
3429 	uint32_t size_idx = kmem_get_idx_from_size(size);
3430 	uint32_t front = kmem_get_front(range_id, from_right);
3431 	struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx];
3432 	struct kmem_page_meta *meta;
3433 
3434 	assert(size <= sizeclass->ks_size);
3435 again:
3436 	if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) {
3437 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3438 		/*
3439 		 * Requeue to full if necessary
3440 		 */
3441 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3442 		if (__builtin_popcount(meta->km_bitmap) == KMEM_NUM_GUARDS) {
3443 			kmem_requeue_meta(meta, &sizeclass->ks_full_head[front]);
3444 		}
3445 	} else if ((meta = kmem_get_free_chunk_from_list(sizeclass, size_idx,
3446 	    front)) != NULL) {
3447 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3448 		/*
3449 		 * Queue to partial
3450 		 */
3451 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3452 		assert(__builtin_popcount(meta->km_bitmap) > KMEM_NUM_GUARDS);
3453 		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3454 	} else {
3455 		meta = kmem_get_new_chunk(range_id, from_right, size_idx);
3456 		if (meta == NULL) {
3457 			goto again;
3458 		}
3459 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3460 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3461 		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3462 	}
3463 
3464 	if (entry_out) {
3465 		*entry_out = entry;
3466 	}
3467 
3468 	return KERN_SUCCESS;
3469 }
3470 
3471 /*
3472  * Determine whether the given metadata was allocated from the right
3473  */
3474 static bool
3475 kmem_meta_is_from_right(
3476 	kmem_range_id_t         range_id,
3477 	struct kmem_page_meta  *meta)
3478 {
3479 	struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3480 	__assert_only struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3481 	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
3482 	struct kmem_page_meta *meta_end;
3483 
3484 	meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address;
3485 
3486 	if ((meta >= meta_base) && (meta < metaf)) {
3487 		return false;
3488 	}
3489 
3490 	assert(meta >= metab && meta < meta_end);
3491 	return true;
3492 }
3493 
3494 static void
3495 kmem_free_chunk(
3496 	kmem_range_id_t         range_id,
3497 	struct kmem_page_meta  *meta,
3498 	bool                    from_right)
3499 {
3500 	struct kmem_page_meta *meta_coalesce = meta - 1;
3501 	struct kmem_page_meta *meta_start = meta;
3502 	uint32_t num_chunks = kmem_get_chunk_len(meta);
3503 	uint32_t add_chunks;
3504 	struct kmem_page_meta *meta_end = meta + num_chunks;
3505 	struct kmem_page_meta *meta_hwm_l, *meta_hwm_r;
3506 	uint32_t front = kmem_get_front(range_id, from_right);
3507 
3508 	meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3509 	meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3510 
3511 	LIST_REMOVE(meta, km_link);
3512 	kmem_clear_meta_range(meta, num_chunks);
3513 
3514 	/*
3515 	 * Coalesce left
3516 	 */
3517 	if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) &&
3518 	    (meta_coalesce->km_page_marker == KMEM_META_FREE)) {
3519 		meta_start = meta_coalesce - kmem_get_free_chunk_len(meta_coalesce) + 1;
3520 		add_chunks = kmem_get_free_chunk_len(meta_start);
3521 		num_chunks += add_chunks;
3522 		LIST_REMOVE(meta_start, km_link);
3523 		kmem_clear_meta_range(meta_start + add_chunks - 1, 1);
3524 	}
3525 
3526 	/*
3527 	 * Coalesce right
3528 	 */
3529 	if (((!from_right && (meta_end < meta_hwm_l)) || from_right) &&
3530 	    (meta_end->km_page_marker == KMEM_META_FREE)) {
3531 		add_chunks = kmem_get_free_chunk_len(meta_end);
3532 		LIST_REMOVE(meta_end, km_link);
3533 		kmem_clear_meta_range(meta_end, 1);
3534 		meta_end = meta_end + add_chunks;
3535 		num_chunks += add_chunks;
3536 	}
3537 
3538 	kmem_init_free_chunk(meta_start, num_chunks, front);
3539 }
3540 
3541 static void
3542 kmem_free_slot(
3543 	kmem_range_id_t         range_id,
3544 	mach_vm_range_t         slot)
3545 {
3546 	struct kmem_page_meta *meta;
3547 	vm_map_offset_t chunk_start;
3548 	uint32_t size_idx, chunk_elem, slot_idx, num_elem;
3549 	struct kmem_sizeclass *sizeclass;
3550 	vm_map_size_t slot_size;
3551 
3552 	meta = kmem_addr_to_meta_start(slot->min_address, range_id, &chunk_start);
3553 	size_idx = meta->km_sizeclass;
3554 	slot_size = kmem_get_size_from_idx(size_idx);
3555 	slot_idx = (slot->min_address - chunk_start) / slot_size;
3556 	assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0);
3557 	meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx);
3558 
3559 	sizeclass = &kmem_size_array[size_idx];
3560 	chunk_elem = sizeclass->ks_num_elem;
3561 	num_elem = __builtin_popcount(meta->km_bitmap);
3562 
3563 	if (num_elem == chunk_elem) {
3564 		/*
3565 		 * If the entire chunk is now free, give it back to the free chunk list
3566 		 */
3567 		bool from_right = kmem_meta_is_from_right(range_id, meta);
3568 
3569 		kmem_free_chunk(range_id, meta, from_right);
3570 	} else if (num_elem == KMEM_NUM_GUARDS + 1) {
3571 		/*
3572 		 * If we just freed a slot in a full chunk, move it to the partial list
3573 		 */
3574 		uint32_t front = kmem_get_front(range_id,
3575 		    kmem_meta_is_from_right(range_id, meta));
3576 
3577 		kmem_requeue_meta(meta, &sizeclass->ks_partial_head[front]);
3578 	}
3579 }
3580 
3581 void
3582 kmem_free_space(
3583 	vm_map_offset_t         start,
3584 	vm_map_offset_t         end,
3585 	vm_map_range_id_t       range_id,
3586 	mach_vm_range_t         slot)
3587 {
3588 	bool entry_present = false;
3589 	vm_map_entry_t prev_entry;
3590 	vm_map_entry_t next_entry;
3591 
3592 	if ((slot->min_address == start) && (slot->max_address == end)) {
3593 		/*
3594 		 * Entire slot is being freed at once
3595 		 */
3596 		return kmem_free_slot(range_id, slot);
3597 	}
3598 
3599 	entry_present = vm_map_lookup_entry(kernel_map, start, &prev_entry);
3600 	assert(!entry_present);
3601 	next_entry = prev_entry->vme_next;
3602 
3603 	if (((prev_entry == vm_map_to_entry(kernel_map) ||
3604 	    prev_entry->vme_end <= slot->min_address)) &&
3605 	    (next_entry == vm_map_to_entry(kernel_map) ||
3606 	    (next_entry->vme_start >= slot->max_address))) {
3607 		/*
3608 		 * Free entire slot
3609 		 */
3610 		kmem_free_slot(range_id, slot);
3611 	}
3612 }
3613 
3614 #pragma mark kmem init
3615 
3616 /*
3617  * The default percentage of memory that can be mlocked is scaled based on the total
3618  * amount of memory in the system. These percentages are calculated
3619  * offline and stored in this table. We index this table by
3620  * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
3621  * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
3622  *
3623  * Note that these values were picked for mac.
3624  * If we ever have very large memory config arm devices, we may want to revisit
3625  * since the kernel overhead is smaller there due to the larger page size.
3626  */
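/*
 * Illustrative arithmetic (example values only): on a 64GB configuration,
 * log2(max_mem) is 36, so the table index is 36 - 32 = 4 and the default
 * wire limit comes out to 82% of DRAM with either table below.
 */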
3627 
3628 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
3629 #define VM_USER_WIREABLE_MIN_CONFIG 32
3630 #if CONFIG_JETSAM
3631 /* Systems with jetsam can wire a bit more because the system can relieve wired
3632  * pressure.
3633  */
3634 static vm_map_size_t wire_limit_percents[] =
3635 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
3636 #else
3637 static vm_map_size_t wire_limit_percents[] =
3638 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
3639 #endif /* CONFIG_JETSAM */
3640 
3641 /* Set limit to 95% of DRAM if serverperfmode=1 */
3642 #define VM_USER_SERVERPERF_WIRE_LIMIT_PERCENT 95
3643 /* Use special serverperfmode behavior iff DRAM > 2^35 = 32GiB of RAM. */
3644 #define VM_USER_SERVERPERF_WIREABLE_MIN_CONFIG 35
3645 
3646 /*
3647  * Sets the default global user wire limit which limits the amount of
3648  * memory that can be locked via mlock() based on the above algorithm.
3649  * This can be overridden via a sysctl.
3650  */
3651 static void
3652 kmem_set_user_wire_limits(void)
3653 {
3654 	uint64_t available_mem_log;
3655 	uint64_t max_wire_percent;
3656 	size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
3657 	    sizeof(vm_map_size_t);
3658 	vm_map_size_t limit;
3659 	uint64_t config_memsize = max_mem;
3660 #if defined(XNU_TARGET_OS_OSX)
3661 	config_memsize = max_mem_actual;
3662 #endif /* defined(XNU_TARGET_OS_OSX) */
3663 
3664 	available_mem_log = bit_floor(config_memsize);
3665 
3666 	if (serverperfmode &&
3667 	    (available_mem_log >= VM_USER_SERVERPERF_WIREABLE_MIN_CONFIG)) {
3668 		max_wire_percent = VM_USER_SERVERPERF_WIRE_LIMIT_PERCENT;
3669 	} else {
3670 		if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
3671 			available_mem_log = 0;
3672 		} else {
3673 			available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
3674 		}
3675 		if (available_mem_log >= wire_limit_percents_length) {
3676 			available_mem_log = wire_limit_percents_length - 1;
3677 		}
3678 		max_wire_percent = wire_limit_percents[available_mem_log];
3679 	}
3680 
3681 	limit = config_memsize * max_wire_percent / 100;
3682 	/* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
3683 	if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
3684 		limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
3685 	}
3686 
3687 	vm_global_user_wire_limit = limit;
3688 	/* the default per task limit is the same as the global limit */
3689 	vm_per_task_user_wire_limit = limit;
3690 	vm_add_wire_count_over_global_limit = 0;
3691 	vm_add_wire_count_over_user_limit = 0;
3692 }
3693 
3694 #define KMEM_MAX_CLAIMS 50
3695 __startup_data
3696 struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
3697 
3698 #if !MACH_ASSERT
3699 __startup_data
3700 #endif /* !MACH_ASSERT */
3701 uint32_t kmem_claim_count = 0;
3702 
3703 #if MACH_ASSERT
3704 /**
3705  * Save off some minimal information about the ranges for consumption by
3706  * post-lockdown tests.
3707  */
3708 static struct mach_vm_range kmem_test_saved_ranges[KMEM_MAX_CLAIMS];
3709 #endif /* MACH_ASSERT */
3710 
3711 /**
3712  * For a requested claim size (i.e. kc_size), get the number of bytes which
3713  * should actually be allocated for a region in order to be able to properly
3714  * provide the requested size (the allocation size).
3715  *
3716  * This allocation size is always greater or equal to the claim size. It can,
3717  * for example, include additional space as required by the kernel memory
3718  * configuration.
3719  *
3720  * @param known_last Is the claim in question known to be the last region after
3721  * all placing has completed? The size for a known_last allocation is always
3722  * less than or equal to a non-known_last allocation of the same size.
3723  */
3724 __startup_func
3725 static vm_map_size_t
3726 kmem_claim_to_allocation_size(vm_map_size_t claim_size, bool known_last)
3727 {
3728 	(void)known_last;
3729 	/*
3730 	 * Allocation size and claim size are identical.
3731 	 */
3732 	return claim_size;
3733 }
3734 
3735 /**
3736  * Compute the largest claim which can be made from a given allocation size.
3737  */
3738 static vm_map_size_t
3739 kmem_allocation_to_claim_size(vm_map_size_t allocation_size)
3740 {
3741 	/*
3742 	 * Allocation size and claim size are identical.
3743 	 */
3744 	return allocation_size;
3745 }
3746 
3747 __startup_func
3748 void
3749 kmem_range_startup_init(
3750 	struct kmem_range_startup_spec *sp)
3751 {
3752 	assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
3753 	if (sp->kc_calculate_sz) {
3754 		sp->kc_size = (sp->kc_calculate_sz)();
3755 	}
3756 	if (sp->kc_size) {
3757 		kmem_claims[kmem_claim_count] = *sp;
3758 		kmem_claim_count++;
3759 	}
3760 }
3761 
3762 static vm_offset_t
3763 kmem_fuzz_start(void)
3764 {
3765 	vm_offset_t kmapoff_kaddr = 0;
3766 	uint32_t kmapoff_pgcnt;
3767 
3768 	kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
3769 
3770 	vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);
3771 
3772 	kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
3773 	    KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
3774 	    VM_KERN_MEMORY_OSFMK);
3775 
3776 
3777 	return kmapoff_kaddr + kmapoff_size;
3778 }
3779 
3780 /*
3781  * Generate a randomly shuffled array of indices from 0 to count - 1
3782  */
3783 __startup_func
3784 void
3785 kmem_shuffle(
3786 	uint16_t       *shuffle_buf,
3787 	uint16_t        count)
3788 {
3789 	for (uint16_t i = 0; i < count; i++) {
3790 		uint16_t j = kmem_get_random16(i);
3791 		if (j != i) {
3792 			shuffle_buf[i] = shuffle_buf[j];
3793 		}
3794 		shuffle_buf[j] = i;
3795 	}
3796 }
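/*
 * Illustrative use (hypothetical caller): shuffling four claims would look
 * like
 *
 *	uint16_t order[4];
 *	kmem_shuffle(order, 4);
 *	// order[] now holds a random permutation of 0..3
 *
 * This is the "inside-out" Fisher-Yates construction, so the buffer does not
 * need to be initialized beforehand.
 */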
3797 
3798 __startup_func
3799 static void
3800 kmem_shuffle_claims(void)
3801 {
3802 	uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
3803 	uint16_t limit = (uint16_t)kmem_claim_count;
3804 
3805 	kmem_shuffle(&shuffle_buf[0], limit);
3806 	for (uint16_t i = 0; i < limit; i++) {
3807 		struct kmem_range_startup_spec tmp = kmem_claims[i];
3808 		kmem_claims[i] = kmem_claims[shuffle_buf[i]];
3809 		kmem_claims[shuffle_buf[i]] = tmp;
3810 	}
3811 }
3812 
3813 __startup_func
3814 static void
3815 kmem_readjust_ranges(
3816 	uint32_t        cur_idx)
3817 {
3818 	assert(cur_idx != 0);
3819 	uint32_t j = cur_idx - 1, random;
3820 	struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
3821 	struct mach_vm_range *sp_range = sp.kc_range;
3822 	/*
3823 	 * Even if sp is currently last, it will never be last after it is moved.
3824 	 * As such, we want to bump other claims over it and include any necessary
3825 	 * padding for a non-last claim.
3826 	 *
3827 	 * While changing which claim is last can impact the total VA usage, since a
3828 	 * known_last allocation size is guaranteed to always be less-than-or-equal
3829 	 * to a non-known_last allocation (which is used for pre-placement sizing),
3830 	 * we will always have enough space so long as the pre-placement sizing had
3831 	 * enough space.
3832 	 */
3833 	vm_map_offset_t sp_allocation_size =
3834 	    kmem_claim_to_allocation_size(sp.kc_size, /* known_last */ false);
3835 
3836 	/*
3837 	 * Find max index where restriction is met
3838 	 */
3839 	for (; j > 0; j--) {
3840 		struct kmem_range_startup_spec spj = kmem_claims[j];
3841 		vm_map_offset_t max_start = spj.kc_range->min_address;
3842 		if (spj.kc_flags & KC_NO_MOVE) {
3843 			panic("kmem_range_init: Can't scramble with multiple constraints");
3844 		}
3845 		if (max_start <= sp_range->min_address) {
3846 			break;
3847 		}
3848 	}
3849 
3850 	/*
3851 	 * Pick a random index from 0 to max index and shift claims to the right
3852 	 * to make room for restricted claim
3853 	 */
3854 	random = kmem_get_random16((uint16_t)j);
3855 	assert(random <= j);
3856 
3857 	sp_range->min_address = kmem_claims[random].kc_range->min_address;
3858 	sp_range->max_address = sp_range->min_address + sp.kc_size;
3859 
3860 	for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
3861 		struct kmem_range_startup_spec spj = kmem_claims[j];
3862 		struct mach_vm_range *range = spj.kc_range;
3863 		range->min_address += sp_allocation_size;
3864 		range->max_address += sp_allocation_size;
3865 		kmem_claims[j + 1] = spj;
3866 	}
3867 
3868 	sp.kc_flags |= KC_NO_MOVE;
3869 	kmem_claims[random] = sp;
3870 }
3871 
3872 __startup_func
3873 static void
3874 kmem_add_ptr_claims(void)
3875 {
3876 	uint64_t kmem_meta_num, kmem_ptr_chunks;
3877 	vm_map_size_t org_ptr_range_size __assert_only;
3878 
3879 	org_ptr_range_size = ptr_range_size;
3880 
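	/*
	 * Split the per-range VA budget between chunk VA and its metadata:
	 * every KMEM_CHUNK_SIZE_MIN bytes of chunk VA needs one
	 * struct kmem_page_meta, and the page subtracted below leaves slack so
	 * that rounding the metadata size up to a page (see the assert further
	 * down) still fits within the original budget.
	 */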
3881 	ptr_range_size -= PAGE_SIZE;
3882 	ptr_range_size *= KMEM_CHUNK_SIZE_MIN;
3883 	ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta));
3884 
3885 	kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN;
3886 	ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN;
3887 
3888 	kmem_meta_num = kmem_ptr_chunks + 2;
3889 	kmem_meta_size = round_page(kmem_meta_num * sizeof(struct kmem_page_meta));
3890 
3891 	assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size);
3892 	/*
3893 	 * Add claims for kmem's ranges
3894 	 */
3895 	for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
3896 		struct kmem_range_startup_spec kmem_spec = {
3897 			.kc_name = "kmem_ptr_range",
3898 			.kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
3899 			.kc_size = ptr_range_size,
3900 			.kc_flags = KC_NO_ENTRY,
3901 		};
3902 		kmem_claims[kmem_claim_count++] = kmem_spec;
3903 
3904 		struct kmem_range_startup_spec kmem_meta_spec = {
3905 			.kc_name = "kmem_ptr_range_meta",
3906 			.kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i],
3907 			.kc_size = kmem_meta_size,
3908 			.kc_flags = KC_NONE,
3909 		};
3910 		kmem_claims[kmem_claim_count++] = kmem_meta_spec;
3911 	}
3912 }
3913 
3914 __startup_func
3915 static void
3916 kmem_add_extra_claims(void)
3917 {
3918 	vm_map_size_t largest_free_size = 0, total_claims = 0;
3919 	vm_map_size_t sane_sprayqtn_size = 0, sprayqtn_allocation_size = 0;
3920 	vm_map_size_t ptr_total_allocation_size = 0;
3921 
3922 	vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
3923 	largest_free_size = trunc_page(largest_free_size);
3924 
3925 	/*
3926 	 * kasan and configs w/o *TRR need to have just one ptr range due to
3927 	 * resource constraints.
3928 	 */
3929 #if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
3930 	kmem_ptr_ranges = 1;
3931 #endif
3932 	/*
3933 	 * Determine size of data and pointer kmem_ranges
3934 	 */
3935 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
3936 		struct kmem_range_startup_spec sp_i = kmem_claims[i];
3937 
3938 		total_claims += kmem_claim_to_allocation_size(
3939 			sp_i.kc_size, /* known_last */ false);
3940 	}
3941 	assert((total_claims & PAGE_MASK) == 0);
3942 
3943 
3944 	largest_free_size -= total_claims;
3945 
3946 	/*
3947 	 * Use half of the total available VA for all pointer allocations (this
3948 	 * includes the kmem_sprayqtn range). With 4 total ranges, that means
3949 	 * dividing the available VA by 8.
3950 	 */
3951 	ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2);
3952 
3953 	sprayqtn_range_size = ptr_range_size;
3954 	sane_sprayqtn_size = kmem_claim_to_allocation_size(
3955 		/* claim_size */ sane_size / 2, /* known_last */ false);
3956 	if (sprayqtn_range_size > sane_sprayqtn_size) {
3957 		vm_map_size_t sprayqtn_extra;
3958 
3959 		/*
3960 		 * Spray quarantine doesn't need that much space.
3961 		 * Shrink it to something reasonable and equally share the leftover VA
3962 		 * with the other pointer ranges.
3963 		 */
3964 		sprayqtn_extra = sprayqtn_range_size - sane_sprayqtn_size;
3965 		sprayqtn_range_size -= sprayqtn_extra;
3966 		ptr_range_size += sprayqtn_extra / kmem_ptr_ranges;
3967 	}
3968 
3969 	ptr_range_size = round_page(ptr_range_size);
3970 	sprayqtn_range_size = round_page(sprayqtn_range_size);
3971 
3972 	/* Less any necessary allocation padding... */
3973 	ptr_range_size = kmem_allocation_to_claim_size(ptr_range_size);
3974 	sprayqtn_range_size = kmem_allocation_to_claim_size(sprayqtn_range_size);
3975 
3976 	/*
3977 	 * Add the pointer and metadata claims
3978 	 * Note: this call modifies ptr_range_size and may, depending on the padding
3979 	 * requirements, slightly increase or decrease the overall allocation size
3980 	 * of the pointer+metadata region.
3981 	 */
3982 	kmem_add_ptr_claims();
3983 
3984 	sprayqtn_allocation_size = kmem_claim_to_allocation_size(
3985 		sprayqtn_range_size, /* known_last */ false);
3986 	ptr_total_allocation_size =
3987 	    (kmem_claim_to_allocation_size(ptr_range_size, /* known_last */ false) +
3988 	    kmem_claim_to_allocation_size(kmem_meta_size, /* known_last */ false)) *
3989 	    kmem_ptr_ranges;
3990 
3991 	/*
3992 	 * Check: spray and ptr_range are minimally valid.
3993 	 * This is a useful assert as it should catch us if we were to end up with a
3994 	 * "negative" (or extremely large) data_range_size.
3995 	 */
3996 	assert(sprayqtn_allocation_size + ptr_total_allocation_size < largest_free_size);
3997 
3998 	/*
3999 	 * Finally, give any remaining allocable space to the data region.
4000 	 */
4001 	data_range_size = largest_free_size - sprayqtn_allocation_size -
4002 	    ptr_total_allocation_size;
4003 
4004 	/* Less any necessary allocation padding... */
4005 	data_range_size = kmem_allocation_to_claim_size(data_range_size);
4006 
4007 	/* Check: our allocations should all still fit in the free space */
4008 	assert(sprayqtn_allocation_size + ptr_total_allocation_size +
4009 	    kmem_claim_to_allocation_size(data_range_size, /* known_last */ false) <=
4010 	    largest_free_size);
4011 
4012 	struct kmem_range_startup_spec kmem_spec_sprayqtn = {
4013 		.kc_name = "kmem_sprayqtn_range",
4014 		.kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN],
4015 		.kc_size = sprayqtn_range_size,
4016 		.kc_flags = KC_NO_ENTRY,
4017 	};
4018 	kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn;
4019 
4020 	struct kmem_range_startup_spec kmem_spec_data = {
4021 		.kc_name = "kmem_data_range",
4022 		.kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
4023 		.kc_size = data_range_size,
4024 		.kc_flags = KC_NO_ENTRY,
4025 	};
4026 	kmem_claims[kmem_claim_count++] = kmem_spec_data;
4027 }
4028 
4029 __startup_func
4030 static void
4031 kmem_scramble_ranges(void)
4032 {
4033 	vm_map_offset_t va_alloc_head = 0;
4034 
4035 	/*
4036 	 * Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
4037 	 * the vm can find the requested ranges.
4038 	 */
4039 	kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
4040 	    VM_MAP_PAGE_SIZE(kernel_map));
4041 	kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;
4042 
4043 	/*
4044 	 * Allocate the g_kext_map prior to randomizing the remaining submaps, as
4045 	 * this map is 2G in size and starts at the end of kernel_text on x86; it
4046 	 * could otherwise overflow into the heap.
4047 	 */
4048 	kext_alloc_init();
4049 
4050 	/*
4051 	 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
4052 	 * stack addresses. (With a 4K page and 9 bits of randomness, this
4053 	 * eats about 2M of VA from the map)
4054 	 *
4055 	 * Note that we always need to slide by at least one page because the VM
4056 	 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
4057 	 * do not admit this address to be part of any zone submap.
4058 	 */
4059 	va_alloc_head = kmem_fuzz_start();
4060 
4061 	/*
4062 	 * Add claims for ptr and data kmem_ranges
4063 	 */
4064 	kmem_add_extra_claims();
4065 
4066 	/*
4067 	 * Minimally verify that our placer will be able to resolve the constraints
4068 	 * of all claims
4069 	 */
4070 	bool has_min_address = false;
4071 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4072 		struct kmem_range_startup_spec sp_i = kmem_claims[i];
4073 
4074 		/* Verify that we have only one claim with a min address constraint */
4075 		if (sp_i.kc_range->min_address) {
4076 			if (has_min_address) {
4077 				panic("Cannot place with multiple min_address constraints");
4078 			} else {
4079 				has_min_address = true;
4080 			}
4081 		}
4082 
4083 		if (sp_i.kc_range->max_address) {
4084 			panic("Cannot place with a max_address constraint");
4085 		}
4086 	}
4087 
4088 
4089 	/*
4090 	 * Shuffle registered claims
4091 	 */
4092 	assert(kmem_claim_count < UINT16_MAX);
4093 	kmem_shuffle_claims();
4094 
4095 	/*
4096 	 * Apply restrictions and determine range for each claim
4097 	 */
4098 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4099 		struct kmem_range_startup_spec sp = kmem_claims[i];
4100 		struct mach_vm_range *sp_range = sp.kc_range;
4101 
4102 		/*
4103 		 * Find space using the allocation size (rather than the claim size) in
4104 		 * order to ensure we provide any applicable padding.
4105 		 */
4106 		bool is_last = (i == kmem_claim_count - 1);
4107 		vm_map_offset_t sp_allocation_size =
4108 		    kmem_claim_to_allocation_size(sp.kc_size, is_last);
4109 
4110 		if (vm_map_locate_space_anywhere(kernel_map, sp_allocation_size, 0,
4111 		    VM_MAP_KERNEL_FLAGS_ANYWHERE(.vmkf_no_soft_limit = true),
4112 		    &va_alloc_head, NULL) != KERN_SUCCESS) {
4113 			panic("kmem_range_init: vm_map_locate_space failing for claim %s, "
4114 			    "size 0x%llx",
4115 			    sp.kc_name, sp_allocation_size);
4116 		}
4117 
4118 		/*
4119 		 * Re-adjust ranges if restriction not met
4120 		 */
4121 		if (sp_range->min_address && va_alloc_head > sp_range->min_address) {
4122 			kmem_readjust_ranges(i);
4123 		} else {
4124 			/*
4125 			 * Though the actual allocated space may be larger, provide only the
4126 			 * size requested by the original claim.
4127 			 */
4128 			sp_range->min_address = va_alloc_head;
4129 			sp_range->max_address = va_alloc_head + sp.kc_size;
4130 		}
4131 
4132 		va_alloc_head += sp_allocation_size;
4133 	}
4134 
4135 	/*
4136 	 * We have settled on the ranges, now create temporary entries for the
4137 	 * claims
4138 	 */
4139 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4140 		struct kmem_range_startup_spec sp = kmem_claims[i];
4141 		bool is_last = (i == kmem_claim_count - 1);
4142 		vm_map_offset_t sp_allocation_size =
4143 		    kmem_claim_to_allocation_size(sp.kc_size, is_last);
4144 		vm_map_entry_t entry = NULL;
4145 		if (sp.kc_flags & KC_NO_ENTRY) {
4146 			continue;
4147 		}
4148 
4149 
4150 		/*
4151 		 * We reserve the full allocation size (rather than the claim size) so
4152 		 * that nothing ends up placed in the padding space (if applicable).
4153 		 */
4154 		if (vm_map_find_space(kernel_map, sp.kc_range->min_address,
4155 		    sp_allocation_size, 0,
4156 		    VM_MAP_KERNEL_FLAGS_ANYWHERE(.vmkf_no_soft_limit = true),
4157 		    &entry) != KERN_SUCCESS) {
4158 			panic("kmem_range_init: vm_map_find_space failing for claim %s",
4159 			    sp.kc_name);
4160 		}
4161 		vm_object_reference(kernel_object_default);
4162 		VME_OBJECT_SET(entry, kernel_object_default, false, 0);
4163 		VME_OFFSET_SET(entry, entry->vme_start);
4164 		vm_map_unlock(kernel_map);
4165 	}
4166 
4167 	/*
4168 	 * Now that we are done assigning all the ranges, reset
4169 	 * kmem_ranges[KMEM_RANGE_ID_NONE]
4170 	 */
4171 	kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};
4172 
4173 #if DEBUG || DEVELOPMENT
4174 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4175 		struct kmem_range_startup_spec sp = kmem_claims[i];
4176 
4177 		printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
4178 		    (void *)sp.kc_range->min_address,
4179 		    (void *)sp.kc_range->max_address,
4180 		    mach_vm_size_pretty(sp.kc_size),
4181 		    mach_vm_size_unit(sp.kc_size));
4182 	}
4183 #endif /* DEBUG || DEVELOPMENT */
4184 
4185 #if MACH_ASSERT
4186 	/*
4187 	 * Since many parts of the claim infrastructure are marked as startup data
4188 	 * (and are thus unavailable post-lockdown), save off information our tests
4189 	 * need now.
4190 	 */
4191 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4192 		kmem_test_saved_ranges[i] = *(kmem_claims[i].kc_range);
4193 	}
4194 #endif /* MACH_ASSERT */
4195 }
4196 
4197 __startup_func
4198 static void
4199 kmem_range_init(void)
4200 {
4201 	vm_size_t range_adjustment;
4202 
4203 	kmem_scramble_ranges();
4204 
4205 	range_adjustment = sprayqtn_range_size >> 3;
4206 	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address =
4207 	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment;
4208 	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address =
4209 	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address;
4210 
4211 	range_adjustment = data_range_size >> 3;
4212 	kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
4213 	    kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
4214 	kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
4215 	    kmem_ranges[KMEM_RANGE_ID_DATA].max_address;
4216 
4217 	pmap_init();
4218 	kmem_metadata_init();
4219 	kmem_sizeclass_init();
4220 
4221 #if DEBUG || DEVELOPMENT
4222 	for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) {
4223 		vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
4224 		printf("kmem_large_ranges[%d]    : %p - %p (%u%c)\n", i,
4225 		    (void *)kmem_large_ranges[i].min_address,
4226 		    (void *)kmem_large_ranges[i].max_address,
4227 		    mach_vm_size_pretty(range_size),
4228 		    mach_vm_size_unit(range_size));
4229 	}
4230 #endif
4231 }
4232 STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
4233 
4234 #if DEBUG || DEVELOPMENT
4235 __startup_func
4236 static void
4237 kmem_log_init(void)
4238 {
4239 	/*
4240 	 * The log can only be created after the kmem subsystem is initialized, as
4241 	 * btlog creation uses kmem.
4242 	 */
4243 	kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0);
4244 }
4245 STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init);
4246 
4247 kmem_gobj_stats
4248 kmem_get_gobj_stats(void)
4249 {
4250 	kmem_gobj_stats stats = {};
4251 
4252 	vm_map_lock(kernel_map);
4253 	for (uint8_t i = 0; i < kmem_ptr_ranges; i++) {
4254 		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i;
4255 		struct mach_vm_range range = kmem_ranges[range_id];
4256 		struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)];
4257 		struct kmem_page_meta *meta_end;
4258 		uint64_t meta_idx = meta - kmem_meta_base[range_id];
4259 		vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0;
4260 		vm_map_offset_t addr;
4261 		vm_map_entry_t entry;
4262 
4263 		/*
4264 		 * Left front
4265 		 */
4266 		va = (meta_idx * KMEM_CHUNK_SIZE_MIN);
4267 		meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta));
4268 
4269 		/*
4270 		 * Right front
4271 		 */
4272 		meta = kmem_meta_hwm[kmem_get_front(range_id, 1)];
4273 		meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr,
4274 		    &meta_idx);
4275 		meta_idx = meta_end - meta;
4276 		meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta));
4277 		va += (meta_idx * KMEM_CHUNK_SIZE_MIN);
4278 
4279 		/*
4280 		 * Compute VA allocated in entire range
4281 		 */
4282 		if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) {
4283 			entry = entry->vme_next;
4284 		}
4285 		while (entry != vm_map_to_entry(kernel_map) &&
4286 		    entry->vme_start < range.max_address) {
4287 			used += (entry->vme_end - entry->vme_start);
4288 			entry = entry->vme_next;
4289 		}
4290 
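		/*
		 * Rough page-table cost estimate for the unused portion of the
		 * reserved VA, assuming 8 bytes of PTE per page.
		 */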
4291 		pte_sz = round_page(atop(va - used) * 8);
4292 
4293 		stats.total_used += used;
4294 		stats.total_va += va;
4295 		stats.pte_sz += pte_sz;
4296 		stats.meta_sz += meta_sz;
4297 	}
4298 	vm_map_unlock(kernel_map);
4299 
4300 	return stats;
4301 }
4302 
4303 #endif /* DEBUG || DEVELOPMENT */
4304 
4305 /*
4306  *	kmem_init:
4307  *
4308  *	Initialize the kernel's virtual memory map, taking
4309  *	into account all memory allocated up to this time.
4310  */
4311 __startup_func
4312 void
4313 kmem_init(
4314 	vm_offset_t     start,
4315 	vm_offset_t     end)
4316 {
4317 	vm_map_offset_t map_start;
4318 	vm_map_offset_t map_end;
4319 
4320 	map_start = vm_map_trunc_page(start,
4321 	    VM_MAP_PAGE_MASK(kernel_map));
4322 	map_end = vm_map_round_page(end,
4323 	    VM_MAP_PAGE_MASK(kernel_map));
4324 
4325 	vm_map_will_allocate_early_map(&kernel_map);
4326 #if defined(__arm64__)
4327 	kernel_map = vm_map_create_options(pmap_kernel(),
4328 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS,
4329 	    VM_MAX_KERNEL_ADDRESS,
4330 	    VM_MAP_CREATE_DEFAULT);
4331 	/*
4332 	 *	Reserve virtual memory allocated up to this time.
4333 	 */
4334 	{
4335 		unsigned int    region_select = 0;
4336 		vm_map_offset_t region_start;
4337 		vm_map_size_t   region_size;
4338 		vm_map_offset_t map_addr;
4339 		kern_return_t kr;
4340 
4341 		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
4342 			map_addr = region_start;
4343 			kr = vm_map_enter(kernel_map, &map_addr,
4344 			    vm_map_round_page(region_size,
4345 			    VM_MAP_PAGE_MASK(kernel_map)),
4346 			    (vm_map_offset_t) 0,
4347 			    VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(
4348 				    .vmkf_no_pmap_check = true,
4349 				    .vmkf_no_soft_limit = true),
4350 			    VM_OBJECT_NULL,
4351 			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
4352 			    VM_INHERIT_DEFAULT);
4353 
4354 			if (kr != KERN_SUCCESS) {
4355 				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
4356 				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
4357 				    (uint64_t) region_size, kr);
4358 			}
4359 
4360 			region_select++;
4361 		}
4362 	}
4363 #else
4364 	kernel_map = vm_map_create_options(pmap_kernel(),
4365 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
4366 	    VM_MAP_CREATE_DEFAULT);
4367 	/*
4368 	 *	Reserve virtual memory allocated up to this time.
4369 	 */
4370 	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
4371 		vm_map_offset_t map_addr;
4372 		kern_return_t kr;
4373 
4374 		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
4375 		kr = vm_map_enter(kernel_map,
4376 		    &map_addr,
4377 		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
4378 		    (vm_map_offset_t) 0,
4379 		    VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true),
4380 		    VM_OBJECT_NULL,
4381 		    (vm_object_offset_t) 0, FALSE,
4382 		    VM_PROT_NONE, VM_PROT_NONE,
4383 		    VM_INHERIT_DEFAULT);
4384 
4385 		if (kr != KERN_SUCCESS) {
4386 			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
4387 			    (uint64_t) start, (uint64_t) end,
4388 			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
4389 			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
4390 			    kr);
4391 		}
4392 	}
4393 #endif
4394 
4395 	kmem_set_user_wire_limits();
4396 }
4397 
4398 
4399 #pragma mark map copyio
4400 static inline void
4401 current_thread_set_sec_override(bool val)
4402 {
4403 #pragma unused(val)
4404 }
4405 
4406 /*
4407  * Note: semantic types aren't used as `copyio` already validates.
4408  */
4409 
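/*
 * copyinmap/copyoutmap and the atomic variants below each handle three cases:
 * the kernel pmap (a plain memcpy), the caller's current map (a regular
 * copyin/copyout), and a foreign map, which requires temporarily switching
 * to that map around the copy.
 */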
4410 kern_return_t
4411 copyinmap(
4412 	vm_map_t                map,
4413 	vm_map_offset_t         fromaddr,
4414 	void                   *todata,
4415 	vm_size_t               length)
4416 {
4417 	kern_return_t kr = KERN_SUCCESS;
4418 	vm_map_switch_context_t switch_ctx;
4419 
4420 	if (vm_map_pmap(map) == pmap_kernel()) {
4421 		/* assume a correct copy */
4422 		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
4423 	} else if (current_map() == map) {
4424 		if (copyin(fromaddr, todata, length) != 0) {
4425 			kr = KERN_INVALID_ADDRESS;
4426 		}
4427 	} else {
4428 		vm_map_reference(map);
4429 		current_thread_set_sec_override(true);
4430 		switch_ctx = vm_map_switch_to(map);
4431 		if (copyin(fromaddr, todata, length) != 0) {
4432 			kr = KERN_INVALID_ADDRESS;
4433 		}
4434 		current_thread_set_sec_override(false);
4435 		vm_map_switch_back(switch_ctx);
4436 		vm_map_deallocate(map);
4437 	}
4438 	return kr;
4439 }
4440 
4441 kern_return_t
4442 copyoutmap(
4443 	vm_map_t                map,
4444 	void                   *fromdata,
4445 	vm_map_address_t        toaddr,
4446 	vm_size_t               length)
4447 {
4448 	kern_return_t kr = KERN_SUCCESS;
4449 	vm_map_switch_context_t switch_ctx;
4450 
4451 	if (vm_map_pmap(map) == pmap_kernel()) {
4452 		/* assume a correct copy */
4453 		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
4454 	} else if (current_map() == map) {
4455 		if (copyout(fromdata, toaddr, length) != 0) {
4456 			ktriage_record(thread_tid(current_thread()),
4457 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
4458 			    KDBG_TRIAGE_RESERVED,
4459 			    KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR),
4460 			    KERN_INVALID_ADDRESS /* arg */);
4461 			kr = KERN_INVALID_ADDRESS;
4462 		}
4463 	} else {
4464 		vm_map_reference(map);
4465 		current_thread_set_sec_override(true);
4466 		switch_ctx = vm_map_switch_to(map);
4467 		if (copyout(fromdata, toaddr, length) != 0) {
4468 			ktriage_record(thread_tid(current_thread()),
4469 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
4470 			    KDBG_TRIAGE_RESERVED,
4471 			    KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR),
4472 			    KERN_INVALID_ADDRESS /* arg */);
4473 			kr = KERN_INVALID_ADDRESS;
4474 		}
4475 		current_thread_set_sec_override(false);
4476 		vm_map_switch_back(switch_ctx);
4477 		vm_map_deallocate(map);
4478 	}
4479 	return kr;
4480 }
4481 
4482 kern_return_t
4483 copyoutmap_atomic32(
4484 	vm_map_t                map,
4485 	uint32_t                value,
4486 	vm_map_address_t        toaddr)
4487 {
4488 	kern_return_t kr = KERN_SUCCESS;
4489 	vm_map_switch_context_t switch_ctx;
4490 
4491 	if (vm_map_pmap(map) == pmap_kernel()) {
4492 		/* assume a correct toaddr */
4493 		*(uint32_t *)toaddr = value;
4494 	} else if (current_map() == map) {
4495 		if (copyout_atomic32(value, toaddr) != 0) {
4496 			kr = KERN_INVALID_ADDRESS;
4497 		}
4498 	} else {
4499 		vm_map_reference(map);
4500 		current_thread_set_sec_override(true);
4501 		switch_ctx = vm_map_switch_to(map);
4502 		if (copyout_atomic32(value, toaddr) != 0) {
4503 			kr = KERN_INVALID_ADDRESS;
4504 		}
4505 		current_thread_set_sec_override(false);
4506 		vm_map_switch_back(switch_ctx);
4507 		vm_map_deallocate(map);
4508 	}
4509 	return kr;
4510 }
4511 
4512 kern_return_t
4513 copyoutmap_atomic64(
4514 	vm_map_t                map,
4515 	uint64_t                value,
4516 	vm_map_address_t        toaddr)
4517 {
4518 	kern_return_t kr = KERN_SUCCESS;
4519 	vm_map_switch_context_t switch_ctx;
4520 
4521 	if (vm_map_pmap(map) == pmap_kernel()) {
4522 		/* assume a correct toaddr */
4523 		*(uint64_t *)toaddr = value;
4524 	} else if (current_map() == map) {
4525 		if (copyout_atomic64(value, toaddr) != 0) {
4526 			kr = KERN_INVALID_ADDRESS;
4527 		}
4528 	} else {
4529 		vm_map_reference(map);
4530 		current_thread_set_sec_override(true);
4531 		switch_ctx = vm_map_switch_to(map);
4532 		if (copyout_atomic64(value, toaddr) != 0) {
4533 			kr = KERN_INVALID_ADDRESS;
4534 		}
4535 		current_thread_set_sec_override(false);
4536 		vm_map_switch_back(switch_ctx);
4537 		vm_map_deallocate(map);
4538 	}
4539 	return kr;
4540 }
4541 
4542 
4543 #pragma mark pointer obfuscation / packing
4544 
4545 /*
4546  *
4547  *	The following two functions are to be used when exposing kernel
4548  *	addresses to userspace via any of the various debug or info
4549  *	facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
4550  *	and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
4551  *	are exported to KEXTs.
4552  *
4553  *	NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
4554  */
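/*
 * Illustrative (hypothetical) in-kernel use when exposing a pointer, e.g.
 * from a debug sysctl handler:
 *
 *	printf("object: %p\n", (void *)VM_KERNEL_ADDRPERM(object));
 *
 * Kexts should instead go through the exported *_external helpers below.
 */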
4555 
4556 vm_offset_t
4557 vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
4558 {
4559 	assert(salt != 0);
4560 
4561 	if (addr == 0) {
4562 		return 0ul;
4563 	}
4564 
4565 	if (VM_KERNEL_IS_SLID(addr)) {
4566 		return VM_KERNEL_UNSLIDE(addr);
4567 	}
4568 
4569 	addr = VM_KERNEL_STRIP_UPTR(addr);
4570 
4571 	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
4572 	SHA256_CTX sha_ctx;
4573 
4574 	SHA256_Init(&sha_ctx);
4575 	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
4576 	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
4577 	SHA256_Final(sha_digest, &sha_ctx);
4578 
4579 	return sha_digest[0];
4580 }
4581 
4582 __exported vm_offset_t
4583 vm_kernel_addrhash_external(vm_offset_t addr);
4584 vm_offset_t
4585 vm_kernel_addrhash_external(vm_offset_t addr)
4586 {
4587 	return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
4588 }
4589 
4590 void
4591 vm_kernel_addrhide(
4592 	vm_offset_t addr,
4593 	vm_offset_t *hide_addr)
4594 {
4595 	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
4596 }
4597 
4598 void
4599 vm_kernel_addrperm_external(
4600 	vm_offset_t addr,
4601 	vm_offset_t *perm_addr)
4602 {
4603 	addr = VM_KERNEL_STRIP_UPTR(addr);
4604 
4605 	if (VM_KERNEL_IS_SLID(addr)) {
4606 		*perm_addr = VM_KERNEL_UNSLIDE(addr);
4607 	} else if (VM_KERNEL_ADDRESS(addr)) {
4608 		*perm_addr = ML_ADDRPERM(addr, vm_kernel_addrperm_ext);
4609 	} else {
4610 		*perm_addr = addr;
4611 	}
4612 }
4613 
4614 void
4615 vm_kernel_unslide_or_perm_external(
4616 	vm_offset_t addr,
4617 	vm_offset_t *up_addr)
4618 {
4619 	vm_kernel_addrperm_external(addr, up_addr);
4620 }
4621 
4622 void
4623 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
4624 {
4625 	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
4626 		panic("pointer %p can't be packed: low %d bits aren't 0",
4627 		    (void *)ptr, params.vmpp_shift);
4628 	} else if (ptr <= params.vmpp_base) {
4629 		panic("pointer %p can't be packed: below base %p",
4630 		    (void *)ptr, (void *)params.vmpp_base);
4631 	} else {
4632 		panic("pointer %p can't be packed: maximum encodable pointer is %p",
4633 		    (void *)ptr, (void *)vm_packing_max_packable(params));
4634 	}
4635 }
4636 
4637 void
4638 vm_packing_verify_range(
4639 	const char *subsystem,
4640 	vm_offset_t min_address,
4641 	vm_offset_t max_address,
4642 	vm_packing_params_t params)
4643 {
4644 	if (min_address > max_address) {
4645 		panic("%s: %s range invalid min:%p > max:%p",
4646 		    __func__, subsystem, (void *)min_address, (void *)max_address);
4647 	}
4648 
4649 	if (!params.vmpp_base_relative) {
4650 		return;
4651 	}
4652 
4653 	if (min_address <= params.vmpp_base) {
4654 		panic("%s: %s range invalid min:%p <= base:%p",
4655 		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
4656 	}
4657 
4658 	if (max_address > vm_packing_max_packable(params)) {
4659 		panic("%s: %s range invalid max:%p > max packable:%p",
4660 		    __func__, subsystem, (void *)max_address,
4661 		    (void *)vm_packing_max_packable(params));
4662 	}
4663 }
4664 
4665 #pragma mark tests
4666 #if MACH_ASSERT
4667 #include <sys/errno.h>
4668 
4669 static void
4670 kmem_test_for_entry(
4671 	vm_map_t                map,
4672 	vm_offset_t             addr,
4673 	void                  (^block)(vm_map_entry_t))
4674 {
4675 	vm_map_entry_t entry;
4676 
4677 	vm_map_lock(map);
4678 	block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
4679 	vm_map_unlock(map);
4680 }
4681 
4682 #define kmem_test_assert_map(map, pg, entries) ({ \
4683 	assert3u((map)->size, ==, ptoa(pg)); \
4684 	assert3u((map)->hdr.nentries, ==, entries); \
4685 })
4686 
4687 static bool
4688 can_write_at(vm_offset_t offs, uint32_t page)
4689 {
4690 	static const int zero;
4691 
4692 	return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
4693 }
4694 #define assert_writeable(offs, page) \
4695 	assertf(can_write_at(offs, page), \
4696 	    "can write at %p + ptoa(%d)", (void *)offs, page)
4697 
4698 #define assert_faults(offs, page) \
4699 	assertf(!can_write_at(offs, page), \
4700 	    "can write at %p + ptoa(%d)", (void *)offs, page)
4701 
4702 #define peek(offs, page) \
4703 	(*(uint32_t *)((offs) + ptoa(page)))
4704 
4705 #define poke(offs, page, v) \
4706 	(*(uint32_t *)((offs) + ptoa(page)) = (v))
4707 
4708 #if CONFIG_SPTM
4709 __attribute__((noinline))
4710 static void
4711 kmem_test_verify_type_policy(vm_offset_t addr, kmem_flags_t flags)
4712 {
4713 	extern bool use_xnu_restricted;
4714 	pmap_mapping_type_t expected_type = PMAP_MAPPING_TYPE_RESTRICTED;
4715 
4716 	/* Explicitly state the expected policy */
4717 	if (flags & (KMEM_DATA | KMEM_COMPRESSOR | KMEM_DATA_SHARED)) {
4718 		expected_type = PMAP_MAPPING_TYPE_DEFAULT;
4719 	}
4720 
4721 	/* If X_K_R is disabled, DEFAULT is the only possible mapping */
4722 	if (!use_xnu_restricted) {
4723 		expected_type = PMAP_MAPPING_TYPE_DEFAULT;
4724 	}
4725 
4726 	/* Verify if derived correctly */
4727 	assert3u(expected_type, ==, __kmem_mapping_type(flags));
4728 
4729 	pmap_paddr_t pa = kvtophys(addr);
4730 	if (pa == 0) {
4731 		return;
4732 	}
4733 
4734 	/* Verify if the mapped address actually got the expected type */
4735 	assert3u(expected_type, ==, sptm_get_frame_type(pa));
4736 }
4737 #endif /* CONFIG_SPTM */
4738 
4739 __attribute__((noinline))
4740 static void
4741 kmem_alloc_basic_test(vm_map_t map)
4742 {
4743 	kmem_guard_t guard = {
4744 		.kmg_tag = VM_KERN_MEMORY_DIAG,
4745 	};
4746 	vm_offset_t addr;
4747 
4748 	/*
4749 	 * Test wired basics:
4750 	 * - KMA_KOBJECT
4751 	 * - KMA_GUARD_FIRST, KMA_GUARD_LAST
4752 	 * - allocation alignment
4753 	 */
4754 	addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
4755 	    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
4756 	assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)", map);
4757 	assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
4758 	kmem_test_assert_map(map, 10, 1);
4759 
4760 	kmem_test_for_entry(map, addr, ^(__assert_only vm_map_entry_t e){
4761 		assertf(e, "unable to find address %p in map %p", (void *)addr, map);
4762 		assert(e->vme_kernel_object);
4763 		assert(!e->vme_atomic);
4764 		assert3u(e->vme_start, <=, addr);
4765 		assert3u(addr + ptoa(10), <=, e->vme_end);
4766 	});
4767 
4768 	assert_faults(addr, 0);
4769 	for (int i = 1; i < 9; i++) {
4770 		assert_writeable(addr, i);
4771 	}
4772 	assert_faults(addr, 9);
4773 
4774 	kmem_free(map, addr, ptoa(10));
4775 	kmem_test_assert_map(map, 0, 0);
4776 
4777 	/*
4778 	 * Test pageable basics.
4779 	 */
4780 	addr = kmem_alloc_guard(map, ptoa(10), 0,
4781 	    KMA_PAGEABLE, guard).kmr_address;
4782 	assertf(addr != 0ull, "kma(%p, 10p, 0, KO | PG)", map);
4783 	kmem_test_assert_map(map, 10, 1);
4784 
4785 	for (int i = 0; i < 9; i++) {
4786 		assert_faults(addr, i);
4787 		poke(addr, i, 42);
4788 		assert_writeable(addr, i);
4789 	}
4790 
4791 	kmem_free_guard(map, addr, ptoa(10),
4792 	    KMF_GUARD_FIRST | KMF_GUARD_LAST, guard);
4793 	kmem_test_assert_map(map, 0, 0);
4794 }
4795 
4796 __attribute__((noinline))
4797 static void
4798 kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
4799 {
4800 	kmem_guard_t guard = {
4801 		.kmg_atomic  = !(kind & KMR_DATA),
4802 		.kmg_tag     = VM_KERN_MEMORY_DIAG,
4803 		.kmg_context = 0xefface,
4804 	};
4805 	vm_offset_t addr, newaddr;
4806 	const int N = 10;
4807 
4808 	/*
4809 	 *	This isn't something kmem_realloc_guard() _needs_ to do,
4810 	 *	we could conceive an implementation where it grows in place
4811 	 *	if there's space after it.
4812 	 *
4813 	 *	However, this is what the implementation does today.
4814 	 */
4815 	bool realloc_growth_changes_address = true;
4816 	bool GF = (kind & KMR_GUARD_FIRST);
4817 	bool GL = (kind & KMR_GUARD_LAST);
4818 
4819 	/*
4820 	 *	Initial N page allocation
4821 	 */
4822 	addr = kmem_alloc_guard(map, ptoa(N), 0,
4823 	    (kind & ~KMEM_FREEOLD) | KMA_ZERO, guard).kmr_address;
4824 	assert3u(addr, !=, 0);
4825 
4826 	kmem_test_assert_map(map, N, 1);
4827 	for (int pg = GF; pg < N - GL; pg++) {
4828 		poke(addr, pg, 42 + pg);
4829 	}
4830 	for (int pg = N - GL; pg < N; pg++) {
4831 		assert_faults(addr, pg);
4832 	}
4833 
4834 #if CONFIG_SPTM
4835 	kmem_test_verify_type_policy(addr, ANYF(kind));
4836 #endif /* CONFIG_SPTM */
4837 	/*
4838 	 *	Grow to N + 3 pages
4839 	 */
4840 	newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
4841 	    kind | KMR_ZERO, guard).kmr_address;
4842 	assert3u(newaddr, !=, 0);
4843 	if (realloc_growth_changes_address) {
4844 		assert3u(addr, !=, newaddr);
4845 	}
4846 	if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
4847 		kmem_test_assert_map(map, N + 3, 1);
4848 	} else {
4849 		kmem_test_assert_map(map, 2 * N + 3, 2);
4850 	}
4851 	for (int pg = GF; pg < N - GL; pg++) {
4852 		assert3u(peek(newaddr, pg), ==, 42 + pg);
4853 	}
4854 	if ((kind & KMR_FREEOLD) == 0) {
4855 		for (int pg = GF; pg < N - GL; pg++) {
4856 			assert3u(peek(addr, pg), ==, 42 + pg);
4857 		}
4858 		/* check that the old and new mappings truly share the same pages */
4859 		poke(addr + 16, 0, 1234);
4860 		assert3u(peek(newaddr + 16, 0), ==, 1234);
4861 		kmem_free_guard(map, addr, ptoa(N),
4862 		    kind & (KMF_TAG | KMF_GUARD_FIRST | KMF_GUARD_LAST), guard);
4863 		kmem_test_assert_map(map, N + 3, 1);
4864 	}
4865 	if (addr != newaddr) {
4866 		for (int pg = GF; pg < N - GL; pg++) {
4867 			assert_faults(addr, pg);
4868 		}
4869 	}
4870 	for (int pg = N - GL; pg < N + 3 - GL; pg++) {
4871 		assert3u(peek(newaddr, pg), ==, 0);
4872 	}
4873 	for (int pg = N + 3 - GL; pg < N + 3; pg++) {
4874 		assert_faults(newaddr, pg);
4875 	}
4876 	addr = newaddr;
4877 
4878 
4879 	/*
4880 	 *	Shrink to N - 2 pages
4881 	 */
4882 	newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
4883 	    kind | KMR_ZERO, guard).kmr_address;
4884 	assert3u(map->size, ==, ptoa(N - 2));
4885 	assert3u(newaddr, ==, addr);
4886 	kmem_test_assert_map(map, N - 2, 1);
4887 
4888 	for (int pg = GF; pg < N - 2 - GL; pg++) {
4889 		assert3u(peek(addr, pg), ==, 42 + pg);
4890 	}
4891 	for (int pg = N - 2 - GL; pg < N + 3; pg++) {
4892 		assert_faults(addr, pg);
4893 	}
4894 
4895 	kmem_free_guard(map, addr, ptoa(N - 2),
4896 	    kind & (KMF_TAG | KMF_GUARD_FIRST | KMF_GUARD_LAST), guard);
4897 	kmem_test_assert_map(map, 0, 0);
4898 }
4899 
4900 static int
4901 kmem_basic_test(__unused int64_t in, int64_t *out)
4902 {
4903 	mach_vm_offset_t addr;
4904 	vm_map_t map;
4905 
4906 	printf("%s: test running\n", __func__);
4907 
4908 	map = kmem_suballoc(kernel_map, &addr, 64U << 20,
4909 	        VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
4910 	        KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap;
4911 
4912 	printf("%s: kmem_alloc ...\n", __func__);
4913 	kmem_alloc_basic_test(map);
4914 	printf("%s:     PASS\n", __func__);
4915 
4916 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
4917 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
4918 	printf("%s:     PASS\n", __func__);
4919 
4920 	printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
4921 	kmem_realloc_basic_test(map, KMR_FREEOLD);
4922 	printf("%s:     PASS\n", __func__);
4923 
4924 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
4925 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST);
4926 	printf("%s:     PASS\n", __func__);
4927 
4928 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
4929 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
4930 	printf("%s:     PASS\n", __func__);
4931 
4932 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
4933 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
4934 	printf("%s:     PASS\n", __func__);
4935 
4936 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
4937 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST);
4938 	printf("%s:     PASS\n", __func__);
4939 
4940 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
4941 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
4942 	printf("%s:     PASS\n", __func__);
4943 
4944 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
4945 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
4946 	printf("%s:     PASS\n", __func__);
4947 
4948 
4949 	/* using KMR_DATA signals to test the non-atomic realloc path */
4950 	printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
4951 	kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
4952 	printf("%s:     PASS\n", __func__);
4953 
4954 	printf("%s: kmem_realloc (KMR_DATA) ...\n", __func__);
4955 	kmem_realloc_basic_test(map, KMR_DATA);
4956 	printf("%s:     PASS\n", __func__);
4957 
4958 	/* test KMR_DATA_SHARED for the new shared kheap */
4959 	printf("%s: kmem_realloc (KMR_DATA_SHARED) ...\n", __func__);
4960 	kmem_realloc_basic_test(map, KMR_DATA_SHARED);
4961 	printf("%s:     PASS\n", __func__);
4962 
4963 	kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
4964 	vm_map_deallocate(map);
4965 
4966 	printf("%s: test passed\n", __func__);
4967 	*out = 1;
4968 	return 0;
4969 }
4970 SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);
4971 
4972 static void
4973 kmem_test_get_size_idx_for_chunks(uint32_t chunks)
4974 {
4975 	__assert_only uint32_t idx = kmem_get_size_idx_for_chunks(chunks);
4976 
4977 	assert(chunks >= kmem_size_array[idx].ks_num_chunk);
4978 }
4979 
4980 __attribute__((noinline))
4981 static void
4982 kmem_test_get_size_idx_for_all_chunks()
4983 {
4984 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
4985 		uint32_t chunks = kmem_size_array[i].ks_num_chunk;
4986 
4987 		if (chunks != 1) {
4988 			kmem_test_get_size_idx_for_chunks(chunks - 1);
4989 		}
4990 		kmem_test_get_size_idx_for_chunks(chunks);
4991 		kmem_test_get_size_idx_for_chunks(chunks + 1);
4992 	}
4993 }
4994 
4995 static int
4996 kmem_guard_obj_test(__unused int64_t in, int64_t *out)
4997 {
4998 	printf("%s: test running\n", __func__);
4999 
5000 	printf("%s: kmem_get_size_idx_for_chunks\n", __func__);
5001 	kmem_test_get_size_idx_for_all_chunks();
5002 	printf("%s:     PASS\n", __func__);
5003 
5004 	printf("%s: test passed\n", __func__);
5005 	*out = 1;
5006 	return 0;
5007 }
5008 SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test);
5009 
5010 
5011 #endif /* MACH_ASSERT */
5012