xref: /xnu-12377.1.9/osfmk/vm/vm_kern.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_kern.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	Kernel memory management.
64  */
65 
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern_internal.h>
71 #include <vm/vm_map_internal.h>
72 #include <vm/vm_object_internal.h>
73 #include <vm/vm_page_internal.h>
74 #include <vm/vm_compressor_xnu.h>
75 #include <vm/vm_pageout_xnu.h>
76 #include <vm/vm_init_xnu.h>
77 #include <vm/vm_fault.h>
78 #include <vm/vm_memtag.h>
79 #include <vm/vm_far.h>
80 #include <kern/misc_protos.h>
81 #include <vm/cpm_internal.h>
82 #include <kern/ledger.h>
83 #include <kern/bits.h>
84 #include <kern/startup.h>
85 #include <kern/telemetry.h>
86 
87 #include <string.h>
88 
89 #include <libkern/OSDebug.h>
90 #include <libkern/crypto/sha2.h>
91 #include <libkern/section_keywords.h>
92 #include <sys/kdebug.h>
93 #include <sys/kdebug_triage.h>
94 
95 #include <san/kasan.h>
96 #include <kern/kext_alloc.h>
97 #include <kern/backtrace.h>
98 #include <os/hash.h>
99 #include <kern/zalloc_internal.h>
100 #include <libkern/crypto/rand.h>
101 
102 /*
103  *	Variables exported by this module.
104  */
105 
106 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
107 SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT];
108 SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT];
109 
110 static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges",
111     KMEM_RANGE_ID_NUM_PTR);
112 #define KMEM_GOBJ_THRESHOLD   (32ULL << 20)
113 #if DEBUG || DEVELOPMENT
114 #define KMEM_OUTLIER_LOG_SIZE (16ULL << 10)
115 #define KMEM_OUTLIER_SIZE      0
116 #define KMEM_OUTLIER_ALIGN     1
117 btlog_t kmem_outlier_log;
118 #endif /* DEBUG || DEVELOPMENT */
119 
120 __startup_data static vm_map_size_t data_range_size;
121 __startup_data static vm_map_size_t shared_data_range_size;
122 __startup_data static vm_map_size_t ptr_range_size;
123 __startup_data static vm_map_size_t sprayqtn_range_size;
124 
125 #pragma mark helpers
126 
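/*
 * Editorial note (not in the upstream file): the ANYF() overloads below
 * erase the specific flag type (kma_flags_t, kmr_flags_t or kmf_flags_t)
 * into the common kmem_flags_t representation, so that shared helpers such
 * as __kmem_object() and __kmem_guard_*() can test the bits that all three
 * flag namespaces have in common.
 */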
127 __attribute__((overloadable))
128 __header_always_inline kmem_flags_t
129 ANYF(kma_flags_t flags)
130 {
131 	return (kmem_flags_t)flags;
132 }
133 
134 __attribute__((overloadable))
135 __header_always_inline kmem_flags_t
136 ANYF(kmr_flags_t flags)
137 {
138 	return (kmem_flags_t)flags;
139 }
140 
141 __attribute__((overloadable))
142 __header_always_inline kmem_flags_t
143 ANYF(kmf_flags_t flags)
144 {
145 	return (kmem_flags_t)flags;
146 }
147 
148 __abortlike
149 static void
150 __kmem_invalid_size_panic(
151 	vm_map_t        map,
152 	vm_size_t       size,
153 	uint32_t        flags)
154 {
155 	panic("kmem(map=%p, flags=0x%x): invalid size %zd",
156 	    map, flags, (size_t)size);
157 }
158 
159 __abortlike
160 static void
161 __kmem_invalid_arguments_panic(
162 	const char     *what,
163 	vm_map_t        map,
164 	vm_address_t    address,
165 	vm_size_t       size,
166 	uint32_t        flags)
167 {
168 	panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
169 	    "invalid arguments passed",
170 	    what, map, (void *)address, (size_t)size, flags);
171 }
172 
173 __abortlike
174 static void
175 __kmem_failed_panic(
176 	vm_map_t        map,
177 	vm_size_t       size,
178 	uint32_t        flags,
179 	kern_return_t   kr,
180 	const char     *what)
181 {
182 	panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
183 	    what, map, (size_t)size, flags, kr);
184 }
185 
186 __abortlike
187 static void
188 __kmem_entry_not_found_panic(
189 	vm_map_t        map,
190 	vm_offset_t     addr)
191 {
192 	panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
193 }
194 
195 static inline vm_object_t
196 __kmem_object(kmem_flags_t flags)
197 {
198 	if (flags & KMEM_COMPRESSOR) {
199 		if (flags & KMEM_KOBJECT) {
200 			panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified");
201 		}
202 		return compressor_object;
203 	}
204 	if (!(flags & KMEM_KOBJECT)) {
205 		panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
206 	}
207 	return kernel_object_default;
208 }
209 
210 static inline pmap_mapping_type_t
211 __kmem_mapping_type(kmem_flags_t flags)
212 {
213 	if (flags & (KMEM_COMPRESSOR | KMEM_DATA_SHARED)) {
214 		return PMAP_MAPPING_TYPE_DEFAULT;
215 	} else if (flags & KMEM_DATA) {
216 		return kalloc_is_restricted_data_mode_enforced() ?
217 		       PMAP_MAPPING_TYPE_RESTRICTED : PMAP_MAPPING_TYPE_DEFAULT;
218 	} else {
219 		return PMAP_MAPPING_TYPE_RESTRICTED;
220 	}
221 }
222 
223 static inline vm_size_t
224 __kmem_guard_left(kmem_flags_t flags)
225 {
226 	return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
227 }
228 
229 static inline vm_size_t
230 __kmem_guard_right(kmem_flags_t flags)
231 {
232 	return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
233 }
234 
235 static inline vm_size_t
236 __kmem_guard_size(kmem_flags_t flags)
237 {
238 	return __kmem_guard_left(flags) + __kmem_guard_right(flags);
239 }
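/*
 * Illustrative note (added): with KMEM_GUARD_FIRST | KMEM_GUARD_LAST set,
 * __kmem_guard_size() accounts for one guard page on each side
 * (2 * PAGE_SIZE total); kmem_alloc_guard_internal() panics via
 * __kmem_invalid_size_panic() when the requested size cannot even cover
 * that overhead.
 */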
240 
241 __pure2
242 static inline vm_size_t
243 __kmem_entry_orig_size(vm_map_entry_t entry)
244 {
245 	vm_object_t object = VME_OBJECT(entry);
246 
247 	if (entry->vme_kernel_object) {
248 		return entry->vme_end - entry->vme_start -
249 		       entry->vme_object_or_delta;
250 	} else {
251 		return object->vo_size - object->vo_size_delta;
252 	}
253 }
254 
255 
256 #pragma mark kmem range methods
257 
258 #define mach_vm_range_load(r, rmin, rmax) \
259 	({ (rmin) = (r)->min_address; (rmax) = (r)->max_address; })
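/*
 * Editorial note (added): mach_vm_range_load() reads both bounds of a range
 * in one place so the containment checks below can combine their comparisons
 * with `&` and stay branch-free on the expected (in-range) path.
 */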
260 
261 __abortlike
262 static void
263 __mach_vm_range_overflow(
264 	mach_vm_offset_t        addr,
265 	mach_vm_offset_t        size)
266 {
267 	panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
268 	    addr, addr, size);
269 }
270 
271 __abortlike
272 static void
273 __mach_vm_range_invalid(
274 	mach_vm_offset_t        min_address,
275 	mach_vm_offset_t        max_address)
276 {
277 	panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
278 	    min_address, max_address);
279 }
280 
281 __header_always_inline mach_vm_size_t
282 mach_vm_range_size(const struct mach_vm_range *r)
283 {
284 	mach_vm_offset_t rmin, rmax;
285 
286 	mach_vm_range_load(r, rmin, rmax);
287 	return rmax - rmin;
288 }
289 
290 __attribute__((overloadable))
291 __header_always_inline bool
292 mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
293 {
294 	mach_vm_offset_t rmin, rmax;
295 	/*
296 	 * The `&` is not a typo: we really expect the check to pass,
297 	 * so encourage the compiler to eagerly load and test without branches
298 	 */
299 	mach_vm_range_load(r, rmin, rmax);
300 	return (addr >= rmin) & (addr < rmax);
301 }
302 
303 __attribute__((overloadable))
304 __header_always_inline bool
305 mach_vm_range_contains(
306 	const struct mach_vm_range *r,
307 	mach_vm_offset_t        addr,
308 	mach_vm_offset_t        size)
309 {
310 	mach_vm_offset_t rmin, rmax;
311 	mach_vm_offset_t end;
312 
313 	if (__improbable(os_add_overflow(addr, size, &end))) {
314 		return false;
315 	}
316 
317 	/*
318 	 * The `&` is not a typo: we really expect the check to pass,
319 	 * so encourage the compiler to eagerly load and test without branches
320 	 */
321 	mach_vm_range_load(r, rmin, rmax);
322 	return (addr >= rmin) & (end >= rmin) & (end <= rmax);
323 }
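/*
 * Worked example (added): for a range r = [0x1000, 0x3000),
 * mach_vm_range_contains(&r, 0x2000, 0x800) is true because the end 0x2800
 * stays within rmax, while mach_vm_range_contains(&r, 0x2c00, 0x800) is
 * false because the end 0x3400 exceeds rmax; any (addr, size) pair that
 * wraps around fails the overflow check up front.
 */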
324 
325 __attribute__((overloadable))
326 __header_always_inline bool
327 mach_vm_range_intersects(
328 	const struct mach_vm_range *r1,
329 	const struct mach_vm_range *r2)
330 {
331 	mach_vm_offset_t r1_min, r1_max;
332 	mach_vm_offset_t r2_min, r2_max;
333 
334 	mach_vm_range_load(r1, r1_min, r1_max);
335 	r2_min = r2->min_address;
336 	r2_max = r2->max_address;
337 
338 	if (r1_min > r1_max) {
339 		__mach_vm_range_invalid(r1_min, r1_max);
340 	}
341 
342 	if (r2_min > r2_max) {
343 		__mach_vm_range_invalid(r2_min, r2_max);
344 	}
345 
346 	return r1_max > r2_min && r1_min < r2_max;
347 }
348 
349 __attribute__((overloadable))
350 __header_always_inline bool
351 mach_vm_range_intersects(
352 	const struct mach_vm_range *r1,
353 	mach_vm_offset_t        addr,
354 	mach_vm_offset_t        size)
355 {
356 	struct mach_vm_range r2;
357 
358 	r2.min_address = addr;
359 	if (os_add_overflow(addr, size, &r2.max_address)) {
360 		__mach_vm_range_overflow(addr, size);
361 	}
362 
363 	return mach_vm_range_intersects(r1, &r2);
364 }
365 
366 bool
367 kmem_range_id_contains(
368 	kmem_range_id_t         range_id,
369 	vm_map_offset_t         addr,
370 	vm_map_size_t           size)
371 {
372 	return mach_vm_range_contains(&kmem_ranges[range_id], vm_memtag_canonicalize_kernel(addr), size);
373 }
374 
375 __abortlike
376 static void
377 kmem_range_invalid_panic(
378 	kmem_range_id_t         range_id,
379 	vm_map_offset_t         addr,
380 	vm_map_size_t           size)
381 {
382 	const struct mach_vm_range *r = &kmem_ranges[range_id];
383 	mach_vm_offset_t rmin, rmax;
384 
385 	mach_vm_range_load(r, rmin, rmax);
386 	if (addr + size < rmin) {
387 		panic("addr %p + size %llu overflows %p", (void *)addr, size,
388 		    (void *)(addr + size));
389 	}
390 	panic("addr %p + size %llu doesnt fit in one range (id: %u min: %p max: %p)",
391 	    (void *)addr, size, range_id, (void *)rmin, (void *)rmax);
392 }
393 
394 /*
395  * Return whether the entire allocation is contained in the given range
396  */
397 static bool
398 kmem_range_contains_fully(
399 	kmem_range_id_t         range_id,
400 	vm_map_offset_t         addr,
401 	vm_map_size_t           size)
402 {
403 	const struct mach_vm_range *r = &kmem_ranges[range_id];
404 	mach_vm_offset_t rmin, rmax;
405 	bool result = false;
406 
407 	if (VM_KERNEL_ADDRESS(addr)) {
408 		addr = vm_memtag_canonicalize_kernel(addr);
409 	}
410 
411 	/*
412 	 * The `&` is not a typo: we really expect the check to pass,
413 	 * so encourage the compiler to eagerly load and test without branches
414 	 */
415 	mach_vm_range_load(r, rmin, rmax);
416 	result = (addr >= rmin) & (addr < rmax);
417 	if (__improbable(result
418 	    && ((addr + size < rmin) || (addr + size > rmax)))) {
419 		kmem_range_invalid_panic(range_id, addr, size);
420 	}
421 	return result;
422 }
423 
424 vm_map_size_t
425 kmem_range_id_size(kmem_range_id_t range_id)
426 {
427 	return mach_vm_range_size(&kmem_ranges[range_id]);
428 }
429 
430 kmem_range_id_t
431 kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
432 {
433 	kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
434 
435 	for (; range_id < KMEM_RANGE_COUNT; range_id++) {
436 		if (kmem_range_contains_fully(range_id, addr, size)) {
437 			return range_id;
438 		}
439 	}
440 	return KMEM_RANGE_ID_NONE;
441 }
442 
443 bool
444 kmem_is_ptr_range(vm_map_range_id_t range_id)
445 {
446 	return (range_id >= KMEM_RANGE_ID_FIRST) &&
447 	       (range_id <= KMEM_RANGE_ID_NUM_PTR);
448 }
449 
450 __abortlike
451 static void
452 kmem_range_invalid_for_overwrite(vm_map_offset_t addr)
453 {
454 	panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges",
455 	    (void *)addr);
456 }
457 
458 mach_vm_range_t
459 kmem_validate_range_for_overwrite(
460 	vm_map_offset_t         addr,
461 	vm_map_size_t           size)
462 {
463 	vm_map_range_id_t range_id = kmem_addr_get_range(addr, size);
464 
465 	if (kmem_is_ptr_range(range_id)) {
466 		kmem_range_invalid_for_overwrite(addr);
467 	}
468 
469 	return &kmem_ranges[range_id];
470 }
471 
472 
473 #pragma mark entry parameters
474 
475 
476 __abortlike
477 static void
478 __kmem_entry_validate_panic(
479 	vm_map_t        map,
480 	vm_map_entry_t  entry,
481 	vm_offset_t     addr,
482 	vm_size_t       size,
483 	uint32_t        flags,
484 	kmem_guard_t    guard)
485 {
486 	const char *what = "???";
487 
488 	if (entry->vme_atomic != guard.kmg_atomic) {
489 		what = "atomicity";
490 	} else if (entry->is_sub_map != guard.kmg_submap) {
491 		what = "objectness";
492 	} else if (addr != entry->vme_start) {
493 		what = "left bound";
494 	} else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
495 		what = "right bound";
496 	} else if (guard.kmg_context != entry->vme_context) {
497 		what = "guard";
498 	}
499 
500 	panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
501 	    "entry:%p %s mismatch guard(0x%08x)",
502 	    map, (void *)addr, size, flags, entry,
503 	    what, guard.kmg_context);
504 }
505 
506 static bool
507 __kmem_entry_validate_guard(
508 	vm_map_entry_t  entry,
509 	vm_offset_t     addr,
510 	vm_size_t       size,
511 	kmem_flags_t    flags,
512 	kmem_guard_t    guard)
513 {
514 	if (entry->vme_atomic != guard.kmg_atomic) {
515 		return false;
516 	}
517 
518 	if (!guard.kmg_atomic) {
519 		return true;
520 	}
521 
522 	if (entry->is_sub_map != guard.kmg_submap) {
523 		return false;
524 	}
525 
526 	if (addr != entry->vme_start) {
527 		return false;
528 	}
529 
530 	if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
531 		return false;
532 	}
533 
534 	if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
535 		return false;
536 	}
537 
538 	return true;
539 }
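/*
 * Summary (added): the atomicity bit must always match the guard; the
 * remaining checks (submap-ness, exact bounds, stored vme_context) are only
 * enforced for atomic entries, the right-bound check is skipped when
 * KMEM_GUESS_SIZE is set, and the context is only compared for non-submap
 * guards.
 */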
540 
541 void
542 kmem_entry_validate_guard(
543 	vm_map_t        map,
544 	vm_map_entry_t  entry,
545 	vm_offset_t     addr,
546 	vm_size_t       size,
547 	kmem_guard_t    guard)
548 {
549 	if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
550 		__kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
551 	}
552 }
553 
554 __abortlike
555 static void
556 __kmem_entry_validate_object_panic(
557 	vm_map_t        map,
558 	vm_map_entry_t  entry,
559 	kmem_flags_t    flags)
560 {
561 	const char *what;
562 	const char *verb;
563 
564 	if (entry->is_sub_map) {
565 		panic("kmem(map=%p) entry %p is a submap", map, entry);
566 	}
567 
568 	if (flags & KMEM_KOBJECT) {
569 		what = "kernel";
570 		verb = "isn't";
571 	} else if (flags & KMEM_COMPRESSOR) {
572 		what = "compressor";
573 		verb = "isn't";
574 	} else if (entry->vme_kernel_object) {
575 		what = "kernel";
576 		verb = "is unexpectedly";
577 	} else {
578 		what = "compressor";
579 		verb = "is unexpectedly";
580 	}
581 
582 	panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
583 	    map, flags, entry, verb, what);
584 }
585 
586 static bool
587 __kmem_entry_validate_object(
588 	vm_map_entry_t  entry,
589 	kmem_flags_t    flags)
590 {
591 	if (entry->is_sub_map) {
592 		return false;
593 	}
594 	if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
595 		return false;
596 	}
597 
598 	return (bool)(flags & KMEM_COMPRESSOR) ==
599 	       (VME_OBJECT(entry) == compressor_object);
600 }
601 
602 vm_size_t
603 kmem_size_guard(
604 	vm_map_t        map,
605 	vm_offset_t     addr,
606 	kmem_guard_t    guard)
607 {
608 	kmem_flags_t flags = KMEM_GUESS_SIZE;
609 	vm_map_entry_t entry;
610 	vm_size_t size;
611 
612 	vmlp_api_start(KMEM_SIZE_GUARD);
613 
614 	vm_map_lock_read(map);
615 
616 #if KASAN_CLASSIC
617 	addr -= PAGE_SIZE;
618 #endif /* KASAN_CLASSIC */
619 	addr = vm_memtag_canonicalize_kernel(addr);
620 
621 	if (!vm_map_lookup_entry(map, addr, &entry)) {
622 		__kmem_entry_not_found_panic(map, addr);
623 	}
624 
625 	vmlp_range_event_entry(map, entry);
626 
627 	if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
628 		__kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
629 	}
630 
631 	size = __kmem_entry_orig_size(entry);
632 
633 	vm_map_unlock_read(map);
634 
635 	vmlp_api_end(KMEM_SIZE_GUARD, 0);
636 	return size;
637 }
638 
639 static inline uint16_t
640 kmem_hash_backtrace(
641 	void                     *fp)
642 {
643 	uint64_t  bt_count;
644 	uintptr_t bt[8] = {};
645 
646 	struct backtrace_control ctl = {
647 		.btc_frame_addr = (uintptr_t)fp,
648 	};
649 
650 	bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
651 	return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
652 }
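/*
 * Editorial note (added): the backtrace hash above is only a fallback.
 * When an allocation reaches kmem_apply_security_policy() below without a
 * type hash, the caller's frames are hashed to pick one of the pointer
 * ranges and an allocation direction, so unrelated call sites tend to land
 * in different ranges.
 */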
653 
654 static_assert(KMEM_RANGE_ID_DATA_SHARED - 1 <= KMEM_RANGE_MASK,
655     "Insufficient bits to represent ptr ranges");
656 
657 kmem_range_id_t
658 kmem_adjust_range_id(
659 	uint32_t                  hash)
660 {
661 	return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
662 	       (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
663 }
664 
665 static bool
666 kmem_use_sprayqtn(
667 	kma_flags_t               kma_flags,
668 	vm_map_size_t             map_size,
669 	vm_offset_t               mask)
670 {
671 	/*
672 	 * Pointer allocations that are above the guard object threshold, or that
673 	 * have leading guard pages with non-standard alignment requests, are
674 	 * redirected to the sprayqtn range.
675 	 */
676 #if DEBUG || DEVELOPMENT
677 	btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ?
678 	    BTREF_GET_NOWAIT : 0;
679 
680 	if ((kma_flags & KMA_SPRAYQTN) == 0) {
681 		if (map_size > KMEM_GOBJ_THRESHOLD) {
682 			btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE,
683 			    btref_get(__builtin_frame_address(0), flags));
684 		} else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) {
685 			btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN,
686 			    btref_get(__builtin_frame_address(0), flags));
687 		}
688 	}
689 #endif /* DEBUG || DEVELOPMENT */
690 
691 	return (kma_flags & KMA_SPRAYQTN) ||
692 	       (map_size > KMEM_GOBJ_THRESHOLD) ||
693 	       ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK));
694 }
695 
696 static void
697 kmem_apply_security_policy(
698 	vm_map_t                  map,
699 	kma_flags_t               kma_flags,
700 	kmem_guard_t              guard,
701 	vm_map_size_t             map_size,
702 	vm_offset_t               mask,
703 	vm_map_kernel_flags_t    *vmk_flags,
704 	bool                      assert_dir __unused)
705 {
706 	kmem_range_id_t range_id;
707 	bool from_right;
708 	uint16_t type_hash = guard.kmg_type_hash;
709 
710 	if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
711 		return;
712 	}
713 
714 	/*
715 	 * A non-zero type-hash must be passed by krealloc_type
716 	 */
717 #if (DEBUG || DEVELOPMENT)
718 	if (assert_dir && !(kma_flags & (KMA_DATA | KMA_DATA_SHARED))) {
719 		assert(type_hash != 0);
720 	}
721 #endif
722 
723 	if (kma_flags & (KMA_DATA | KMA_DATA_SHARED)) {
724 		/*
725 		 * Choose which specific data range to use.
726 		 */
727 		if (kma_flags & KMA_DATA) {
728 			range_id  = KMEM_RANGE_ID_DATA;
729 		} else {
730 			range_id  = kmem_needs_data_share_range() ?
731 			    KMEM_RANGE_ID_DATA_SHARED : KMEM_RANGE_ID_DATA;
732 		}
733 
734 		/*
735 		 * As an optimization to avoid fragmentation in KMA_DATA,
736 		 * allocate static carveouts at the end of the DATA range.
737 		 */
738 		from_right = (bool)(kma_flags & KMA_PERMANENT);
739 	} else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) {
740 		range_id = KMEM_RANGE_ID_SPRAYQTN;
741 		from_right = (bool)(kma_flags & KMA_PERMANENT);
742 	} else if (type_hash) {
743 		range_id  = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK);
744 		from_right = type_hash & KMEM_DIRECTION_MASK;
745 	} else {
746 		/*
747 		 * Range id needs to correspond to one of the PTR ranges
748 		 */
749 		type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
750 		range_id  = kmem_adjust_range_id(type_hash);
751 		from_right = type_hash & KMEM_DIRECTION_MASK;
752 	}
753 
754 	vmk_flags->vmkf_range_id = range_id;
755 	vmk_flags->vmkf_last_free = from_right;
756 }
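/*
 * Summary of the placement policy above (added): it only applies to the
 * kernel map once kmem is up.  KMA_DATA / KMA_DATA_SHARED allocations go to
 * the data ranges (permanent ones packed from the right), oversized or
 * unusually aligned guarded allocations are diverted to the spray
 * quarantine range, typed allocations use the range and direction encoded
 * in their type hash, and everything else falls back to a backtrace hash.
 */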
757 
758 #pragma mark allocation
759 
760 static kmem_return_t
761 kmem_alloc_guard_internal(
762 	vm_map_t                map,
763 	vm_size_t               size,
764 	vm_offset_t             mask,
765 	kma_flags_t             flags,
766 	kmem_guard_t            guard,
767 	kern_return_t         (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *))
768 {
769 	vm_object_t             object;
770 	vm_offset_t             delta = 0;
771 	vm_map_entry_t          entry = NULL;
772 	vm_map_offset_t         map_addr, fill_start;
773 	vm_map_size_t           map_size, fill_size;
774 	vm_page_t               guard_left = VM_PAGE_NULL;
775 	vm_page_t               guard_right = VM_PAGE_NULL;
776 	vm_page_t               wired_page_list = VM_PAGE_NULL;
777 	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
778 	bool                    skip_guards;
779 	kmem_return_t           kmr = { };
780 
781 	vmlp_api_start(KMEM_ALLOC_GUARD_INTERNAL);
782 
783 	assert(kernel_map && map->pmap == kernel_pmap);
784 
785 	/* DATA and DATA_SHARED are mutually exclusive */
786 	assert((flags & (KMA_DATA | KMA_DATA_SHARED)) != (KMA_DATA | KMA_DATA_SHARED));
787 
788 #if defined(__arm64__)
789 	/*
790 	 * Pageable allocations should be marked as shared.
791 	 *
792 	 * Only assert this on arm64 architectures, since we do not
793 	 * adopt the shared heap on older ones.
794 	 */
795 	assert((flags & (KMA_PAGEABLE | KMA_DATA)) != (KMA_PAGEABLE | KMA_DATA));
796 #endif /* defined(__arm64__) */
797 
798 #if DEBUG || DEVELOPMENT
799 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
800 	    size, 0, 0, 0);
801 #endif
802 
803 
804 	if (size == 0 ||
805 	    (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
806 	    (size < __kmem_guard_size(ANYF(flags)))) {
807 		__kmem_invalid_size_panic(map, size, flags);
808 	}
809 
810 	/*
811 	 * limit the size of a single extent of wired memory, to try and
812 	 * limit the damage to the system if too many pages get wired
813 	 * down.
814 	 * The limit is raised to 2GB with a 128GB max physical limit,
815 	 * but scaled by installed memory above that.
816 	 *
817 	 * Note: kmem_alloc_contig_guard() is immune to this check.
818 	 */
819 	if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
820 	    alloc_pages == NULL &&
821 	    size > MAX(1ULL << 31, sane_size / 64))) {
822 		kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
823 		goto out_error;
824 	}
825 
826 	/*
827 	 * Guard pages:
828 	 *
829 	 * Guard pages are implemented as fictitious pages.
830 	 *
831 	 * However, some maps, and some objects are known
832 	 * to manage their memory explicitly, and do not need
833 	 * those to be materialized, which saves memory.
834 	 *
835 	 * By placing guard pages on either end of a stack,
836 	 * they can help detect cases where a thread walks
837 	 * off either end of its stack.
838 	 *
839 	 * They are allocated and set up here and attempts
840 	 * to access those pages are trapped in vm_fault_page().
841 	 *
842 	 * The map_size we were passed may include extra space for
843 	 * guard pages. fill_size represents the actual size to populate.
844 	 * Similarly, fill_start indicates where the actual pages
845 	 * will begin in the range.
846 	 */
847 
848 	map_size   = round_page(size);
849 	fill_start = 0;
850 	fill_size  = map_size - __kmem_guard_size(ANYF(flags));
851 
852 #if KASAN_CLASSIC
853 	if (flags & KMA_KASAN_GUARD) {
854 		assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0);
855 		flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
856 		delta     = ptoa(2);
857 		map_size += delta;
858 	}
859 #else
860 	(void)delta;
861 #endif /* KASAN_CLASSIC */
862 
863 	skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) ||
864 	    map->never_faults;
865 
866 	if (flags & KMA_GUARD_FIRST) {
867 		vmk_flags.vmkf_guard_before = true;
868 		fill_start += PAGE_SIZE;
869 	}
870 	if (flags & KMA_NOSOFTLIMIT) {
871 		vmk_flags.vmkf_no_soft_limit = true;
872 	}
873 	if ((flags & KMA_GUARD_FIRST) && !skip_guards) {
874 		guard_left = vm_page_create_guard((flags & KMA_NOPAGEWAIT) == 0);
875 		if (__improbable(guard_left == VM_PAGE_NULL)) {
876 			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
877 			goto out_error;
878 		}
879 	}
880 	if ((flags & KMA_GUARD_LAST) && !skip_guards) {
881 		guard_right = vm_page_create_guard((flags & KMA_NOPAGEWAIT) == 0);
882 		if (__improbable(guard_right == VM_PAGE_NULL)) {
883 			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
884 			goto out_error;
885 		}
886 	}
887 
888 	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
889 		if (alloc_pages) {
890 			kmr.kmr_return = alloc_pages(fill_size, flags,
891 			    &wired_page_list);
892 		} else {
893 			kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
894 			    &wired_page_list);
895 		}
896 		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
897 			goto out_error;
898 		}
899 	}
900 
901 	/*
902 	 *	Allocate a new object (if necessary).  We must do this before
903 	 *	locking the map, or risk deadlock with the default pager.
904 	 */
905 	if (flags & KMA_KOBJECT) {
906 		{
907 			object = kernel_object_default;
908 		}
909 		vm_object_reference(object);
910 	} else if (flags & KMA_COMPRESSOR) {
911 		object = compressor_object;
912 		vm_object_reference(object);
913 	} else {
914 		object = vm_object_allocate(map_size, map->serial_id);
915 		vm_object_lock(object);
916 		vm_object_set_size(object, map_size, size);
917 		/* stabilize the object to prevent shadowing */
918 		object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
919 		VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
920 		vm_object_unlock(object);
921 	}
922 
923 	if (flags & KMA_LAST_FREE) {
924 		vmk_flags.vmkf_last_free = true;
925 	}
926 	if (flags & KMA_PERMANENT) {
927 		vmk_flags.vmf_permanent = true;
928 	}
929 	kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
930 	    false);
931 
932 	kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
933 	    vmk_flags, &entry);
934 	if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
935 		vm_object_deallocate(object);
936 		goto out_error;
937 	}
938 
939 	vmlp_range_event_entry(map, entry);
940 
941 	map_addr = entry->vme_start;
942 	VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
943 	VME_ALIAS_SET(entry, guard.kmg_tag);
944 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
945 		VME_OFFSET_SET(entry, map_addr);
946 	}
947 
948 #if KASAN
949 	if ((flags & KMA_KOBJECT) && guard.kmg_atomic) {
950 		entry->vme_object_or_delta = (-size & PAGE_MASK) + delta;
951 	}
952 #endif /* KASAN */
953 
954 	if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
955 		entry->wired_count = 1;
956 		vme_btref_consider_and_set(entry, __builtin_frame_address(0));
957 	}
958 
959 	if (guard_left || guard_right || wired_page_list) {
960 		vm_object_offset_t offset = 0ull;
961 
962 		vm_object_lock(object);
963 		vm_map_unlock(map);
964 
965 		if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
966 			offset = map_addr;
967 		}
968 
969 		if (guard_left) {
970 			vm_page_insert(guard_left, object, offset);
971 			guard_left->vmp_busy = FALSE;
972 			guard_left = VM_PAGE_NULL;
973 		}
974 
975 		if (guard_right) {
976 			vm_page_insert(guard_right, object,
977 			    offset + fill_start + fill_size);
978 			guard_right->vmp_busy = FALSE;
979 			guard_right = VM_PAGE_NULL;
980 		}
981 
982 		if (wired_page_list) {
983 			kernel_memory_populate_object_and_unlock(object,
984 			    map_addr + fill_start, offset + fill_start, fill_size,
985 			    wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
986 			    __kmem_mapping_type(ANYF(flags)));
987 		} else {
988 			vm_object_unlock(object);
989 		}
990 	} else {
991 		vm_map_unlock(map);
992 	}
993 
994 	/*
995 	 * now that the pages are wired, we no longer have to fear coalescing
996 	 */
997 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
998 		vm_map_simplify(map, map_addr);
999 	}
1000 
1001 #if DEBUG || DEVELOPMENT
1002 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1003 	    atop(fill_size), 0, 0, 0);
1004 #endif /* DEBUG || DEVELOPMENT */
1005 	kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);
1006 
1007 #if KASAN
1008 	if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) {
1009 		/*
1010 		 * We need to allow the range for pageable memory,
1011 		 * or faulting will not be allowed.
1012 		 */
1013 		kasan_notify_address(map_addr, map_size);
1014 	}
1015 #endif /* KASAN */
1016 #if KASAN_CLASSIC
1017 	if (flags & KMA_KASAN_GUARD) {
1018 		kmr.kmr_address += PAGE_SIZE;
1019 		kasan_alloc_large(kmr.kmr_address, size);
1020 	}
1021 #endif /* KASAN_CLASSIC */
1022 #if CONFIG_KERNEL_TAGGING
1023 	if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) {
1024 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag((caddr_t)kmr.kmr_address + fill_start, fill_size);
1025 		kmr.kmr_ptr = (caddr_t)kmr.kmr_ptr - fill_start;
1026 #if KASAN_TBI
1027 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, map_size, size);
1028 #endif /* KASAN_TBI */
1029 	}
1030 #endif /* CONFIG_KERNEL_TAGGING */
1031 	vmlp_api_end(KMEM_ALLOC_GUARD_INTERNAL, kmr.kmr_return);
1032 	return kmr;
1033 
1034 out_error:
1035 	if (flags & KMA_NOFAIL) {
1036 		__kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
1037 	}
1038 	if (guard_left) {
1039 		guard_left->vmp_snext = wired_page_list;
1040 		wired_page_list = guard_left;
1041 	}
1042 	if (guard_right) {
1043 		guard_right->vmp_snext = wired_page_list;
1044 		wired_page_list = guard_right;
1045 	}
1046 	if (wired_page_list) {
1047 		vm_page_free_list(wired_page_list, FALSE);
1048 	}
1049 
1050 #if DEBUG || DEVELOPMENT
1051 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1052 	    0, 0, 0, 0);
1053 #endif /* DEBUG || DEVELOPMENT */
1054 
1055 	vmlp_api_end(KMEM_ALLOC_GUARD_INTERNAL, kmr.kmr_return);
1056 	return kmr;
1057 }
1058 
1059 __mockable kmem_return_t
1060 kmem_alloc_guard(
1061 	vm_map_t        map,
1062 	vm_size_t       size,
1063 	vm_offset_t     mask,
1064 	kma_flags_t     flags,
1065 	kmem_guard_t    guard)
1066 {
1067 	return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL);
1068 }
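/*
 * Illustrative sketch of a caller (added, not taken from this file): a
 * wired, zero-filled data allocation in the kernel map might look like
 *
 *	kmem_return_t kmr;
 *
 *	kmr = kmem_alloc_guard(kernel_map, size, 0,
 *	    KMA_ZERO | KMA_DATA, KMEM_GUARD_NONE);
 *	if (kmr.kmr_return == KERN_SUCCESS) {
 *		... use (void *)kmr.kmr_address, kmem_size_guard() to query ...
 *	}
 *
 * where KMA_ZERO is assumed to request zero-filled pages, as elsewhere in
 * the kmem interface.
 */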
1069 
1070 kmem_return_t
1071 kmem_alloc_contig_guard(
1072 	vm_map_t                map,
1073 	vm_size_t               size,
1074 	vm_offset_t             mask,
1075 	ppnum_t                 max_pnum,
1076 	ppnum_t                 pnum_mask,
1077 	kma_flags_t             flags,
1078 	kmem_guard_t            guard)
1079 {
1080 	__auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) {
1081 		return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
1082 	};
1083 
1084 	return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages);
1085 }
1086 
1087 kmem_return_t
1088 kmem_suballoc(
1089 	vm_map_t                parent,
1090 	mach_vm_offset_t       *addr,
1091 	vm_size_t               size,
1092 	vm_map_create_options_t vmc_options,
1093 	int                     vm_flags,
1094 	kms_flags_t             flags,
1095 	vm_tag_t                tag)
1096 {
1097 	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1098 	vm_map_offset_t map_addr = 0;
1099 	kmem_return_t kmr = { };
1100 	vm_map_t map;
1101 
1102 	assert(page_aligned(size));
1103 	assert(parent->pmap == kernel_pmap);
1104 
1105 	vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);
1106 
1107 	if (parent == kernel_map) {
1108 		assert(vmk_flags.vmf_overwrite || (flags & (KMS_DATA | KMS_DATA_SHARED)));
1109 	}
1110 
1111 	if (vmk_flags.vmf_fixed) {
1112 		map_addr = trunc_page(*addr);
1113 	}
1114 
1115 	pmap_reference(vm_map_pmap(parent));
1116 	map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);
1117 
1118 	/*
1119 	 * 1. vm_map_enter() will consume one ref on success.
1120 	 *
1121 	 * 2. make the entry atomic as kernel submaps should never be split.
1122 	 *
1123 	 * 3. instruct vm_map_enter() that it is a fresh submap
1124 	 *    that needs to be taught its bounds as it inserted.
1125 	 */
1126 	vm_map_reference(map);
1127 
1128 	vmk_flags.vmkf_submap = true;
1129 	if ((flags & (KMS_DATA | KMS_DATA_SHARED)) == 0) {
1130 		/* FIXME: IOKit submaps get fragmented and can't be atomic */
1131 		vmk_flags.vmkf_submap_atomic = true;
1132 	}
1133 	vmk_flags.vmkf_submap_adjust = true;
1134 	if (flags & KMS_LAST_FREE) {
1135 		vmk_flags.vmkf_last_free = true;
1136 	}
1137 	if (flags & KMS_PERMANENT) {
1138 		vmk_flags.vmf_permanent = true;
1139 	}
1140 	if (flags & (KMS_DATA | KMS_DATA_SHARED)) {
1141 		if (flags & KMS_DATA) {
1142 			vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1143 		} else {
1144 			vmk_flags.vmkf_range_id = kmem_needs_data_share_range() ?
1145 			    KMEM_RANGE_ID_DATA_SHARED : KMEM_RANGE_ID_DATA;
1146 		}
1147 	}
1148 	if (flags & KMS_NOSOFTLIMIT) {
1149 		vmk_flags.vmkf_no_soft_limit = true;
1150 	}
1151 
1152 	kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
1153 	    vmk_flags, (vm_object_t)map, 0, FALSE,
1154 	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1155 
1156 	if (kmr.kmr_return != KERN_SUCCESS) {
1157 		if (flags & KMS_NOFAIL) {
1158 			panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
1159 			    parent, size, kmr.kmr_return);
1160 		}
1161 		assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
1162 		vm_map_deallocate(map);
1163 		vm_map_deallocate(map); /* also removes ref to pmap */
1164 		return kmr;
1165 	}
1166 
1167 	/*
1168 	 * For kmem_suballocs that register a claim and are assigned a range, ensure
1169 	 * that the exact same range is returned.
1170 	 */
1171 	if (*addr != 0 && parent == kernel_map &&
1172 	    startup_phase > STARTUP_SUB_KMEM) {
1173 		assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
1174 	} else {
1175 		*addr = map_addr;
1176 	}
1177 
1178 	kmr.kmr_submap = map;
1179 	return kmr;
1180 }
1181 
1182 /*
1183  *	kmem_alloc:
1184  *
1185  *	Allocate wired-down memory in the kernel's address map
1186  *	or a submap.  The memory is not zero-filled.
1187  */
1188 
1189 __exported kern_return_t
1190 kmem_alloc_external(
1191 	vm_map_t        map,
1192 	vm_offset_t     *addrp,
1193 	vm_size_t       size);
1194 kern_return_t
1195 kmem_alloc_external(
1196 	vm_map_t        map,
1197 	vm_offset_t     *addrp,
1198 	vm_size_t       size)
1199 {
1200 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1201 		return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
1202 	}
1203 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1204 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1205 }
1206 
1207 
1208 /*
1209  *	kmem_alloc_kobject:
1210  *
1211  *	Allocate wired-down memory in the kernel's address map
1212  *	or a submap.  The memory is not zero-filled.
1213  *
1214  *	The memory is allocated in the kernel_object.
1215  *	It may not be copied with vm_map_copy, and
1216  *	it may not be reallocated with kmem_realloc.
1217  */
1218 
1219 __exported kern_return_t
1220 kmem_alloc_kobject_external(
1221 	vm_map_t        map,
1222 	vm_offset_t     *addrp,
1223 	vm_size_t       size);
1224 kern_return_t
1225 kmem_alloc_kobject_external(
1226 	vm_map_t        map,
1227 	vm_offset_t     *addrp,
1228 	vm_size_t       size)
1229 {
1230 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1231 		return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
1232 	}
1233 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1234 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1235 }
1236 
1237 /*
1238  *	kmem_alloc_pageable:
1239  *
1240  *	Allocate pageable memory in the kernel's address map.
1241  */
1242 
1243 __exported kern_return_t
1244 kmem_alloc_pageable_external(
1245 	vm_map_t        map,
1246 	vm_offset_t     *addrp,
1247 	vm_size_t       size);
1248 kern_return_t
1249 kmem_alloc_pageable_external(
1250 	vm_map_t        map,
1251 	vm_offset_t     *addrp,
1252 	vm_size_t       size)
1253 {
1254 	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
1255 		return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA_SHARED, vm_tag_bt());
1256 	}
1257 	/* Maintain ABI compatibility: invalid sizes used to be allowed */
1258 	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1259 }
1260 
1261 static __attribute__((always_inline, warn_unused_result))
1262 kern_return_t
1263 mach_vm_allocate_kernel_sanitize(
1264 	vm_map_t                map,
1265 	mach_vm_offset_ut       addr_u,
1266 	mach_vm_size_ut         size_u,
1267 	vm_map_kernel_flags_t   vmk_flags,
1268 	vm_map_offset_t        *map_addr,
1269 	vm_map_size_t          *map_size)
1270 {
1271 	kern_return_t   result;
1272 	vm_map_offset_t map_end;
1273 
1274 	if (vmk_flags.vmf_fixed) {
1275 		result = vm_sanitize_addr_size(addr_u, size_u,
1276 		    VM_SANITIZE_CALLER_VM_ALLOCATE_FIXED,
1277 		    map,
1278 		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS | VM_SANITIZE_FLAGS_REALIGN_START,
1279 		    map_addr, &map_end, map_size);
1280 		if (__improbable(result != KERN_SUCCESS)) {
1281 			return result;
1282 		}
1283 	} else {
1284 		*map_addr = 0;
1285 		result = vm_sanitize_size(0, size_u,
1286 		    VM_SANITIZE_CALLER_VM_ALLOCATE_ANYWHERE, map,
1287 		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS,
1288 		    map_size);
1289 		if (__improbable(result != KERN_SUCCESS)) {
1290 			return result;
1291 		}
1292 	}
1293 
1294 	return KERN_SUCCESS;
1295 }
1296 
1297 kern_return_t
1298 mach_vm_allocate_kernel(
1299 	vm_map_t                map,
1300 	mach_vm_offset_ut      *addr_u,
1301 	mach_vm_size_ut         size_u,
1302 	vm_map_kernel_flags_t   vmk_flags)
1303 {
1304 	vm_map_offset_t map_addr;
1305 	vm_map_size_t   map_size;
1306 	kern_return_t   result;
1307 
1308 	if (map == VM_MAP_NULL) {
1309 		ktriage_record(thread_tid(current_thread()),
1310 		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1311 		    KDBG_TRIAGE_RESERVED,
1312 		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADMAP_ERROR),
1313 		    KERN_INVALID_ARGUMENT /* arg */);
1314 		return KERN_INVALID_ARGUMENT;
1315 	}
1316 
1317 	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
1318 	    VM_FLAGS_USER_ALLOCATE)) {
1319 		return KERN_INVALID_ARGUMENT;
1320 	}
1321 
1322 	result = mach_vm_allocate_kernel_sanitize(map,
1323 	    *addr_u,
1324 	    size_u,
1325 	    vmk_flags,
1326 	    &map_addr,
1327 	    &map_size);
1328 	if (__improbable(result != KERN_SUCCESS)) {
1329 		result = vm_sanitize_get_kr(result);
1330 		if (result == KERN_SUCCESS) {
1331 			*addr_u = vm_sanitize_wrap_addr(0);
1332 		} else {
1333 			ktriage_record(thread_tid(current_thread()),
1334 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1335 			    KDBG_TRIAGE_RESERVED,
1336 			    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADSIZE_ERROR),
1337 			    KERN_INVALID_ARGUMENT /* arg */);
1338 		}
1339 		return result;
1340 	}
1341 
1342 	vm_map_kernel_flags_update_range_id(&vmk_flags, map, map_size);
1343 
1344 	result = vm_map_enter(
1345 		map,
1346 		&map_addr,
1347 		map_size,
1348 		(vm_map_offset_t)0,
1349 		vmk_flags,
1350 		VM_OBJECT_NULL,
1351 		(vm_object_offset_t)0,
1352 		FALSE,
1353 		VM_PROT_DEFAULT,
1354 		VM_PROT_ALL,
1355 		VM_INHERIT_DEFAULT);
1356 
1357 	if (result == KERN_SUCCESS) {
1358 #if KASAN
1359 		if (map->pmap == kernel_pmap) {
1360 			kasan_notify_address(map_addr, map_size);
1361 		}
1362 #endif
1363 		*addr_u = vm_sanitize_wrap_addr(map_addr);
1364 	} else {
1365 		ktriage_record(thread_tid(current_thread()),
1366 		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
1367 		    KDBG_TRIAGE_RESERVED,
1368 		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_VMMAPENTER_ERROR),
1369 		    result /* arg */);
1370 	}
1371 	return result;
1372 }
1373 
1374 #pragma mark population
1375 
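/*
 * Editorial note (added): kernel_memory_populate_pmap_enter() enters a
 * single wired mapping into the kernel pmap.  It first tries with
 * PMAP_OPTIONS_NOWAIT while the object lock is held; if the pmap reports
 * KERN_RESOURCE_SHORTAGE, the object is unlocked, the mapping is retried in
 * blocking mode, and the object is relocked before returning.
 */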
1376 static void
1377 kernel_memory_populate_pmap_enter(
1378 	vm_object_t             object,
1379 	vm_address_t            addr,
1380 	vm_object_offset_t      offset,
1381 	vm_page_t               mem,
1382 	vm_prot_t               prot,
1383 	int                     pe_flags,
1384 	pmap_mapping_type_t     mapping_type)
1385 {
1386 	kern_return_t   pe_result;
1387 	int             pe_options;
1388 
1389 	if (VMP_ERROR_GET(mem)) {
1390 		panic("VM page %p should not have an error", mem);
1391 	}
1392 
1393 	pe_options = PMAP_OPTIONS_NOWAIT;
1394 	if (object->internal) {
1395 		pe_options |= PMAP_OPTIONS_INTERNAL;
1396 	}
1397 	if (mem->vmp_reusable || object->all_reusable) {
1398 		pe_options |= PMAP_OPTIONS_REUSABLE;
1399 	}
1400 
1401 	pe_result = pmap_enter_options(kernel_pmap, addr + offset,
1402 	    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
1403 	    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1404 
1405 	if (pe_result == KERN_RESOURCE_SHORTAGE) {
1406 		vm_object_unlock(object);
1407 
1408 		pe_options &= ~PMAP_OPTIONS_NOWAIT;
1409 
1410 		pe_result = pmap_enter_options(kernel_pmap, addr + offset,
1411 		    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
1412 		    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1413 
1414 		vm_object_lock(object);
1415 	}
1416 
1417 	assert(pe_result == KERN_SUCCESS);
1418 }
1419 
1420 void
1421 kernel_memory_populate_object_and_unlock(
1422 	vm_object_t             object, /* must be locked */
1423 	vm_address_t            addr,
1424 	vm_offset_t             offset,
1425 	vm_size_t               size,
1426 	vm_page_t               page_list,
1427 	kma_flags_t             flags,
1428 	vm_tag_t                tag,
1429 	vm_prot_t               prot,
1430 	pmap_mapping_type_t     mapping_type)
1431 {
1432 	vm_page_t       mem;
1433 	int             pe_flags;
1434 	bool            gobbled_list = page_list && page_list->vmp_gobbled;
1435 
1436 	assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0));
1437 	assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);
1438 
1439 
1440 	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
1441 		assert3u(offset, ==, addr);
1442 	} else {
1443 		/*
1444 		 * kernel_memory_populate_pmap_enter() might drop the object
1445 		 * lock, and the caller might not own a reference anymore
1446 		 * and rely on holding the vm object lock for liveness.
1447 		 */
1448 		vm_object_reference_locked(object);
1449 	}
1450 
1451 	if (flags & KMA_KSTACK) {
1452 		pe_flags = VM_MEM_STACK;
1453 	} else {
1454 		pe_flags = 0;
1455 	}
1456 
1457 
1458 	for (vm_object_offset_t pg_offset = 0;
1459 	    pg_offset < size;
1460 	    pg_offset += PAGE_SIZE_64) {
1461 		if (page_list == NULL) {
1462 			panic("%s: page_list too short", __func__);
1463 		}
1464 
1465 		mem = page_list;
1466 		page_list = mem->vmp_snext;
1467 		mem->vmp_snext = NULL;
1468 
1469 		assert(mem->vmp_wire_count == 0);
1470 		assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
1471 		assert(vm_page_is_canonical(mem));
1472 
1473 		if (flags & KMA_COMPRESSOR) {
1474 			mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
1475 			/*
1476 			 * Background processes doing I/O accounting can call
1477 			 * into NVME driver to do some work which results in
1478 			 * an allocation here and so we want to make sure
1479 			 * that the pages used by compressor, regardless of
1480 			 * process context, are never on the special Q.
1481 			 */
1482 			mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
1483 
1484 			vm_page_insert(mem, object, offset + pg_offset);
1485 		} else {
1486 			mem->vmp_q_state = VM_PAGE_IS_WIRED;
1487 			mem->vmp_wire_count = 1;
1488 
1489 
1490 			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
1491 		}
1492 
1493 		mem->vmp_gobbled = false;
1494 		mem->vmp_busy = false;
1495 		mem->vmp_pmapped = true;
1496 		mem->vmp_wpmapped = true;
1497 
1498 		/*
1499 		 * Manual PMAP_ENTER_OPTIONS() with shortcuts
1500 		 * for the kernel and compressor objects.
1501 		 */
1502 		kernel_memory_populate_pmap_enter(object, addr, pg_offset,
1503 		    mem, prot, pe_flags, mapping_type);
1504 
1505 		if (flags & KMA_NOENCRYPT) {
1506 			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
1507 		}
1508 	}
1509 
1510 	if (page_list) {
1511 		panic("%s: page_list too long", __func__);
1512 	}
1513 
1514 	vm_object_unlock(object);
1515 	if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) {
1516 		vm_object_deallocate(object);
1517 	}
1518 
1519 	/*
1520 	 * Update the accounting:
1521 	 * - the compressor "wired" pages don't really count as wired
1522 	 * - kmem_alloc_contig_guard() gives gobbled pages,
1523 	 *   which already count as wired but need to be ungobbled.
1524 	 */
1525 	if (gobbled_list) {
1526 		vm_page_lockspin_queues();
1527 		if (flags & KMA_COMPRESSOR) {
1528 			vm_page_wire_count -= atop(size);
1529 		}
1530 		vm_page_gobble_count -= atop(size);
1531 		vm_page_unlock_queues();
1532 	} else if ((flags & KMA_COMPRESSOR) == 0) {
1533 		vm_page_lockspin_queues();
1534 		vm_page_wire_count += atop(size);
1535 		vm_page_unlock_queues();
1536 	}
1537 
1538 	if (flags & KMA_KOBJECT) {
1539 		/* vm_page_insert_wired() handles regular objects already */
1540 		vm_tag_update_size(tag, size, NULL);
1541 	}
1542 
1543 #if KASAN
1544 	if (flags & KMA_COMPRESSOR) {
1545 		kasan_notify_address_nopoison(addr, size);
1546 	} else {
1547 		kasan_notify_address(addr, size);
1548 	}
1549 #endif /* KASAN */
1550 }
1551 
1552 
1553 kern_return_t
1554 kernel_memory_populate(
1555 	vm_offset_t     addr,
1556 	vm_size_t       size,
1557 	kma_flags_t     flags,
1558 	vm_tag_t        tag)
1559 {
1560 	kern_return_t   kr = KERN_SUCCESS;
1561 	vm_page_t       page_list = NULL;
1562 	vm_size_t       page_count = atop_64(size);
1563 	vm_object_t     object = __kmem_object(ANYF(flags));
1564 
1565 #if DEBUG || DEVELOPMENT
1566 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
1567 	    size, 0, 0, 0);
1568 #endif /* DEBUG || DEVELOPMENT */
1569 
1570 
1571 	kr = vm_page_alloc_list(page_count, flags, &page_list);
1572 	if (kr == KERN_SUCCESS) {
1573 		vm_object_lock(object);
1574 		kernel_memory_populate_object_and_unlock(object, addr,
1575 		    addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
1576 		    __kmem_mapping_type(ANYF(flags)));
1577 	}
1578 
1579 #if DEBUG || DEVELOPMENT
1580 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1581 	    page_count, 0, 0, 0);
1582 #endif /* DEBUG || DEVELOPMENT */
1583 	return kr;
1584 }
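/*
 * Sketch of one intended pairing (added, an assumption based on the flags
 * handled in this file): a caller can reserve virtual space only and
 * populate it later, e.g.
 *
 *	kmr = kmem_alloc_guard(kernel_map, size, 0,
 *	    KMA_KOBJECT | KMA_VAONLY, guard);
 *	...
 *	kernel_memory_populate(kmr.kmr_address, size, KMA_KOBJECT, tag);
 *
 * using the same object-selection flag (KMA_KOBJECT or KMA_COMPRESSOR) for
 * both calls.
 */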
1585 
1586 void
1587 kernel_memory_depopulate(
1588 	vm_offset_t        addr,
1589 	vm_size_t          size,
1590 	kma_flags_t        flags,
1591 	vm_tag_t           tag)
1592 {
1593 	vm_object_t        object = __kmem_object(ANYF(flags));
1594 	vm_object_offset_t offset = addr;
1595 	vm_page_t          mem;
1596 	vm_page_t          local_freeq = NULL;
1597 	unsigned int       pages_unwired = 0;
1598 
1599 	vm_object_lock(object);
1600 
1601 	pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);
1602 
1603 	for (vm_object_offset_t pg_offset = 0;
1604 	    pg_offset < size;
1605 	    pg_offset += PAGE_SIZE_64) {
1606 		mem = vm_page_lookup(object, offset + pg_offset);
1607 
1608 		assert(mem);
1609 
1610 		if (flags & KMA_COMPRESSOR) {
1611 			assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
1612 		} else {
1613 			assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
1614 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
1615 			pages_unwired++;
1616 		}
1617 
1618 		mem->vmp_busy = TRUE;
1619 
1620 		assert(mem->vmp_tabled);
1621 		vm_page_remove(mem, TRUE);
1622 		assert(mem->vmp_busy);
1623 
1624 		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
1625 
1626 		mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
1627 		mem->vmp_snext = local_freeq;
1628 		local_freeq = mem;
1629 	}
1630 
1631 	vm_object_unlock(object);
1632 
1633 	vm_page_free_list(local_freeq, TRUE);
1634 
1635 	if (!(flags & KMA_COMPRESSOR)) {
1636 		vm_page_lockspin_queues();
1637 		vm_page_wire_count -= pages_unwired;
1638 		vm_page_unlock_queues();
1639 	}
1640 
1641 	if (flags & KMA_KOBJECT) {
1642 		/* vm_page_remove() handles regular objects already */
1643 		vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
1644 	}
1645 }
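/*
 * Editorial note (added): kernel_memory_depopulate() is the inverse of
 * kernel_memory_populate(): it disconnects and frees the physical pages
 * backing [addr, addr + size) while leaving the virtual range allocated in
 * the map.  The KMA_KOBJECT / KMA_COMPRESSOR flag must match the one used
 * to populate, so the pages are looked up in the right object and the
 * wired-page accounting stays balanced.
 */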
1646 
1647 #pragma mark reallocation
1648 
1649 __abortlike
1650 static void
1651 __kmem_realloc_invalid_object_size_panic(
1652 	vm_map_t                map,
1653 	vm_address_t            address,
1654 	vm_size_t               size,
1655 	vm_map_entry_t          entry)
1656 {
1657 	vm_object_t object  = VME_OBJECT(entry);
1658 	vm_size_t   objsize = __kmem_entry_orig_size(entry);
1659 
1660 	panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1661 	    "object %p has unexpected size %ld",
1662 	    map, (void *)address, (size_t)size, entry, object, objsize);
1663 }
1664 
1665 __abortlike
1666 static void
1667 __kmem_realloc_invalid_pager_panic(
1668 	vm_map_t                map,
1669 	vm_address_t            address,
1670 	vm_size_t               size,
1671 	vm_map_entry_t          entry)
1672 {
1673 	vm_object_t object     = VME_OBJECT(entry);
1674 	memory_object_t pager  = object->pager;
1675 	bool pager_created     = object->pager_created;
1676 	bool pager_initialized = object->pager_initialized;
1677 	bool pager_ready       = object->pager_ready;
1678 
1679 	panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1680 	    "object %p has unexpected pager %p (%d,%d,%d)",
1681 	    map, (void *)address, (size_t)size, entry, object,
1682 	    pager, pager_created, pager_initialized, pager_ready);
1683 }
1684 
1685 static kmem_return_t
1686 kmem_realloc_shrink_guard(
1687 	vm_map_t                map,
1688 	vm_offset_t             req_oldaddr,
1689 	vm_size_t               req_oldsize,
1690 	vm_size_t               req_newsize,
1691 	kmr_flags_t             flags,
1692 	kmem_guard_t            guard,
1693 	vm_map_entry_t          entry)
1694 {
1695 	vmr_flags_t             vmr_flags = VM_MAP_REMOVE_KUNWIRE;
1696 	vm_object_t             object;
1697 	vm_offset_t             delta = 0;
1698 	kmem_return_t           kmr;
1699 	bool                    was_atomic;
1700 	vm_size_t               oldsize = round_page(req_oldsize);
1701 	vm_size_t               newsize = round_page(req_newsize);
1702 	vm_address_t            oldaddr = req_oldaddr;
1703 
1704 #if KASAN_CLASSIC
1705 	if (flags & KMR_KASAN_GUARD) {
1706 		assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0);
1707 		flags   |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1708 		oldaddr -= PAGE_SIZE;
1709 		delta    = ptoa(2);
1710 		oldsize += delta;
1711 		newsize += delta;
1712 	}
1713 #endif /* KASAN_CLASSIC */
1714 
1715 	if (flags & KMR_TAG) {
1716 		oldaddr = vm_memtag_canonicalize_kernel(req_oldaddr);
1717 	}
1718 
1719 	vm_map_lock_assert_exclusive(map);
1720 
1721 	if ((flags & KMR_KOBJECT) == 0) {
1722 		object = VME_OBJECT(entry);
1723 		vm_object_reference(object);
1724 	}
1725 
1726 	/*
1727 	 *	Shrinking an atomic entry starts with splitting it,
1728 	 *	and removing the second half.
1729 	 */
1730 	was_atomic = entry->vme_atomic;
1731 	entry->vme_atomic = false;
1732 	vm_map_clip_end(map, entry, entry->vme_start + newsize);
1733 	entry->vme_atomic = was_atomic;
1734 
1735 #if KASAN
1736 	if (entry->vme_kernel_object && was_atomic) {
1737 		entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta;
1738 	}
1739 #if KASAN_CLASSIC
1740 	if (flags & KMR_KASAN_GUARD) {
1741 		kasan_poison_range(oldaddr + newsize, oldsize - newsize,
1742 		    ASAN_VALID);
1743 	}
1744 #endif
1745 #if KASAN_TBI
1746 	if (flags & KMR_TAG) {
1747 		kasan_tbi_mark_free_space((caddr_t)req_oldaddr + newsize, oldsize - newsize);
1748 	}
1749 #endif /* KASAN_TBI */
1750 #endif /* KASAN */
1751 	(void)vm_map_remove_and_unlock(map,
1752 	    oldaddr + newsize, oldaddr + oldsize,
1753 	    vmr_flags, KMEM_GUARD_NONE);
1754 
1755 
1756 	/*
1757 	 *	Lastly, if there are guard pages, deal with them.
1758 	 *
1759 	 *	The kernel object just needs to depopulate,
1760 	 *	regular objects require freeing the last page
1761 	 *	and replacing it with a guard.
1762 	 */
1763 	if (flags & KMR_KOBJECT) {
1764 		if (flags & KMR_GUARD_LAST) {
1765 			kma_flags_t dflags = KMA_KOBJECT;
1766 			kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
1767 			    PAGE_SIZE, dflags, guard.kmg_tag);
1768 		}
1769 	} else {
1770 		vm_page_t guard_right = VM_PAGE_NULL;
1771 		vm_offset_t remove_start = newsize;
1772 
1773 		if (flags & KMR_GUARD_LAST) {
1774 			if (!map->never_faults) {
1775 				guard_right = vm_page_create_guard(true);
1776 			}
1777 			remove_start -= PAGE_SIZE;
1778 		}
1779 
1780 		vm_object_lock(object);
1781 
1782 		if (object->vo_size != oldsize) {
1783 			__kmem_realloc_invalid_object_size_panic(map,
1784 			    req_oldaddr, req_oldsize + delta, entry);
1785 		}
1786 		vm_object_set_size(object, newsize, req_newsize);
1787 
1788 		vm_object_page_remove(object, remove_start, oldsize);
1789 
1790 		if (guard_right) {
1791 			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
1792 			guard_right->vmp_busy = false;
1793 		}
1794 		vm_object_unlock(object);
1795 		vm_object_deallocate(object);
1796 	}
1797 
1798 	kmr.kmr_address = req_oldaddr;
1799 	kmr.kmr_return  = 0;
1800 #if KASAN_CLASSIC
1801 	if (flags & KMA_KASAN_GUARD) {
1802 		kasan_alloc_large(kmr.kmr_address, req_newsize);
1803 	}
1804 #endif /* KASAN_CLASSIC */
1805 #if KASAN_TBI
1806 	if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1807 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
1808 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
1809 	}
1810 #endif /* KASAN_TBI */
1811 
1812 	return kmr;
1813 }
1814 
1815 kmem_return_t
1816 kmem_realloc_guard(
1817 	vm_map_t                map,
1818 	vm_offset_t             req_oldaddr,
1819 	vm_size_t               req_oldsize,
1820 	vm_size_t               req_newsize,
1821 	kmr_flags_t             flags,
1822 	kmem_guard_t            guard)
1823 {
1824 	vm_object_t             object;
1825 	vm_size_t               oldsize;
1826 	vm_size_t               newsize;
1827 	vm_offset_t             delta = 0;
1828 	vm_map_offset_t         oldaddr;
1829 	vm_map_offset_t         newaddr;
1830 	vm_object_offset_t      newoffs;
1831 	vm_map_entry_t          oldentry;
1832 	vm_map_entry_t          newentry;
1833 	vm_page_t               page_list = NULL;
1834 	bool                    needs_wakeup = false;
1835 	kmem_return_t           kmr = { };
1836 	unsigned int            last_timestamp;
1837 	vm_map_kernel_flags_t   vmk_flags = {
1838 		.vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
1839 	};
1840 
1841 	vmlp_api_start(KMEM_REALLOC_GUARD);
1842 
1843 	assert(KMEM_REALLOC_FLAGS_VALID(flags));
1844 
1845 	if (!guard.kmg_atomic) {
1846 		if (!(flags & (KMR_DATA | KMR_DATA_SHARED))) {
1847 			__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
1848 			    req_oldsize, flags);
1849 		}
1850 
1851 		if (flags & KMR_KOBJECT) {
1852 			__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
1853 			    req_oldsize, flags);
1854 		}
1855 	}
1856 
1857 	if (req_oldaddr == 0ul) {
1858 		kmem_return_t ret = kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
1859 		vmlp_api_end(KMEM_REALLOC_GUARD, ret.kmr_return);
1860 		return ret;
1861 	}
1862 
1863 	if (req_newsize == 0ul) {
1864 		kmem_free_guard(map, req_oldaddr, req_oldsize,
1865 		    (kmf_flags_t)flags, guard);
1866 		vmlp_api_end(KMEM_REALLOC_GUARD, kmr.kmr_return);
1867 		return kmr;
1868 	}
1869 
1870 	if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
1871 		__kmem_invalid_size_panic(map, req_newsize, flags);
1872 	}
1873 	if (req_newsize < __kmem_guard_size(ANYF(flags))) {
1874 		__kmem_invalid_size_panic(map, req_newsize, flags);
1875 	}
1876 
1877 	oldsize = round_page(req_oldsize);
1878 	newsize = round_page(req_newsize);
1879 	oldaddr = req_oldaddr;
1880 #if KASAN_CLASSIC
1881 	if (flags & KMR_KASAN_GUARD) {
1882 		flags   |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1883 		oldaddr -= PAGE_SIZE;
1884 		delta    = ptoa(2);
1885 		oldsize += delta;
1886 		newsize += delta;
1887 	}
1888 #endif /* KASAN_CLASSIC */
1889 #if CONFIG_KERNEL_TAGGING
1890 	if (flags & KMR_TAG) {
1891 		vm_memtag_verify_tag(req_oldaddr + __kmem_guard_left(ANYF(flags)));
1892 		oldaddr = vm_memtag_canonicalize_kernel(req_oldaddr);
1893 	}
1894 #endif /* CONFIG_KERNEL_TAGGING */
1895 
1896 #if !KASAN
1897 	/*
1898 	 *	If we are not on a KASAN variant and there is no difference in
1899 	 *	the requested (rounded) size, just return.
1900 	 *
1901 	 *	Otherwise we want to validate the size and re-tag for KASAN_TBI.
1902 	 */
1903 	if (oldsize == newsize) {
1904 		kmr.kmr_address = req_oldaddr;
1905 		vmlp_api_end(KMEM_REALLOC_GUARD, kmr.kmr_return);
1906 		return kmr;
1907 	}
1908 #endif /* !KASAN */
1909 
1910 	/*
1911 	 *	If we're growing the allocation,
1912 	 *	then reserve the pages we'll need,
1913 	 *	and find a spot for its new place.
1914 	 */
1915 	if (oldsize < newsize) {
1916 #if DEBUG || DEVELOPMENT
1917 		VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1918 		    DBG_VM_KERN_REQUEST, DBG_FUNC_START,
1919 		    newsize - oldsize, 0, 0, 0);
1920 #endif /* DEBUG || DEVELOPMENT */
1921 		kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
1922 		    (kma_flags_t)flags, &page_list);
1923 		if (kmr.kmr_return == KERN_SUCCESS) {
1924 			kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
1925 			    newsize, 0, &vmk_flags, true);
1926 			kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
1927 			    vmk_flags, &newentry);
1928 		}
1929 		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
1930 			if (flags & KMR_REALLOCF) {
1931 				kmem_free_guard(map, req_oldaddr, req_oldsize,
1932 				    flags & (KMF_TAG | KMF_GUARD_FIRST |
1933 				    KMF_GUARD_LAST | KMF_KASAN_GUARD), guard);
1934 			}
1935 			if (page_list) {
1936 				vm_page_free_list(page_list, FALSE);
1937 			}
1938 #if DEBUG || DEVELOPMENT
1939 			VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1940 			    DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1941 			    0, 0, 0, 0);
1942 #endif /* DEBUG || DEVELOPMENT */
1943 			vmlp_api_end(KMEM_REALLOC_GUARD, kmr.kmr_return);
1944 			return kmr;
1945 		}
1946 
1947 		/* map is locked */
1948 	} else {
1949 		vm_map_lock(map);
1950 	}
1951 
1952 
1953 	/*
1954 	 *	Locate the entry:
1955 	 *	- wait for it to quiesce.
1956 	 *	- validate its guard,
1957 	 *	- learn its correct tag,
1958 	 */
1959 again:
1960 	if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
1961 		__kmem_entry_not_found_panic(map, req_oldaddr);
1962 	}
1963 
1964 	vmlp_range_event_entry(map, oldentry);
1965 
1966 	if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
1967 		oldentry->needs_wakeup = true;
1968 		vm_map_entry_wait(map, THREAD_UNINT);
1969 		goto again;
1970 	}
1971 	kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
1972 	if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
1973 		__kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
1974 	}
1975 	/*
1976 	 *	TODO: We should validate for non-atomic entries that the range
1977 	 *	      we are acting on is what we expect here.
1978 	 */
1979 #if KASAN
1980 	if (__kmem_entry_orig_size(oldentry) != req_oldsize) {
1981 		__kmem_realloc_invalid_object_size_panic(map,
1982 		    req_oldaddr, req_oldsize + delta, oldentry);
1983 	}
1984 
1985 	if (oldsize == newsize) {
1986 		kmr.kmr_address = req_oldaddr;
1987 		if (oldentry->vme_kernel_object) {
1988 			oldentry->vme_object_or_delta = delta +
1989 			    (-req_newsize & PAGE_MASK);
1990 		} else {
1991 			object = VME_OBJECT(oldentry);
1992 			vm_object_lock(object);
1993 			vm_object_set_size(object, newsize, req_newsize);
1994 			vm_object_unlock(object);
1995 		}
1996 		vm_map_unlock(map);
1997 
1998 #if KASAN_CLASSIC
1999 		if (flags & KMA_KASAN_GUARD) {
2000 			kasan_alloc_large(kmr.kmr_address, req_newsize);
2001 		}
2002 #endif /* KASAN_CLASSIC */
2003 #if KASAN_TBI
2004 		if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
2005 			kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
2006 			kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
2007 		}
2008 #endif /* KASAN_TBI */
2009 		vmlp_api_end(KMEM_REALLOC_GUARD, kmr.kmr_return);
2010 		return kmr;
2011 	}
2012 #endif /* KASAN */
2013 
2014 	guard.kmg_tag = VME_ALIAS(oldentry);
2015 
2016 	if (newsize < oldsize) {
2017 		kmem_return_t ret = kmem_realloc_shrink_guard(map, req_oldaddr,
2018 		    req_oldsize, req_newsize, flags, guard, oldentry);
2019 		vmlp_api_end(KMEM_REALLOC_GUARD, ret.kmr_return);
2020 		return ret;
2021 	}
2022 
2023 
2024 	/*
2025 	 *	We are growing the entry
2026 	 *
2027 	 *	For regular objects we use the object `vo_size` updates
2028 	 *	as a guarantee that no 2 kmem_realloc() can happen
2029 	 *	concurrently (by doing it before the map is unlocked).
2030 	 *
2031 	 *	For the kernel object, prevent the entry from being
2032 	 *	reallocated or changed by marking it "in_transition".
2033 	 */
2034 
2035 	object = VME_OBJECT(oldentry);
2036 	vm_object_lock(object);
2037 	vm_object_reference_locked(object);
2038 
2039 	newaddr = newentry->vme_start;
2040 	newoffs = oldsize;
2041 
2042 	vmlp_range_event_entry(map, newentry);
2043 
2044 	VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
2045 	VME_ALIAS_SET(newentry, guard.kmg_tag);
2046 	if (flags & KMR_KOBJECT) {
2047 		oldentry->in_transition = true;
2048 		VME_OFFSET_SET(newentry, newaddr);
2049 		newentry->wired_count = 1;
2050 		vme_btref_consider_and_set(newentry, __builtin_frame_address(0));
2051 		newoffs = newaddr + oldsize;
2052 #if KASAN
2053 		newentry->vme_object_or_delta = delta +
2054 		    (-req_newsize & PAGE_MASK);
2055 #endif /* KASAN */
2056 	} else {
2057 		if (object->pager_created || object->pager) {
2058 			/*
2059 			 * We can't "realloc/grow" the pager, so pageable
2060 			 * allocations should not go through this path.
2061 			 */
2062 			__kmem_realloc_invalid_pager_panic(map,
2063 			    req_oldaddr, req_oldsize + delta, oldentry);
2064 		}
2065 		if (object->vo_size != oldsize) {
2066 			__kmem_realloc_invalid_object_size_panic(map,
2067 			    req_oldaddr, req_oldsize + delta, oldentry);
2068 		}
2069 		vm_object_set_size(object, newsize, req_newsize);
2070 	}
2071 
2072 	last_timestamp = map->timestamp;
2073 	vm_map_unlock(map);
2074 
2075 
2076 	/*
2077 	 *	Now proceed with the population of pages.
2078 	 *
2079 	 *	Kernel objects can use the kmem population helpers.
2080 	 *
2081 	 *	Regular objects will insert pages manually,
2082 	 *	then wire the memory into the new range.
2083 	 */
2084 
2085 	vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));
2086 
2087 	if (flags & KMR_KOBJECT) {
2088 		pmap_mapping_type_t mapping_type = __kmem_mapping_type(ANYF(flags));
2089 
2090 		pmap_protect(kernel_pmap,
2091 		    oldaddr, oldaddr + oldsize - guard_right_size,
2092 		    VM_PROT_NONE);
2093 
2094 		for (vm_object_offset_t offset = 0;
2095 		    offset < oldsize - guard_right_size;
2096 		    offset += PAGE_SIZE_64) {
2097 			vm_page_t mem;
2098 
2099 			mem = vm_page_lookup(object, oldaddr + offset);
2100 			if (mem == VM_PAGE_NULL) {
2101 				continue;
2102 			}
2103 
2104 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
2105 
2106 			mem->vmp_busy = true;
2107 			vm_page_remove(mem, true);
2108 			vm_page_insert_wired(mem, object, newaddr + offset,
2109 			    guard.kmg_tag);
2110 			mem->vmp_busy = false;
2111 
2112 			kernel_memory_populate_pmap_enter(object, newaddr,
2113 			    offset, mem, VM_PROT_DEFAULT, 0, mapping_type);
2114 		}
2115 
2116 		kernel_memory_populate_object_and_unlock(object,
2117 		    newaddr + oldsize - guard_right_size,
2118 		    newoffs - guard_right_size,
2119 		    newsize - oldsize,
2120 		    page_list, (kma_flags_t)flags,
2121 		    guard.kmg_tag, VM_PROT_DEFAULT, mapping_type);
2122 	} else {
2123 		vm_page_t guard_right = VM_PAGE_NULL;
2124 
2125 		/*
2126 		 *	Note: we are borrowing the new entry reference
2127 		 *	on the object for the duration of this code,
2128 		 *	which works because we keep the object locked
2129 		 *	throughout.
2130 		 */
2131 		if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
2132 			guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
2133 			assert(vm_page_is_guard(guard_right));
2134 			guard_right->vmp_busy = true;
2135 			vm_page_remove(guard_right, true);
2136 		}
2137 
2138 		if (flags & KMR_FREEOLD) {
2139 			/*
2140 			 * Freeing the old mapping will make
2141 			 * the old pages become pageable until
2142 			 * the new mapping makes them wired again.
2143 			 * Let's take an extra "wire_count" to
2144 			 * prevent any accidental "page out".
2145 			 * We'll have to undo that after wiring
2146 			 * the new mapping.
2147 			 */
2148 			vm_object_reference_locked(object); /* keep object alive */
2149 			for (vm_object_offset_t offset = 0;
2150 			    offset < oldsize - guard_right_size;
2151 			    offset += PAGE_SIZE_64) {
2152 				vm_page_t mem;
2153 
2154 				mem = vm_page_lookup(object, offset);
2155 				assert(mem != VM_PAGE_NULL);
2156 				assertf(!VM_PAGE_PAGEABLE(mem),
2157 				    "mem %p qstate %d",
2158 				    mem, mem->vmp_q_state);
2159 				if (vm_page_is_guard(mem)) {
2160 					/* guard pages are not wired */
2161 				} else {
2162 					assertf(VM_PAGE_WIRED(mem),
2163 					    "mem %p qstate %d wirecount %d",
2164 					    mem,
2165 					    mem->vmp_q_state,
2166 					    mem->vmp_wire_count);
2167 					assertf(mem->vmp_wire_count >= 1,
2168 					    "mem %p wirecount %d",
2169 					    mem, mem->vmp_wire_count);
2170 					mem->vmp_wire_count++;
2171 				}
2172 			}
2173 		}
2174 
2175 		for (vm_object_offset_t offset = oldsize - guard_right_size;
2176 		    offset < newsize - guard_right_size;
2177 		    offset += PAGE_SIZE_64) {
2178 			vm_page_t mem = page_list;
2179 
2180 			page_list = mem->vmp_snext;
2181 			mem->vmp_snext = VM_PAGE_NULL;
2182 			assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
2183 			assert(!VM_PAGE_PAGEABLE(mem));
2184 
2185 			vm_page_insert(mem, object, offset);
2186 			mem->vmp_busy = false;
2187 		}
2188 
2189 		if (guard_right) {
2190 			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
2191 			guard_right->vmp_busy = false;
2192 		}
2193 
2194 		vm_object_unlock(object);
2195 	}
2196 
2197 	/*
2198 	 *	Mark the entry as idle again,
2199 	 *	and honor KMR_FREEOLD if needed.
2200 	 */
2201 
2202 	vm_map_lock(map);
2203 	if (last_timestamp + 1 != map->timestamp &&
2204 	    !vm_map_lookup_entry(map, oldaddr, &oldentry)) {
2205 		__kmem_entry_not_found_panic(map, req_oldaddr);
2206 	}
2207 
2208 	if (flags & KMR_KOBJECT) {
2209 		assert(oldentry->in_transition);
2210 		oldentry->in_transition = false;
2211 		if (oldentry->needs_wakeup) {
2212 			needs_wakeup = true;
2213 			oldentry->needs_wakeup = false;
2214 		}
2215 	}
2216 
2217 	if (flags & KMR_FREEOLD) {
2218 		vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2219 
2220 #if KASAN_CLASSIC
2221 		if (flags & KMR_KASAN_GUARD) {
2222 			kasan_poison_range(oldaddr, oldsize, ASAN_VALID);
2223 		}
2224 #endif
2225 #if KASAN_TBI
2226 		if (flags & KMR_TAG) {
2227 			kasan_tbi_mark_free_space((caddr_t)req_oldaddr, oldsize);
2228 		}
2229 #endif /* KASAN_TBI */
2230 		if (flags & KMR_GUARD_LAST) {
2231 			vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST;
2232 		}
2233 		(void)vm_map_remove_and_unlock(map,
2234 		    oldaddr, oldaddr + oldsize,
2235 		    vmr_flags, guard);
2236 	} else {
2237 		vm_map_unlock(map);
2238 	}
2239 
2240 	if ((flags & KMR_KOBJECT) == 0) {
2241 		kern_return_t kr;
2242 		/*
2243 		 * This must happen _after_ we do the KMR_FREEOLD,
2244 		 * because wiring the pages will call into the pmap,
2245 		 * and if the pages are typed XNU_KERNEL_RESTRICTED,
2246 		 * this would cause a second mapping of the page and panic.
2247 		 */
2248 		kr = vm_map_wire_kernel(map,
2249 		    vm_sanitize_wrap_addr(newaddr),
2250 		    vm_sanitize_wrap_addr(newaddr + newsize),
2251 		    vm_sanitize_wrap_prot(VM_PROT_DEFAULT),
2252 		    guard.kmg_tag, FALSE);
2253 		assert(kr == KERN_SUCCESS);
2254 
2255 		if (flags & KMR_FREEOLD) {
2256 			/*
2257 			 * Undo the extra "wiring" we made above
2258 			 * and release the extra reference we took
2259 			 * on the object.
2260 			 */
2261 			vm_object_lock(object);
2262 			for (vm_object_offset_t offset = 0;
2263 			    offset < oldsize - guard_right_size;
2264 			    offset += PAGE_SIZE_64) {
2265 				vm_page_t mem;
2266 
2267 				mem = vm_page_lookup(object, offset);
2268 				assert(mem != VM_PAGE_NULL);
2269 				assertf(!VM_PAGE_PAGEABLE(mem),
2270 				    "mem %p qstate %d",
2271 				    mem, mem->vmp_q_state);
2272 				if (vm_page_is_guard(mem)) {
2273 					/* guard pages are not wired */
2274 				} else {
2275 					assertf(VM_PAGE_WIRED(mem),
2276 					    "mem %p qstate %d wirecount %d",
2277 					    mem,
2278 					    mem->vmp_q_state,
2279 					    mem->vmp_wire_count);
2280 					assertf(mem->vmp_wire_count >= 2,
2281 					    "mem %p wirecount %d",
2282 					    mem, mem->vmp_wire_count);
2283 					mem->vmp_wire_count--;
2284 					assert(VM_PAGE_WIRED(mem));
2285 					assert(mem->vmp_wire_count >= 1);
2286 				}
2287 			}
2288 			vm_object_unlock(object);
2289 			vm_object_deallocate(object); /* release extra ref */
2290 		}
2291 	}
2292 
2293 	if (needs_wakeup) {
2294 		vm_map_entry_wakeup(map);
2295 	}
2296 
2297 #if DEBUG || DEVELOPMENT
2298 	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
2299 	    atop(newsize - oldsize), 0, 0, 0);
2300 #endif /* DEBUG || DEVELOPMENT */
2301 	kmr.kmr_address = newaddr;
2302 
2303 #if KASAN
2304 	kasan_notify_address(kmr.kmr_address, newsize);
2305 #endif /* KASAN */
2306 #if KASAN_CLASSIC
2307 	if (flags & KMR_KASAN_GUARD) {
2308 		kmr.kmr_address += PAGE_SIZE;
2309 		kasan_alloc_large(kmr.kmr_address, req_newsize);
2310 	}
2311 #endif /* KASAN_CLASSIC */
2312 #if CONFIG_KERNEL_TAGGING
2313 	if (flags & KMR_TAG) {
2314 #if   KASAN_TBI
2315 		/*
2316 		 * Validate the current buffer, then generate a new tag,
2317 		 * even if the address is stable, it's a "new" allocation.
2318 		 */
2319 		__asan_loadN((vm_offset_t)kmr.kmr_address, oldsize);
2320 		kmr.kmr_ptr = vm_memtag_generate_and_store_tag(kmr.kmr_ptr, req_newsize);
2321 		kasan_tbi_retag_unused_space(kmr.kmr_ptr, newsize, req_newsize);
2322 #endif /* KASAN_TBI */
2323 	}
2324 #endif /* CONFIG_KERNEL_TAGGING */
2325 
2326 	vmlp_api_end(KMEM_REALLOC_GUARD, kmr.kmr_return);
2327 	return kmr;
2328 }
2329 
2330 #pragma mark map/remap/wire
2331 
2332 kern_return_t
2333 mach_vm_map_kernel(
2334 	vm_map_t                target_map,
2335 	mach_vm_offset_ut      *address,
2336 	mach_vm_size_ut         initial_size,
2337 	mach_vm_offset_ut       mask,
2338 	vm_map_kernel_flags_t   vmk_flags,
2339 	ipc_port_t              port,
2340 	memory_object_offset_ut offset,
2341 	boolean_t               copy,
2342 	vm_prot_ut              cur_protection,
2343 	vm_prot_ut              max_protection,
2344 	vm_inherit_ut           inheritance)
2345 {
2346 	/* range_id is set by vm_map_enter_mem_object */
2347 	return vm_map_enter_mem_object(target_map,
2348 	           address,
2349 	           initial_size,
2350 	           mask,
2351 	           vmk_flags,
2352 	           port,
2353 	           offset,
2354 	           copy,
2355 	           cur_protection,
2356 	           max_protection,
2357 	           inheritance,
2358 	           NULL,
2359 	           0);
2360 }
2361 
2362 kern_return_t
2363 mach_vm_remap_new_kernel(
2364 	vm_map_t                target_map,
2365 	mach_vm_offset_ut      *address,
2366 	mach_vm_size_ut         size,
2367 	mach_vm_offset_ut       mask,
2368 	vm_map_kernel_flags_t   vmk_flags,
2369 	vm_map_t                src_map,
2370 	mach_vm_offset_ut       memory_address,
2371 	boolean_t               copy,
2372 	vm_prot_ut             *cur_protection,   /* IN/OUT */
2373 	vm_prot_ut             *max_protection,   /* IN/OUT */
2374 	vm_inherit_ut           inheritance)
2375 {
2376 	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
2377 	    VM_FLAGS_USER_REMAP)) {
2378 		return KERN_INVALID_ARGUMENT;
2379 	}
2380 
2381 
2382 	vmk_flags.vmf_return_data_addr = true;
2383 
2384 	/* range_id is set by vm_map_remap */
2385 	return vm_map_remap(target_map,
2386 	           address,
2387 	           size,
2388 	           mask,
2389 	           vmk_flags,
2390 	           src_map,
2391 	           memory_address,
2392 	           copy,
2393 	           cur_protection,
2394 	           max_protection,
2395 	           inheritance);
2396 }
2397 
2398 #pragma mark free
2399 
2400 #if KASAN
2401 
2402 __abortlike
2403 static void
2404 __kmem_free_invalid_object_size_panic(
2405 	vm_map_t                map,
2406 	vm_address_t            address,
2407 	vm_size_t               size,
2408 	vm_map_entry_t          entry)
2409 {
2410 	vm_object_t object  = VME_OBJECT(entry);
2411 	vm_size_t   objsize = __kmem_entry_orig_size(entry);
2412 
2413 	panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): "
2414 	    "object %p has unexpected size %ld",
2415 	    map, (void *)address, (size_t)size, entry, object, objsize);
2416 }
2417 
2418 #endif /* KASAN */
2419 
2420 __mockable vm_size_t
2421 kmem_free_guard(
2422 	vm_map_t        map,
2423 	vm_offset_t     req_addr,
2424 	vm_size_t       req_size,
2425 	kmf_flags_t     flags,
2426 	kmem_guard_t    guard)
2427 {
2428 	vmr_flags_t     vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2429 	vm_address_t    addr      = req_addr;
2430 	vm_offset_t     delta     = 0;
2431 	vm_size_t       size;
2432 #if KASAN
2433 	vm_map_entry_t  entry;
2434 #endif /* KASAN */
2435 
2436 	vmlp_api_start(KMEM_FREE_GUARD);
2437 
2438 	assert(map->pmap == kernel_pmap);
2439 
2440 #if KASAN_CLASSIC
2441 	if (flags & KMF_KASAN_GUARD) {
2442 		addr  -= PAGE_SIZE;
2443 		delta  = ptoa(2);
2444 	}
2445 #endif /* KASAN_CLASSIC */
2446 #if CONFIG_KERNEL_TAGGING
2447 	if (flags & KMF_TAG) {
2448 		vm_memtag_verify_tag(req_addr + __kmem_guard_left(ANYF(flags)));
2449 		addr = vm_memtag_canonicalize_kernel(req_addr);
2450 	}
2451 #endif /* CONFIG_KERNEL_TAGGING */
2452 
2453 	if (flags & KMF_GUESS_SIZE) {
2454 		vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
2455 		size = PAGE_SIZE;
2456 	} else if (req_size == 0) {
2457 		__kmem_invalid_size_panic(map, req_size, flags);
2458 	} else {
2459 		size = round_page(req_size) + delta;
2460 	}
2461 
2462 	vm_map_lock(map);
2463 
2464 #if KASAN
2465 	if (!vm_map_lookup_entry(map, addr, &entry)) {
2466 		__kmem_entry_not_found_panic(map, req_addr);
2467 	}
2468 	if (flags & KMF_GUESS_SIZE) {
2469 		vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
2470 		req_size = __kmem_entry_orig_size(entry);
2471 		size = round_page(req_size + delta);
2472 	} else if (guard.kmg_atomic && entry->vme_kernel_object &&
2473 	    __kmem_entry_orig_size(entry) != req_size) {
2474 		/*
2475 		 * We can't make a strict check for regular
2476 		 * VM objects because it could be:
2477 		 *
2478 		 * - the kmem_guard_free() of a kmem_realloc_guard() without
2479 		 *   KMR_FREEOLD, and in that case the object size won't match.
2480 		 *
2481 		 * - a submap, in which case there is no "orig size".
2482 		 */
2483 		__kmem_free_invalid_object_size_panic(map,
2484 		    req_addr, req_size + delta, entry);
2485 	}
2486 #endif /* KASAN */
2487 #if KASAN_CLASSIC
2488 	if (flags & KMR_KASAN_GUARD) {
2489 		kasan_poison_range(addr, size, ASAN_VALID);
2490 	}
2491 #endif
2492 #if KASAN_TBI
2493 	if (flags & KMF_TAG) {
2494 		kasan_tbi_mark_free_space((caddr_t)req_addr, size);
2495 	}
2496 #endif /* KASAN_TBI */
2497 
2498 	/*
2499 	 * vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which
2500 	 * unwires the kernel mapping. The page won't be mapped any longer so
2501 	 * there is no extra step that is required for memory tagging to "clear"
2502 	 * it -- the page will be later laundered when reused.
2503 	 */
2504 	vmlp_range_event(map, addr, size);
2505 	vmlp_api_end(KMEM_FREE_GUARD, 0);
2506 	return vm_map_remove_and_unlock(map, addr, addr + size,
2507 	           vmr_flags, guard).kmr_size - delta;
2508 }
2509 
2510 __exported void
2511 kmem_free_external(
2512 	vm_map_t        map,
2513 	vm_offset_t     addr,
2514 	vm_size_t       size);
2515 void
2516 kmem_free_external(
2517 	vm_map_t        map,
2518 	vm_offset_t     addr,
2519 	vm_size_t       size)
2520 {
2521 	if (size) {
2522 		kmem_free(map, trunc_page(addr), size);
2523 #if MACH_ASSERT
2524 	} else {
2525 		printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
2526 		    map, (void *)addr, __builtin_return_address(0));
2527 #endif
2528 	}
2529 }
2530 
2531 #pragma mark kmem metadata
2532 
2533 /*
2534  * Guard objects for kmem pointer allocation:
2535  *
2536  * Guard objects introduce size slabs to kmem pointer allocations that are
2537  * allocated in chunks of n * sizeclass. When an allocation of a specific
2538  * sizeclass is requested, a random slot from [0, n) is returned.
2539  * Allocations are returned from that chunk until m slots are left. The
2540  * remaining m slots are referred to as guard objects. They don't get
2541  * allocated and the chunk is now considered full. When an allocation is
2542  * freed back to the chunk, one of the now m + 1 free slots becomes
2543  * available for the next allocation of that sizeclass.
2544  *
2545  * Guard objects are intended to make exploitation of use after frees harder
2546  * as allocations that are freed can no longer be reliably reallocated.
2547  * They also make exploitation of OOBs harder as overflowing out of an
2548  * allocation can no longer be safe even with sufficient spraying.
2549  */
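/*
 * Illustrative sketch (not normative; the figures follow from the
 * KMEM_NUM_* constants below): with KMEM_NUM_SLOTS = 8 and
 * KMEM_NUM_GUARDS = 2, a sizeclass whose chunk holds 8 elements hands
 * out slots at random until only 2 remain; the chunk is then treated
 * as full.  Freeing one element back to such a chunk makes a single
 * slot eligible again, chosen at random among the now 3 free slots.
 */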
2550 
2551 #define KMEM_META_PRIMARY    UINT8_MAX
2552 #define KMEM_META_START     (UINT8_MAX - 1)
2553 #define KMEM_META_FREE      (UINT8_MAX - 2)
2554 #if __ARM_16K_PG__
2555 #define KMEM_MIN_SIZE        PAGE_SIZE
2556 #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 16)
2557 #else /* __ARM_16K_PG__ */
2558 /*
2559  * PAGE_SIZE isn't a compile time constant on some arm64 devices. Those
2560  * devices use 4k page size when their RAM is <= 1GB and 16k otherwise.
2561  * Therefore populate sizeclasses from 4k for those devices.
2562  */
2563 #define KMEM_MIN_SIZE       (4 * 1024)
2564 #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 32)
2565 #endif /* __ARM_16K_PG__ */
2566 #define KMEM_MAX_SIZE       (32ULL << 20)
2567 #define KMEM_START_IDX      (kmem_log2down(KMEM_MIN_SIZE))
2568 #define KMEM_LAST_IDX       (kmem_log2down(KMEM_MAX_SIZE))
2569 #define KMEM_NUM_SIZECLASS  (KMEM_LAST_IDX - KMEM_START_IDX + 1)
2570 #define KMEM_FRONTS         (KMEM_RANGE_ID_NUM_PTR * 2)
2571 #define KMEM_NUM_SLOTS       8
2572 #define KMEM_NUM_GUARDS      2
2573 #define KMEM_NUM_QUARANTINE  2
2574 
2575 struct kmem_page_meta {
2576 	union {
2577 		/*
2578 		 * On primary allocated chunk with KMEM_META_PRIMARY marker
2579 		 */
2580 		uint32_t km_bitmap;
2581 		/*
2582 		 * On start and end of free chunk with KMEM_META_FREE marker
2583 		 */
2584 		uint32_t km_free_chunks;
2585 	};
2586 	/*
2587 	 * KMEM_META_PRIMARY: Start meta of allocated chunk
2588 	 * KMEM_META_FREE   : Start and end meta of free chunk
2589 	 * KMEM_META_START  : Meta region start and end
2590 	 */
2591 	uint8_t  km_page_marker;
2592 	uint8_t  km_sizeclass;
2593 	union {
2594 		/*
2595 		 * On primary allocated chunk with KMEM_META_PRIMARY marker
2596 		 */
2597 		uint16_t km_chunk_len;
2598 		/*
2599 		 * On secondary allocated chunks
2600 		 */
2601 		uint16_t km_page_idx;
2602 	};
2603 	LIST_ENTRY(kmem_page_meta) km_link;
2604 } kmem_page_meta_t;
2605 
2606 typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t;
2607 struct kmem_sizeclass {
2608 	vm_map_size_t                   ks_size;
2609 	uint32_t                        ks_num_chunk;
2610 	uint32_t                        ks_num_elem;
2611 	crypto_random_ctx_t __zpercpu   ks_rng_ctx;
2612 	kmem_list_head_t                ks_allfree_head[KMEM_FRONTS];
2613 	kmem_list_head_t                ks_partial_head[KMEM_FRONTS];
2614 	kmem_list_head_t                ks_full_head[KMEM_FRONTS];
2615 };
2616 
2617 static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS];
2618 
2619 /*
2620  * Locks to synchronize metadata population
2621  */
2622 static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks");
2623 static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp);
2624 #define kmem_meta_lock()   lck_mtx_lock(&kmem_meta_region_lck)
2625 #define kmem_meta_unlock() lck_mtx_unlock(&kmem_meta_region_lck)
2626 
2627 static SECURITY_READ_ONLY_LATE(struct mach_vm_range)
2628 kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1];
2629 static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *)
2630 kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1];
2631 /*
2632  * Keeps track of metadata high water mark for each front
2633  */
2634 static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS];
2635 static SECURITY_READ_ONLY_LATE(vm_map_t)
2636 kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1];
2637 static vm_map_size_t kmem_meta_size;
2638 
2639 static uint32_t
2640 kmem_guard_count(struct kmem_sizeclass *kmem)
2641 {
2642 	return kmem->ks_num_elem * KMEM_NUM_GUARDS / KMEM_NUM_SLOTS;
2643 }
2644 
2645 static uint32_t
2646 kmem_guard_and_quarantine_count(struct kmem_sizeclass *kmem)
2647 {
2648 	return kmem->ks_num_elem * (KMEM_NUM_GUARDS + KMEM_NUM_QUARANTINE) /
2649 	       KMEM_NUM_SLOTS;
2650 }
2651 
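/*
 * Sketch of the front numbering (derived from kmem_get_front() below):
 * each pointer range owns two allocation fronts, one growing from the
 * left and one from the right, so front (range_id - KMEM_RANGE_ID_FIRST) * 2
 * is the left front and the next index the right one; hence
 * KMEM_FRONTS == KMEM_RANGE_ID_NUM_PTR * 2.
 */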
2652 static uint32_t
2653 kmem_get_front(
2654 	kmem_range_id_t         range_id,
2655 	bool                    from_right)
2656 {
2657 	assert((range_id >= KMEM_RANGE_ID_FIRST) &&
2658 	    (range_id <= KMEM_RANGE_ID_NUM_PTR));
2659 	return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right;
2660 }
2661 
2662 static inline uint32_t
2663 kmem_slot_idx_to_bit(
2664 	uint32_t                slot_idx,
2665 	uint32_t                size_idx __unused)
2666 {
2667 	assert(slot_idx < kmem_size_array[size_idx].ks_num_elem);
2668 	return 1ull << slot_idx;
2669 }
2670 
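/*
 * Sketch of the size <-> index mapping implemented below (examples
 * assume the 4k KMEM_MIN_SIZE configuration): index i covers sizes in
 * (2^(i + KMEM_START_IDX - 1), 2^(i + KMEM_START_IDX)], e.g.
 * kmem_get_idx_from_size(4096) == 0 and kmem_get_idx_from_size(4097) == 1,
 * while kmem_get_size_from_idx() returns the upper bound and is the
 * inverse for exact powers of two.
 */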
2671 static uint32_t
2672 kmem_get_idx_from_size(vm_map_size_t size)
2673 {
2674 	assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE);
2675 	return kmem_log2down(size - 1) - KMEM_START_IDX + 1;
2676 }
2677 
2678 __abortlike
2679 static void
2680 kmem_invalid_size_idx(uint32_t idx)
2681 {
2682 	panic("Invalid sizeclass idx %u", idx);
2683 }
2684 
2685 static vm_map_size_t
2686 kmem_get_size_from_idx(uint32_t idx)
2687 {
2688 	if (__improbable(idx >= KMEM_NUM_SIZECLASS)) {
2689 		kmem_invalid_size_idx(idx);
2690 	}
2691 	return 1ul << (idx + KMEM_START_IDX);
2692 }
2693 
2694 static inline uint16_t
2695 kmem_get_page_idx(struct kmem_page_meta *meta)
2696 {
2697 	uint8_t page_marker = meta->km_page_marker;
2698 
2699 	return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx;
2700 }
2701 
2702 __abortlike
2703 static void
2704 kmem_invalid_chunk_len(struct kmem_page_meta *meta)
2705 {
2706 	panic("Reading free chunks for meta %p where marker != KMEM_META_PRIMARY",
2707 	    meta);
2708 }
2709 
2710 static inline uint16_t
2711 kmem_get_chunk_len(struct kmem_page_meta *meta)
2712 {
2713 	if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) {
2714 		kmem_invalid_chunk_len(meta);
2715 	}
2716 
2717 	return meta->km_chunk_len;
2718 }
2719 
2720 __abortlike
2721 static void
2722 kmem_invalid_free_chunk_len(struct kmem_page_meta *meta)
2723 {
2724 	panic("Reading free chunks for meta %p where marker != KMEM_META_FREE",
2725 	    meta);
2726 }
2727 
2728 static inline uint32_t
2729 kmem_get_free_chunk_len(struct kmem_page_meta *meta)
2730 {
2731 	if (__improbable(meta->km_page_marker != KMEM_META_FREE)) {
2732 		kmem_invalid_free_chunk_len(meta);
2733 	}
2734 
2735 	return meta->km_free_chunks;
2736 }
2737 
2738 /*
2739  * Return the metadata corresponding to the specified address
2740  */
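/*
 * (Sketch: the per-range metadata array is indexed by
 *  (addr - range_start) / KMEM_CHUNK_SIZE_MIN, so each
 *  KMEM_CHUNK_SIZE_MIN-sized chunk of VA owns exactly one
 *  struct kmem_page_meta.)
 */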
2741 static struct kmem_page_meta *
2742 kmem_addr_to_meta(
2743 	vm_map_offset_t         addr,
2744 	vm_map_range_id_t       range_id,
2745 	vm_map_offset_t        *range_start,
2746 	uint64_t               *meta_idx)
2747 {
2748 	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
2749 
2750 	*range_start = kmem_ranges[range_id].min_address;
2751 	*meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN;
2752 	return VM_FAR_ADD_PTR_UNBOUNDED(meta_base, *meta_idx);
2753 }
2754 
2755 /*
2756  * Return the metadata start of the chunk that the address belongs to
2757  */
2758 static struct kmem_page_meta *
2759 kmem_addr_to_meta_start(
2760 	vm_address_t            addr,
2761 	vm_map_range_id_t       range_id,
2762 	vm_map_offset_t        *chunk_start)
2763 {
2764 	vm_map_offset_t range_start;
2765 	uint64_t meta_idx;
2766 	struct kmem_page_meta *meta;
2767 
2768 	meta = kmem_addr_to_meta(addr, range_id, &range_start, &meta_idx);
2769 	meta_idx -= kmem_get_page_idx(meta);
2770 	meta = VM_FAR_ADD_PTR_UNBOUNDED(meta, -(ptrdiff_t)kmem_get_page_idx(meta));
2771 	assert(meta->km_page_marker == KMEM_META_PRIMARY);
2772 	*chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN);
2773 	return meta;
2774 }
2775 
2776 __startup_func
2777 static void
2778 kmem_init_meta_front(
2779 	struct kmem_page_meta  *meta,
2780 	kmem_range_id_t         range_id,
2781 	bool                    from_right)
2782 {
2783 	kernel_memory_populate(trunc_page((vm_map_offset_t) meta), PAGE_SIZE,
2784 	    KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK);
2785 	meta->km_page_marker = KMEM_META_START;
2786 	if (!from_right) {
2787 		meta++;
2788 		kmem_meta_base[range_id] = meta;
2789 	}
2790 	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta;
2791 }
2792 
2793 __startup_func
2794 static void
2795 kmem_metadata_init(void)
2796 {
2797 	for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) {
2798 		vm_map_offset_t addr = kmem_meta_range[i].min_address;
2799 		struct kmem_page_meta *meta;
2800 		uint64_t meta_idx;
2801 
2802 		vm_map_will_allocate_early_map(&kmem_meta_map[i]);
2803 		kmem_meta_map[i] = kmem_suballoc(kernel_map, &addr, kmem_meta_size,
2804 		    VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST,
2805 		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
2806 		    KMS_PERMANENT | KMS_NOFAIL | KMS_NOSOFTLIMIT,
2807 		    VM_KERN_MEMORY_OSFMK).kmr_submap;
2808 
2809 		kmem_meta_range[i].min_address = addr;
2810 		kmem_meta_range[i].max_address = addr + kmem_meta_size;
2811 
2812 		meta = (struct kmem_page_meta *) kmem_meta_range[i].min_address;
2813 		kmem_init_meta_front(meta, i, 0);
2814 
2815 		meta = kmem_addr_to_meta(kmem_ranges[i].max_address, i, &addr,
2816 		    &meta_idx);
2817 		kmem_init_meta_front(meta, i, 1);
2818 	}
2819 }
2820 
2821 __startup_func
2822 static void
2823 kmem_init_front_head(
2824 	struct kmem_sizeclass  *ks,
2825 	uint32_t                front)
2826 {
2827 	LIST_INIT(&ks->ks_allfree_head[front]);
2828 	LIST_INIT(&ks->ks_partial_head[front]);
2829 	LIST_INIT(&ks->ks_full_head[front]);
2830 }
2831 
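/*
 * Illustrative numbers (assuming the 4k KMEM_MIN_SIZE configuration,
 * i.e. KMEM_CHUNK_SIZE_MIN == 128k, and KMEM_NUM_SLOTS == 8): the 4k
 * sizeclass uses 1 chunk of 32 elements, while the 1MB sizeclass uses
 * 64 chunks of 8 elements; ks_num_elem is what the assert below bounds
 * to the 32 bits available in km_bitmap.
 */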
2832 __startup_func
2833 static void
2834 kmem_sizeclass_init(void)
2835 {
2836 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2837 		struct kmem_sizeclass *ks = &kmem_size_array[i];
2838 		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
2839 
2840 		ks->ks_size = kmem_get_size_from_idx(i);
2841 		ks->ks_num_chunk = roundup(KMEM_NUM_SLOTS * ks->ks_size,
2842 		    KMEM_CHUNK_SIZE_MIN) / KMEM_CHUNK_SIZE_MIN;
2843 		ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size;
2844 		assert(ks->ks_num_elem <=
2845 		    (sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8));
2846 		for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) {
2847 			kmem_init_front_head(ks, kmem_get_front(range_id, 0));
2848 			kmem_init_front_head(ks, kmem_get_front(range_id, 1));
2849 		}
2850 	}
2851 }
2852 
2853 /*
2854  * This is done during EARLY_BOOT as it needs the corecrypto module to be
2855  * set up.
2856  */
2857 __startup_func
2858 static void
2859 kmem_crypto_init(void)
2860 {
2861 	vm_size_t ctx_size = crypto_random_kmem_ctx_size();
2862 
2863 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2864 		struct kmem_sizeclass *ks = &kmem_size_array[i];
2865 
2866 		ks->ks_rng_ctx = zalloc_percpu_permanent(ctx_size, ZALIGN_PTR);
2867 		zpercpu_foreach(ctx, ks->ks_rng_ctx) {
2868 			crypto_random_kmem_init(ctx);
2869 		}
2870 	}
2871 }
2872 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init);
2873 
2874 __abortlike
2875 static void
2876 kmem_validate_slot_panic(
2877 	vm_map_offset_t         addr,
2878 	struct kmem_page_meta  *meta,
2879 	uint32_t                slot_idx,
2880 	uint32_t                size_idx)
2881 {
2882 	if (meta->km_page_marker != KMEM_META_PRIMARY) {
2883 		panic("Metadata (%p) for addr (%p) not primary", meta, (void *)addr);
2884 	}
2885 	if (meta->km_sizeclass != size_idx) {
2886 		panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion",
2887 		    meta, meta->km_sizeclass, size_idx);
2888 	}
2889 	panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free",
2890 	    slot_idx, meta, (void *)addr);
2891 }
2892 
2893 __abortlike
2894 static void
2895 kmem_invalid_slot_for_addr(
2896 	mach_vm_range_t         slot,
2897 	vm_map_offset_t         start,
2898 	vm_map_offset_t         end)
2899 {
2900 	panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]",
2901 	    (void *)slot->min_address, (void *)slot->max_address,
2902 	    (void *)start, (void *)end);
2903 }
2904 
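/*
 * (Sketch of the invariant checked here: for a live allocation the
 *  owning chunk's primary metadata must still be KMEM_META_PRIMARY,
 *  carry the expected sizeclass, and have the slot's bitmap bit
 *  cleared -- a set bit means the slot is free, i.e. a double free.)
 */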
2905 void
2906 kmem_validate_slot(
2907 	vm_map_offset_t         addr,
2908 	struct kmem_page_meta  *meta,
2909 	uint32_t                size_idx,
2910 	uint32_t                slot_idx)
2911 {
2912 	if ((meta->km_page_marker != KMEM_META_PRIMARY) ||
2913 	    (meta->km_sizeclass != size_idx) ||
2914 	    ((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) {
2915 		kmem_validate_slot_panic(addr, meta, slot_idx, size_idx);
2916 	}
2917 }
2918 
2919 static void
2920 kmem_validate_slot_initial(
2921 	mach_vm_range_t         slot,
2922 	vm_map_offset_t         start,
2923 	vm_map_offset_t         end,
2924 	struct kmem_page_meta  *meta,
2925 	uint32_t                size_idx,
2926 	uint32_t                slot_idx)
2927 {
2928 	if ((slot->min_address == 0) || (slot->max_address == 0) ||
2929 	    (start < slot->min_address) || (start >= slot->max_address) ||
2930 	    (end > slot->max_address)) {
2931 		kmem_invalid_slot_for_addr(slot, start, end);
2932 	}
2933 
2934 	kmem_validate_slot(start, meta, size_idx, slot_idx);
2935 }
2936 
2937 uint32_t
2938 kmem_addr_get_slot_idx(
2939 	vm_map_offset_t         start,
2940 	vm_map_offset_t         end,
2941 	vm_map_range_id_t       range_id,
2942 	struct kmem_page_meta **meta,
2943 	uint32_t               *size_idx,
2944 	mach_vm_range_t         slot)
2945 {
2946 	vm_map_offset_t chunk_start;
2947 	vm_map_size_t slot_size;
2948 	uint32_t slot_idx;
2949 
2950 	*meta = kmem_addr_to_meta_start(start, range_id, &chunk_start);
2951 	*size_idx = (*meta)->km_sizeclass;
2952 	slot_size = kmem_get_size_from_idx(*size_idx);
2953 	slot_idx = (start - chunk_start) / slot_size;
2954 	slot->min_address = chunk_start + slot_idx * slot_size;
2955 	slot->max_address = slot->min_address + slot_size;
2956 
2957 	kmem_validate_slot_initial(slot, start, end, *meta, *size_idx, slot_idx);
2958 
2959 	return slot_idx;
2960 }
2961 
2962 static bool
2963 kmem_populate_needed(vm_offset_t from, vm_offset_t to)
2964 {
2965 #if KASAN
2966 #pragma unused(from, to)
2967 	return true;
2968 #else
2969 	vm_offset_t page_addr = trunc_page(from);
2970 
2971 	for (; page_addr < to; page_addr += PAGE_SIZE) {
2972 		/*
2973 		 * This can race with another thread doing a populate on the same metadata
2974 		 * page, where we see an updated pmap but unmapped KASan shadow, causing a
2975 		 * fault in the shadow when we first access the metadata page. Avoid this
2976 		 * by always synchronizing on the kmem_meta_lock with KASan.
2977 		 */
2978 		if (!pmap_find_phys(kernel_pmap, page_addr)) {
2979 			return true;
2980 		}
2981 	}
2982 
2983 	return false;
2984 #endif /* !KASAN */
2985 }
2986 
2987 static void
2988 kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to)
2989 {
2990 	vm_offset_t page_addr = trunc_page(from);
2991 
2992 	vmlp_api_start(KMEM_POPULATE_META_LOCKED);
2993 
2994 	vm_map_unlock(kernel_map);
2995 
2996 	vmlp_range_event(kernel_map, from, to - from);
2997 
2998 	for (; page_addr < to; page_addr += PAGE_SIZE) {
2999 		for (;;) {
3000 			kern_return_t ret = KERN_SUCCESS;
3001 
3002 			/*
3003 			 * All updates to kmem metadata are done under the kmem_meta_lock
3004 			 */
3005 			kmem_meta_lock();
3006 			if (0 == pmap_find_phys(kernel_pmap, page_addr)) {
3007 				ret = kernel_memory_populate(page_addr,
3008 				    PAGE_SIZE, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
3009 				    VM_KERN_MEMORY_OSFMK);
3010 			}
3011 			kmem_meta_unlock();
3012 
3013 			if (ret == KERN_SUCCESS) {
3014 				break;
3015 			}
3016 
3017 			/*
3018 			 * We can't pass KMA_NOPAGEWAIT under a global lock as it leads
3019 			 * to bad system deadlocks, so if the allocation failed,
3020 			 * we need to do the VM_PAGE_WAIT() outside of the lock.
3021 			 */
3022 			VM_PAGE_WAIT();
3023 		}
3024 	}
3025 
3026 	vm_map_lock(kernel_map);
3027 	vmlp_api_end(KMEM_POPULATE_META_LOCKED, 0);
3028 }
3029 
3030 __abortlike
3031 static void
3032 kmem_invalid_meta_panic(
3033 	struct kmem_page_meta  *meta,
3034 	uint32_t                slot_idx,
3035 	struct kmem_sizeclass   sizeclass)
3036 {
3037 	uint32_t size_idx = kmem_get_idx_from_size(sizeclass.ks_size);
3038 
3039 	if (slot_idx >= sizeclass.ks_num_elem) {
3040 		panic("Invalid slot idx %u [0:%u] for meta %p", slot_idx,
3041 		    sizeclass.ks_num_elem, meta);
3042 	}
3043 	if (meta->km_sizeclass != size_idx) {
3044 		panic("Invalid size_idx (%u != %u) in meta %p", size_idx,
3045 		    meta->km_sizeclass, meta);
3046 	}
3047 	panic("page_marker %u not primary in meta %p", meta->km_page_marker, meta);
3048 }
3049 
3050 __abortlike
3051 static void
3052 kmem_slot_has_entry_panic(
3053 	vm_map_entry_t          entry,
3054 	vm_map_offset_t         addr)
3055 {
3056 	panic("Entry (%p) already exists for addr (%p) being returned",
3057 	    entry, (void *)addr);
3058 }
3059 
3060 __abortlike
3061 static void
3062 kmem_slot_not_found(
3063 	struct kmem_page_meta  *meta,
3064 	uint32_t                slot_idx)
3065 {
3066 	panic("%uth free slot not found for meta %p bitmap %u", slot_idx, meta,
3067 	    meta->km_bitmap);
3068 }
3069 
3070 /*
3071  * Returns a 16-bit random number between 0 and
3072  * upper_limit (inclusive)
3073  */
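/*
 * (Note: a single early_random() value is consumed 32 bits at a time;
 *  the modulo introduces a small bias, which should be acceptable for
 *  the boot-time slot selection this backs.)
 */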
3074 __startup_func
3075 uint16_t
3076 kmem_get_random16(
3077 	uint16_t                upper_limit)
3078 {
3079 	static uint64_t random_entropy;
3080 	assert(upper_limit < UINT16_MAX);
3081 	if (random_entropy == 0) {
3082 		random_entropy = early_random();
3083 	}
3084 	uint32_t result = random_entropy & UINT32_MAX;
3085 	random_entropy >>= 32;
3086 	return (uint16_t)(result % (upper_limit + 1));
3087 }
3088 
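/*
 * Return the bit position of the n-th (0-based) set bit of the bitmap,
 * i.e. the slot index of the n-th free slot.  Worked example:
 * bitmap = 0b101100, n = 1 -> 3 (the set bits are at positions 2, 3, 5).
 * Panics via kmem_slot_not_found() if fewer than n + 1 bits are set.
 */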
3089 static uint32_t
3090 kmem_get_nth_free_slot(
3091 	struct kmem_page_meta  *meta,
3092 	uint32_t                n,
3093 	uint32_t                bitmap)
3094 {
3095 	uint32_t zeros_seen = 0, ones_seen = 0;
3096 
3097 	while (bitmap) {
3098 		uint32_t count = __builtin_ctz(bitmap);
3099 
3100 		zeros_seen += count;
3101 		bitmap >>= count;
3102 		if (__probable(~bitmap)) {
3103 			count = __builtin_ctz(~bitmap);
3104 		} else {
3105 			count = 32;
3106 		}
3107 		if (count + ones_seen > n) {
3108 			return zeros_seen + n;
3109 		}
3110 		ones_seen += count;
3111 		bitmap >>= count;
3112 	}
3113 
3114 	kmem_slot_not_found(meta, n);
3115 }
3116 
3117 
3118 static uint32_t
3119 kmem_get_next_slot(
3120 	struct kmem_page_meta  *meta,
3121 	struct kmem_sizeclass   sizeclass,
3122 	uint32_t                bitmap)
3123 {
3124 	uint32_t num_slots = __builtin_popcount(bitmap);
3125 	uint64_t slot_idx = 0;
3126 
3127 	assert(num_slots > 0);
3128 	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
3129 		/*
3130 		 * Use early random prior to early boot as the ks_rng_ctx requires
3131 		 * the corecrypto module to be setup before it is initialized and
3132 		 * used.
3133 		 *
3134 		 * num_slots can't be 0 as we take this path when we have more than
3135 		 * one slot left.
3136 		 */
3137 		slot_idx = kmem_get_random16((uint16_t)num_slots - 1);
3138 	} else {
3139 		crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), num_slots,
3140 		    &slot_idx);
3141 	}
3142 
3143 	return kmem_get_nth_free_slot(meta, slot_idx, bitmap);
3144 }
3145 
3146 /*
3147  * Returns an unallocated slot from the given metadata
3148  */
3149 static vm_map_offset_t
3150 kmem_get_addr_from_meta(
3151 	struct kmem_page_meta  *meta,
3152 	vm_map_range_id_t       range_id,
3153 	struct kmem_sizeclass   sizeclass,
3154 	vm_map_entry_t         *entry)
3155 {
3156 	vm_map_offset_t addr;
3157 	vm_map_size_t size = sizeclass.ks_size;
3158 	uint32_t size_idx = kmem_get_idx_from_size(size);
3159 	uint64_t meta_idx = meta - kmem_meta_base[range_id];
3160 	mach_vm_offset_t range_start = kmem_ranges[range_id].min_address;
3161 	uint32_t slot_bit;
3162 	uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, meta->km_bitmap);
3163 
3164 	if ((slot_idx >= sizeclass.ks_num_elem) ||
3165 	    (meta->km_sizeclass != size_idx) ||
3166 	    (meta->km_page_marker != KMEM_META_PRIMARY)) {
3167 		kmem_invalid_meta_panic(meta, slot_idx, sizeclass);
3168 	}
3169 
3170 	slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx);
3171 	meta->km_bitmap &= ~slot_bit;
3172 
3173 	addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size);
3174 	assert(kmem_range_contains_fully(range_id, addr, size));
3175 	if (vm_map_lookup_entry(kernel_map, addr, entry)) {
3176 		kmem_slot_has_entry_panic(*entry, addr);
3177 	}
3178 	if ((*entry != vm_map_to_entry(kernel_map)) &&
3179 	    ((*entry)->vme_next != vm_map_to_entry(kernel_map)) &&
3180 	    ((*entry)->vme_next->vme_start < (addr + size))) {
3181 		kmem_slot_has_entry_panic(*entry, addr);
3182 	}
3183 	return addr;
3184 }
3185 
3186 __abortlike
3187 static void
3188 kmem_range_out_of_va(
3189 	kmem_range_id_t         range_id,
3190 	uint32_t                num_chunks)
3191 {
3192 	panic("No more VA to allocate %u chunks in range %u", num_chunks, range_id);
3193 }
3194 
3195 static void
3196 kmem_init_allocated_chunk(
3197 	struct kmem_page_meta  *meta,
3198 	struct kmem_sizeclass   sizeclass,
3199 	uint32_t                size_idx)
3200 {
3201 	uint32_t meta_num = sizeclass.ks_num_chunk;
3202 	uint32_t num_elem = sizeclass.ks_num_elem;
3203 
3204 	meta->km_bitmap = (1ull << num_elem) - 1;
3205 	meta->km_chunk_len = (uint16_t)meta_num;
3206 	assert(LIST_NEXT(meta, km_link) == NULL);
3207 	assert(meta->km_link.le_prev == NULL);
3208 	meta->km_sizeclass = (uint8_t)size_idx;
3209 	meta->km_page_marker = KMEM_META_PRIMARY;
3210 	meta++;
3211 	for (uint32_t i = 1; i < meta_num; i++) {
3212 		meta->km_page_idx = (uint16_t)i;
3213 		meta->km_sizeclass = (uint8_t)size_idx;
3214 		meta->km_page_marker = 0;
3215 		meta->km_bitmap = 0;
3216 		meta++;
3217 	}
3218 }
3219 
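/*
 * (Sketch: when the chunk adjacent to the metadata high watermark is
 *  free, it can be coalesced into the new allocation, so only
 *  meta_req - km_free_chunks additional chunks of fresh VA are needed;
 *  *adj_free_meta is set to the start of that free run, or NULL.)
 */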
3220 static uint32_t
3221 kmem_get_additional_meta(
3222 	struct kmem_page_meta  *meta,
3223 	uint32_t                meta_req,
3224 	bool                    from_right,
3225 	struct kmem_page_meta **adj_free_meta)
3226 {
3227 	struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1);
3228 
3229 	if (meta_prev->km_page_marker == KMEM_META_FREE) {
3230 		uint32_t chunk_len = kmem_get_free_chunk_len(meta_prev);
3231 
3232 		*adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1);
3233 		meta_req -= chunk_len;
3234 	} else {
3235 		*adj_free_meta = NULL;
3236 	}
3237 
3238 	return meta_req;
3239 }
3240 
3241 
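/*
 * (Descriptive sketch, not normative: carve sizeclass.ks_num_chunk
 *  chunks out of the unused VA between the range's two metadata high
 *  watermarks, growing from the requested front and coalescing with an
 *  adjacent free run when possible.  Returns NULL when the map lock
 *  had to be dropped to populate metadata pages and the watermarks or
 *  the adjacent free run changed underneath us, so the caller can
 *  re-check the freelists.)
 */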
3242 static struct kmem_page_meta *
3243 kmem_get_new_chunk(
3244 	vm_map_range_id_t       range_id,
3245 	bool                    from_right,
3246 	uint32_t                size_idx)
3247 {
3248 	struct kmem_sizeclass sizeclass = kmem_size_array[size_idx];
3249 	struct kmem_page_meta *start, *end, *meta_update;
3250 	struct kmem_page_meta *adj_free_meta = NULL;
3251 	uint32_t meta_req = sizeclass.ks_num_chunk;
3252 
3253 	for (;;) {
3254 		struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3255 		struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3256 		struct kmem_page_meta *meta;
3257 		vm_offset_t start_addr, end_addr;
3258 		uint32_t meta_num;
3259 
3260 		meta = from_right ? metab : metaf;
3261 		meta_num = kmem_get_additional_meta(meta, meta_req, from_right,
3262 		    &adj_free_meta);
3263 
3264 		if (metaf + meta_num >= metab) {
3265 			kmem_range_out_of_va(range_id, meta_num);
3266 		}
3267 
3268 		start = from_right ? (metab - meta_num) : metaf;
3269 		end = from_right ? metab : (metaf + meta_num);
3270 
3271 		start_addr = (vm_offset_t)start;
3272 		end_addr   = (vm_offset_t)end;
3273 
3274 		/*
3275 		 * If the new high watermark stays on the same page,
3276 		 * no need to populate and drop the lock.
3277 		 */
3278 		if (!page_aligned(from_right ? end_addr : start_addr) &&
3279 		    trunc_page(start_addr) == trunc_page(end_addr - 1)) {
3280 			break;
3281 		}
3282 		if (!kmem_populate_needed(start_addr, end_addr)) {
3283 			break;
3284 		}
3285 
3286 		kmem_populate_meta_locked(start_addr, end_addr);
3287 
3288 		/*
3289 		 * Since we dropped the lock, reassess that the conditions still hold:
3290 		 * - the HWM we are changing must not have moved
3291 		 * - the other HWM must not intersect with ours
3292 		 * - in case of coalescing, the adjacent free meta must still
3293 		 *   be free and of the same size.
3294 		 *
3295 		 * If we failed to grow, return NULL so that the caller can
3296 		 * reevaluate whether the freelists have entries now.
3297 		 */
3298 		metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3299 		metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3300 		if (meta != (from_right ? metab : metaf)) {
3301 			return NULL;
3302 		}
3303 		if (metaf + meta_num >= metab) {
3304 			kmem_range_out_of_va(range_id, meta_num);
3305 		}
3306 		if (adj_free_meta) {
3307 			if (adj_free_meta->km_page_marker != KMEM_META_FREE ||
3308 			    kmem_get_free_chunk_len(adj_free_meta) !=
3309 			    meta_req - meta_num) {
3310 				return NULL;
3311 			}
3312 		}
3313 
3314 		break;
3315 	}
3316 
3317 	/*
3318 	 * If there is an adjacent free chunk remove it from free list
3319 	 */
3320 	if (adj_free_meta) {
3321 		LIST_REMOVE(adj_free_meta, km_link);
3322 		LIST_NEXT(adj_free_meta, km_link) = NULL;
3323 		adj_free_meta->km_link.le_prev = NULL;
3324 	}
3325 
3326 	/*
3327 	 * Update hwm
3328 	 */
3329 	meta_update = from_right ? start : end;
3330 	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update;
3331 
3332 	/*
3333 	 * Initialize metadata
3334 	 */
3335 	start = from_right ? start : (end - meta_req);
3336 	kmem_init_allocated_chunk(start, sizeclass, size_idx);
3337 
3338 	return start;
3339 }
3340 
3341 static void
3342 kmem_requeue_meta(
3343 	struct kmem_page_meta  *meta,
3344 	struct kmem_list_head  *head)
3345 {
3346 	LIST_REMOVE(meta, km_link);
3347 	LIST_INSERT_HEAD(head, meta, km_link);
3348 }
3349 
3350 /*
3351  * Return the corresponding sizeclass to stash free chunks in
3352  */
3353 __abortlike
3354 static void
3355 kmem_invalid_chunk_num(uint32_t chunks)
3356 {
3357 	panic("Invalid number of chunks %u\n", chunks);
3358 }
3359 
3360 static uint32_t
3361 kmem_get_size_idx_for_chunks(uint32_t chunks)
3362 {
3363 	for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) {
3364 		if (chunks >= kmem_size_array[i].ks_num_chunk) {
3365 			return i;
3366 		}
3367 	}
3368 	kmem_invalid_chunk_num(chunks);
3369 }
3370 
3371 static void
3372 kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count)
3373 {
3374 	bzero(meta, count * sizeof(struct kmem_page_meta));
3375 }
3376 
3377 static void
3378 kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count)
3379 {
3380 #if MACH_ASSERT
3381 	size_t size = count * sizeof(struct kmem_page_meta);
3382 
3383 	assert(memcmp_zero_ptr_aligned(meta, size) == 0);
3384 #else
3385 #pragma unused(meta, count)
3386 #endif
3387 }
3388 
3389 /*!
3390  * @function kmem_init_free_chunk()
3391  *
3392  * @discussion
3393  * This function prepares a range of chunks to be put on a free list.
3394  * The first and last metadata might be dirty, but the "inner" ones
3395  * must be zero filled by the caller prior to calling this function.
3396  */
3397 static void
3398 kmem_init_free_chunk(
3399 	struct kmem_page_meta  *meta,
3400 	uint32_t                num_chunks,
3401 	uint32_t                front)
3402 {
3403 	struct kmem_sizeclass *sizeclass;
3404 	uint32_t size_idx = kmem_get_size_idx_for_chunks(num_chunks);
3405 
3406 	if (num_chunks > 2) {
3407 		kmem_check_meta_range_is_clear(meta + 1, num_chunks - 2);
3408 	}
3409 
3410 	meta[0] = (struct kmem_page_meta){
3411 		.km_free_chunks = num_chunks,
3412 		.km_page_marker = KMEM_META_FREE,
3413 		.km_sizeclass   = (uint8_t)size_idx,
3414 	};
3415 	if (num_chunks > 1) {
3416 		meta[num_chunks - 1] = (struct kmem_page_meta){
3417 			.km_free_chunks = num_chunks,
3418 			.km_page_marker = KMEM_META_FREE,
3419 			.km_sizeclass   = (uint8_t)size_idx,
3420 		};
3421 	}
3422 
3423 	sizeclass = &kmem_size_array[size_idx];
3424 	LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link);
3425 }
3426 
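/*
 * Pop a run of free chunks suitable for the requested sizeclass.
 *
 * Scans the per-front all-free lists from size_idx upward, removes the first
 * run found, returns any excess chunks to a free list, and initializes the
 * remainder as an allocated chunk.  Returns NULL when no free run exists.
 */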
3427 static struct kmem_page_meta *
3428 kmem_get_free_chunk_from_list(
3429 	struct kmem_sizeclass  *org_sizeclass,
3430 	uint32_t                size_idx,
3431 	uint32_t                front)
3432 {
3433 	struct kmem_sizeclass *sizeclass;
3434 	uint32_t num_chunks = org_sizeclass->ks_num_chunk;
3435 	struct kmem_page_meta *meta;
3436 	uint32_t idx = size_idx;
3437 
3438 	while (idx < KMEM_NUM_SIZECLASS) {
3439 		sizeclass = &kmem_size_array[idx];
3440 		meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]);
3441 		if (meta) {
3442 			break;
3443 		}
3444 		idx++;
3445 	}
3446 
3447 	/*
3448 	 * Trim if the free run is larger than needed
3449 	 */
3450 	if (meta) {
3451 		uint32_t num_chunks_free = kmem_get_free_chunk_len(meta);
3452 
3453 		assert(meta->km_page_marker == KMEM_META_FREE);
3454 		LIST_REMOVE(meta, km_link);
3455 		LIST_NEXT(meta, km_link) = NULL;
3456 		meta->km_link.le_prev = NULL;
3457 		if (num_chunks_free > num_chunks) {
3458 			num_chunks_free -= num_chunks;
3459 			kmem_init_free_chunk(meta + num_chunks, num_chunks_free, front);
3460 		}
3461 
3462 		kmem_init_allocated_chunk(meta, *org_sizeclass, size_idx);
3463 	}
3464 
3465 	return meta;
3466 }
3467 
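/*
 * Find virtual space for an allocation of the given size within the range.
 *
 * Order of attempts:
 *  1. take a slot from a partially used chunk of the right sizeclass,
 *     requeueing the chunk to the full list once only guard slots remain free;
 *  2. otherwise carve a chunk out of an existing free run and queue it on the
 *     partial list;
 *  3. otherwise grow the metadata high watermark via kmem_get_new_chunk(),
 *     retrying from the free lists if that returns NULL (it dropped the lock
 *     and conditions changed).
 */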
3468 kern_return_t
3469 kmem_locate_space(
3470 	vm_map_size_t           size,
3471 	vm_map_range_id_t       range_id,
3472 	bool                    from_right,
3473 	vm_map_offset_t        *start_inout,
3474 	vm_map_entry_t         *entry_out)
3475 {
3476 	vm_map_entry_t entry;
3477 	uint32_t size_idx = kmem_get_idx_from_size(size);
3478 	uint32_t front = kmem_get_front(range_id, from_right);
3479 	struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx];
3480 	struct kmem_page_meta *meta;
3481 
3482 	assert(size <= sizeclass->ks_size);
3483 again:
3484 	if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) {
3485 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3486 		/*
3487 		 * Requeue to full if necessary
3488 		 */
3489 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3490 		if (__builtin_popcount(meta->km_bitmap) == kmem_guard_count(sizeclass)) {
3491 			kmem_requeue_meta(meta, &sizeclass->ks_full_head[front]);
3492 		}
3493 	} else if ((meta = kmem_get_free_chunk_from_list(sizeclass, size_idx,
3494 	    front)) != NULL) {
3495 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3496 		/*
3497 		 * Queue to partial
3498 		 */
3499 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3500 		assert(__builtin_popcount(meta->km_bitmap) > kmem_guard_count(sizeclass));
3501 		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3502 	} else {
3503 		meta = kmem_get_new_chunk(range_id, from_right, size_idx);
3504 		if (meta == NULL) {
3505 			goto again;
3506 		}
3507 		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3508 		assert(meta->km_page_marker == KMEM_META_PRIMARY);
3509 		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3510 	}
3511 
3512 	if (entry_out) {
3513 		*entry_out = entry;
3514 	}
3515 
3516 	return KERN_SUCCESS;
3517 }
3518 
3519 /*
3520  * Determine whether the given metadata was allocated from the right
3521  */
3522 static bool
3523 kmem_meta_is_from_right(
3524 	kmem_range_id_t         range_id,
3525 	struct kmem_page_meta  *meta)
3526 {
3527 	struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3528 	__assert_only struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3529 	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
3530 	struct kmem_page_meta *meta_end;
3531 
3532 	meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address;
3533 
3534 	if ((meta >= meta_base) && (meta < metaf)) {
3535 		return false;
3536 	}
3537 
3538 	assert(meta >= metab && meta < meta_end);
3539 	return true;
3540 }
3541 
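/*
 * Return a fully free chunk run to the all-free lists.
 *
 * The run's metadata is cleared and the run is coalesced with free neighbors
 * on the left and right, provided they lie within the same front (runs are
 * never merged across a metadata high watermark), before being reinserted via
 * kmem_init_free_chunk().
 */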
3542 static void
3543 kmem_free_chunk(
3544 	kmem_range_id_t         range_id,
3545 	struct kmem_page_meta  *meta,
3546 	bool                    from_right)
3547 {
3548 	struct kmem_page_meta *meta_coalesce = meta - 1;
3549 	struct kmem_page_meta *meta_start = meta;
3550 	uint32_t num_chunks = kmem_get_chunk_len(meta);
3551 	uint32_t add_chunks;
3552 	struct kmem_page_meta *meta_end = meta + num_chunks;
3553 	struct kmem_page_meta *meta_hwm_l, *meta_hwm_r;
3554 	uint32_t front = kmem_get_front(range_id, from_right);
3555 
3556 	meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3557 	meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3558 
3559 	LIST_REMOVE(meta, km_link);
3560 	kmem_clear_meta_range(meta, num_chunks);
3561 
3562 	/*
3563 	 * Coalesce left
3564 	 */
3565 	if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) &&
3566 	    (meta_coalesce->km_page_marker == KMEM_META_FREE)) {
3567 		meta_start = meta_coalesce - kmem_get_free_chunk_len(meta_coalesce) + 1;
3568 		add_chunks = kmem_get_free_chunk_len(meta_start);
3569 		num_chunks += add_chunks;
3570 		LIST_REMOVE(meta_start, km_link);
3571 		kmem_clear_meta_range(meta_start + add_chunks - 1, 1);
3572 	}
3573 
3574 	/*
3575 	 * Coalesce right
3576 	 */
3577 	if (((!from_right && (meta_end < meta_hwm_l)) || from_right) &&
3578 	    (meta_end->km_page_marker == KMEM_META_FREE)) {
3579 		add_chunks = kmem_get_free_chunk_len(meta_end);
3580 		LIST_REMOVE(meta_end, km_link);
3581 		kmem_clear_meta_range(meta_end, 1);
3582 		meta_end = meta_end + add_chunks;
3583 		num_chunks += add_chunks;
3584 	}
3585 
3586 	kmem_init_free_chunk(meta_start, num_chunks, front);
3587 }
3588 
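/*
 * Release a single slot back to its chunk.
 *
 * km_bitmap has one bit per slot, set while the slot is free; the slot's bit
 * is set here, then the chunk is either released entirely (every slot is now
 * free), requeued from the full list to the partial list (it just crossed
 * back over the threshold at which it is considered full), or left as is.
 */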
3589 static void
3590 kmem_free_slot(
3591 	kmem_range_id_t         range_id,
3592 	mach_vm_range_t         slot)
3593 {
3594 	struct kmem_page_meta *meta;
3595 	vm_map_offset_t chunk_start;
3596 	uint32_t size_idx, chunk_elem, slot_idx, num_elem;
3597 	struct kmem_sizeclass *sizeclass;
3598 	vm_map_size_t slot_size;
3599 
3600 	meta = kmem_addr_to_meta_start(slot->min_address, range_id, &chunk_start);
3601 	size_idx = meta->km_sizeclass;
3602 	slot_size = kmem_get_size_from_idx(size_idx);
3603 	slot_idx = (slot->min_address - chunk_start) / slot_size;
3604 	assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0);
3605 	meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx);
3606 
3607 	sizeclass = &kmem_size_array[size_idx];
3608 	chunk_elem = sizeclass->ks_num_elem;
3609 	num_elem = __builtin_popcount(meta->km_bitmap);
3610 
3611 	if (num_elem == chunk_elem) {
3612 		/*
3613 		 * If the entire chunk is now empty, release it back to the all-free list
3614 		 */
3615 		bool from_right = kmem_meta_is_from_right(range_id, meta);
3616 
3617 		kmem_free_chunk(range_id, meta, from_right);
3618 	} else if (num_elem == kmem_guard_and_quarantine_count(sizeclass)) {
3619 		/*
3620 		 * If we freed a slot in a full chunk, move it back to the partial list
3621 		 */
3622 		uint32_t front = kmem_get_front(range_id,
3623 		    kmem_meta_is_from_right(range_id, meta));
3624 
3625 		kmem_requeue_meta(meta, &sizeclass->ks_partial_head[front]);
3626 	}
3627 }
3628 
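/*
 * Release the VA backing [start, end) of the given slot.
 *
 * The slot is returned to its chunk only when the freed span covers the whole
 * slot, or when no other map entries remain within the slot; a partial free
 * that leaves live entries behind keeps the slot allocated.
 */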
3629 void
3630 kmem_free_space(
3631 	vm_map_offset_t         start,
3632 	vm_map_offset_t         end,
3633 	vm_map_range_id_t       range_id,
3634 	mach_vm_range_t         slot)
3635 {
3636 	bool entry_present = false;
3637 	vm_map_entry_t prev_entry;
3638 	vm_map_entry_t next_entry;
3639 
3640 	if ((slot->min_address == start) && (slot->max_address == end)) {
3641 		/*
3642 		 * Entire slot is being freed at once
3643 		 */
3644 		return kmem_free_slot(range_id, slot);
3645 	}
3646 
3647 	entry_present = vm_map_lookup_entry(kernel_map, start, &prev_entry);
3648 	assert(!entry_present);
3649 	next_entry = prev_entry->vme_next;
3650 
3651 	if (((prev_entry == vm_map_to_entry(kernel_map) ||
3652 	    prev_entry->vme_end <= slot->min_address)) &&
3653 	    (next_entry == vm_map_to_entry(kernel_map) ||
3654 	    (next_entry->vme_start >= slot->max_address))) {
3655 		/*
3656 		 * Free entire slot
3657 		 */
3658 		kmem_free_slot(range_id, slot);
3659 	}
3660 }
3661 
3662 #pragma mark kmem init
3663 
3664 /*
3665  * The default percentage of memory that can be mlocked is scaled based on the total
3666  * amount of memory in the system. These percentages are calculated
3667  * offline and stored in this table. We index this table by
3668  * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
3669  * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
3670  *
3671  * Note that these values were picked for mac.
3672  * If we ever have very large memory config arm devices, we may want to revisit
3673  * since the kernel overhead is smaller there due to the larger page size.
3674  */
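/*
 * For example: a 16 GiB configuration has log2(max_mem) == 34, so the table
 * index is 34 - 32 = 2, selecting 80% with CONFIG_JETSAM and 76% without.
 */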
3675 
3676 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
3677 #define VM_USER_WIREABLE_MIN_CONFIG 32
3678 #if CONFIG_JETSAM
3679 /* Systems with jetsam can wire a bit more b/c the system can relieve wired
3680  * pressure.
3681  */
3682 static vm_map_size_t wire_limit_percents[] =
3683 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
3684 #else
3685 static vm_map_size_t wire_limit_percents[] =
3686 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
3687 #endif /* CONFIG_JETSAM */
3688 
3689 /* Set limit to 95% of DRAM if serverperfmode=1 */
3690 #define VM_USER_SERVERPERF_WIRE_LIMIT_PERCENT 95
3691 /* Use special serverperfmode behavior iff DRAM > 2^35 = 32GiB of RAM. */
3692 #define VM_USER_SERVERPERF_WIREABLE_MIN_CONFIG 35
3693 
3694 /*
3695  * Sets the default global user wire limit which limits the amount of
3696  * memory that can be locked via mlock() based on the above algorithm.
3697  * This can be overridden via a sysctl.
3698  */
3699 static void
3700 kmem_set_user_wire_limits(void)
3701 {
3702 	uint64_t available_mem_log;
3703 	uint64_t max_wire_percent;
3704 	size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
3705 	    sizeof(vm_map_size_t);
3706 	vm_map_size_t limit;
3707 	uint64_t config_memsize = max_mem;
3708 #if defined(XNU_TARGET_OS_OSX)
3709 	config_memsize = max_mem_actual;
3710 #endif /* defined(XNU_TARGET_OS_OSX) */
3711 
3712 	available_mem_log = bit_floor(config_memsize);
3713 
3714 	if (serverperfmode &&
3715 	    (available_mem_log >= VM_USER_SERVERPERF_WIREABLE_MIN_CONFIG)) {
3716 		max_wire_percent = VM_USER_SERVERPERF_WIRE_LIMIT_PERCENT;
3717 	} else {
3718 		if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
3719 			available_mem_log = 0;
3720 		} else {
3721 			available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
3722 		}
3723 		if (available_mem_log >= wire_limit_percents_length) {
3724 			available_mem_log = wire_limit_percents_length - 1;
3725 		}
3726 		max_wire_percent = wire_limit_percents[available_mem_log];
3727 	}
3728 
3729 	limit = config_memsize * max_wire_percent / 100;
3730 	/* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
3731 	if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
3732 		limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
3733 	}
3734 
3735 	vm_global_user_wire_limit = limit;
3736 	/* the default per task limit is the same as the global limit */
3737 	vm_per_task_user_wire_limit = limit;
3738 	vm_add_wire_count_over_global_limit = 0;
3739 	vm_add_wire_count_over_user_limit = 0;
3740 }
3741 
3742 #define KMEM_MAX_CLAIMS 50
3743 __startup_data
3744 struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
3745 
3746 #if !MACH_ASSERT
3747 __startup_data
3748 #endif /* !MACH_ASSERT */
3749 uint32_t kmem_claim_count = 0;
3750 
3751 #if MACH_ASSERT
3752 /**
3753  * Save off some minimal information about the ranges for consumption by
3754  * post-lockdown tests.
3755  */
3756 static struct mach_vm_range kmem_test_saved_ranges[KMEM_MAX_CLAIMS];
3757 #endif /* MACH_ASSERT */
3758 
3759 /**
3760  * For a requested claim size (i.e. kc_size), get the number of bytes which
3761  * should actually be allocated for a region in order to be able to properly
3762  * provide the requested size (the allocation size).
3763  *
3764  * This allocation size is always greater or equal to the claim size. It can,
3765  * for example, include additional space as required by the kernel memory
3766  * configuration.
3767  *
3768  * @param known_last Is the claim in question known to be the last region after
3769  * all placing has completed? The size for a known_last allocation is always
3770  * less than or equal to a non-known_last allocation of the same size.
3771  */
3772 __startup_func
3773 static vm_map_size_t
3774 kmem_claim_to_allocation_size(vm_map_size_t claim_size, bool known_last)
3775 {
3776 	(void)known_last;
3777 	/*
3778 	 * Allocation size and claim size are identical.
3779 	 */
3780 	return claim_size;
3781 }
3782 
3783 /**
3784  * Compute the largest claim which can be made from a given allocation size.
3785  */
3786 static vm_map_size_t
3787 kmem_allocation_to_claim_size(vm_map_size_t allocation_size)
3788 {
3789 	/*
3790 	 * Allocation size and claim size are identical.
3791 	 */
3792 	return allocation_size;
3793 }
3794 
3795 __startup_func
3796 void
3797 kmem_range_startup_init(
3798 	struct kmem_range_startup_spec *sp)
3799 {
3800 	assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
3801 	if (sp->kc_calculate_sz) {
3802 		sp->kc_size = (sp->kc_calculate_sz)();
3803 	}
3804 	if (sp->kc_size) {
3805 		kmem_claims[kmem_claim_count] = *sp;
3806 		kmem_claim_count++;
3807 	}
3808 }
3809 
3810 static vm_offset_t
3811 kmem_fuzz_start(void)
3812 {
3813 	vm_offset_t kmapoff_kaddr = 0;
3814 	uint32_t kmapoff_pgcnt;
3815 
3816 	kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
3817 
3818 	vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);
3819 
3820 	kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
3821 	    KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
3822 	    VM_KERN_MEMORY_OSFMK);
3823 
3824 
3825 	return kmapoff_kaddr + kmapoff_size;
3826 }
3827 
3828 /*
3829  * Generate a randomly shuffled array of indices from 0 to count - 1
3830  */
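/*
 * (This is the "inside-out" variant of the Fisher-Yates shuffle: each new
 * index i is written into a random position j in [0, i], after moving the
 * previous occupant of j up to slot i.)
 */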
3831 __startup_func
3832 void
3833 kmem_shuffle(
3834 	uint16_t       *shuffle_buf,
3835 	uint16_t        count)
3836 {
3837 	for (uint16_t i = 0; i < count; i++) {
3838 		uint16_t j = kmem_get_random16(i);
3839 		if (j != i) {
3840 			shuffle_buf[i] = shuffle_buf[j];
3841 		}
3842 		shuffle_buf[j] = i;
3843 	}
3844 }
3845 
3846 __startup_func
3847 static void
3848 kmem_shuffle_claims(void)
3849 {
3850 	uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
3851 	uint16_t limit = (uint16_t)kmem_claim_count;
3852 
3853 	kmem_shuffle(&shuffle_buf[0], limit);
3854 	for (uint16_t i = 0; i < limit; i++) {
3855 		struct kmem_range_startup_spec tmp = kmem_claims[i];
3856 		kmem_claims[i] = kmem_claims[shuffle_buf[i]];
3857 		kmem_claims[shuffle_buf[i]] = tmp;
3858 	}
3859 }
3860 
3861 __startup_func
3862 static void
3863 kmem_readjust_ranges(
3864 	uint32_t        cur_idx)
3865 {
3866 	assert(cur_idx != 0);
3867 	uint32_t j = cur_idx - 1, random;
3868 	struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
3869 	struct mach_vm_range *sp_range = sp.kc_range;
3870 	/*
3871 	 * Even if sp is currently last, it will never be last after it is moved.
3872 	 * As such, we want to bump other claims over it and include any necessary
3873 	 * padding for a non-last claim.
3874 	 *
3875 	 * While changing which claim is last can impact the total VA usage, since a
3876 	 * known_last allocation size is guaranteed to always be less-than-or-equal
3877 	 * to a non-known_last allocation (which is used for pre-placement sizing),
3878 	 * we will always have enough space so long as the pre-placement sizing had
3879 	 * enough space.
3880 	 */
3881 	vm_map_offset_t sp_allocation_size =
3882 	    kmem_claim_to_allocation_size(sp.kc_size, /* known_last */ false);
3883 
3884 	/*
3885 	 * Find the max index where the restriction is met
3886 	 */
3887 	for (; j > 0; j--) {
3888 		struct kmem_range_startup_spec spj = kmem_claims[j];
3889 		vm_map_offset_t max_start = spj.kc_range->min_address;
3890 		if (spj.kc_flags & KC_NO_MOVE) {
3891 			panic("kmem_range_init: Can't scramble with multiple constraints");
3892 		}
3893 		if (max_start <= sp_range->min_address) {
3894 			break;
3895 		}
3896 	}
3897 
3898 	/*
3899 	 * Pick a random index from 0 to max index and shift claims to the right
3900 	 * to make room for the restricted claim
3901 	 */
3902 	random = kmem_get_random16((uint16_t)j);
3903 	assert(random <= j);
3904 
3905 	sp_range->min_address = kmem_claims[random].kc_range->min_address;
3906 	sp_range->max_address = sp_range->min_address + sp.kc_size;
3907 
3908 	for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
3909 		struct kmem_range_startup_spec spj = kmem_claims[j];
3910 		struct mach_vm_range *range = spj.kc_range;
3911 		range->min_address += sp_allocation_size;
3912 		range->max_address += sp_allocation_size;
3913 		kmem_claims[j + 1] = spj;
3914 	}
3915 
3916 	sp.kc_flags |= KC_NO_MOVE;
3917 	kmem_claims[random] = sp;
3918 }
3919 
3920 __startup_func
3921 static void
3922 kmem_add_ptr_claims(void)
3923 {
3924 	uint64_t kmem_meta_num, kmem_ptr_chunks;
3925 	vm_map_size_t org_ptr_range_size __assert_only;
3926 
3927 	org_ptr_range_size = ptr_range_size;
3928 
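	/*
	 * Split the claimed VA between chunk space and chunk metadata: every
	 * KMEM_CHUNK_SIZE_MIN bytes of pointer VA needs one struct
	 * kmem_page_meta, so after reserving a page of slop the claim is
	 * scaled by KMEM_CHUNK_SIZE_MIN / (KMEM_CHUNK_SIZE_MIN + meta size),
	 * rounded down to whole chunks, with two additional metadata entries
	 * beyond the chunk count.
	 */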
3929 	ptr_range_size -= PAGE_SIZE;
3930 	ptr_range_size *= KMEM_CHUNK_SIZE_MIN;
3931 	ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta));
3932 
3933 	kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN;
3934 	ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN;
3935 
3936 	kmem_meta_num = kmem_ptr_chunks + 2;
3937 	kmem_meta_size = round_page(kmem_meta_num * sizeof(struct kmem_page_meta));
3938 
3939 	assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size);
3940 	/*
3941 	 * Add claims for kmem's ranges
3942 	 */
3943 	for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
3944 		struct kmem_range_startup_spec kmem_spec = {
3945 			.kc_name = "kmem_ptr_range",
3946 			.kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
3947 			.kc_size = ptr_range_size,
3948 			.kc_flags = KC_NO_ENTRY,
3949 		};
3950 		kmem_claims[kmem_claim_count++] = kmem_spec;
3951 
3952 		struct kmem_range_startup_spec kmem_meta_spec = {
3953 			.kc_name = "kmem_ptr_range_meta",
3954 			.kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i],
3955 			.kc_size = kmem_meta_size,
3956 			.kc_flags = KC_NONE,
3957 		};
3958 		kmem_claims[kmem_claim_count++] = kmem_meta_spec;
3959 	}
3960 }
3961 
3962 __startup_func
3963 static void
3964 kmem_add_extra_claims(void)
3965 {
3966 	vm_map_size_t largest_free_size = 0, total_claims = 0;
3967 	vm_map_size_t sane_sprayqtn_size = 0, sprayqtn_allocation_size = 0;
3968 	vm_map_size_t ptr_total_allocation_size = 0;
3969 
3970 	vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
3971 	largest_free_size = trunc_page(largest_free_size);
3972 
3973 	/*
3974 	 * kasan and configs w/o *TRR need to have just one ptr range due to
3975 	 * resource constraints.
3976 	 */
3977 #if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
3978 	kmem_ptr_ranges = 1;
3979 #endif
3980 	/*
3981 	 * Determine size of data and pointer kmem_ranges
3982 	 */
3983 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
3984 		struct kmem_range_startup_spec sp_i = kmem_claims[i];
3985 
3986 		total_claims += kmem_claim_to_allocation_size(
3987 			sp_i.kc_size, /* known_last */ false);
3988 	}
3989 	assert((total_claims & PAGE_MASK) == 0);
3990 
3991 
3992 	largest_free_size -= total_claims;
3993 
3994 	/*
3995 	 * Use half the total available VA for all pointer allocations (this
3996 	 * includes the kmem_sprayqtn range). Given that we have 4 total
3997 	 * ranges, divide the available VA by 8.
3998 	 */
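	/*
	 * (Illustratively, with kmem_ptr_ranges == 3 this is largest_free_size / 8:
	 * e.g. 512 GiB of free VA yields 64 GiB per pointer range plus an initial
	 * 64 GiB for the spray quarantine, before the quarantine is shrunk below.)
	 */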
3999 	ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2);
4000 
4001 	sprayqtn_range_size = ptr_range_size;
4002 	sane_sprayqtn_size = kmem_claim_to_allocation_size(
4003 		/* claim_size */ sane_size / 2, /* known_last */ false);
4004 	if (sprayqtn_range_size > sane_sprayqtn_size) {
4005 		vm_map_size_t sprayqtn_extra;
4006 
4007 		/*
4008 		 * Spray quarantine doesn't need that much space.
4009 		 * Shrink it to something reasonable and equally share the leftover VA
4010 		 * with the other pointer ranges.
4011 		 */
4012 		sprayqtn_extra = sprayqtn_range_size - sane_sprayqtn_size;
4013 		sprayqtn_range_size -= sprayqtn_extra;
4014 		ptr_range_size += sprayqtn_extra / kmem_ptr_ranges;
4015 	}
4016 
4017 	ptr_range_size = round_page(ptr_range_size);
4018 	sprayqtn_range_size = round_page(sprayqtn_range_size);
4019 
4020 	/* Less any necessary allocation padding... */
4021 	ptr_range_size = kmem_allocation_to_claim_size(ptr_range_size);
4022 	sprayqtn_range_size = kmem_allocation_to_claim_size(sprayqtn_range_size);
4023 
4024 	/*
4025 	 * Add the pointer and metadata claims
4026 	 * Note: this call modifies ptr_range_size and may, depending on the padding
4027 	 * requirements, slightly increase or decrease the overall allocation size
4028 	 * of the pointer+metadata region.
4029 	 */
4030 	kmem_add_ptr_claims();
4031 
4032 	sprayqtn_allocation_size = kmem_claim_to_allocation_size(
4033 		sprayqtn_range_size, /* known_last */ false);
4034 	ptr_total_allocation_size =
4035 	    (kmem_claim_to_allocation_size(ptr_range_size, /* known_last */ false) +
4036 	    kmem_claim_to_allocation_size(kmem_meta_size, /* known_last */ false)) *
4037 	    kmem_ptr_ranges;
4038 
4039 	/*
4040 	 * Check: spray and ptr_range are minimally valid.
4041 	 * This is a useful assert as it should catch us if we were to end up with a
4042 	 * "negative" (or extremely large) data_range_size.
4043 	 */
4044 	assert(sprayqtn_allocation_size + ptr_total_allocation_size < largest_free_size);
4045 
4046 	/*
4047 	 * Finally, give any remaining allocable space to the data region.
4048 	 */
4049 	data_range_size = largest_free_size - sprayqtn_allocation_size -
4050 	    ptr_total_allocation_size;
4051 
4052 	/*
4053 	 * If we need the data shared range, divide the size
4054 	 * for the data ranges between BUFFERS and SHARED.
4055 	 *
4056 	 * If not, all data allocations go into KMEM_RANGE_ID_DATA.
4057 	 */
4058 	if (kmem_needs_data_share_range()) {
4059 		/*
4060 		 * Round down the size, because our kmem ranges logic rounds
4061 		 * these sizes to page size, and we need to make sure we never
4062 		 * exceed the remaining allocable space we divided.
4063 		 */
4064 		shared_data_range_size = data_range_size =
4065 		    trunc_page(data_range_size / 2);
4066 	} else {
4067 		shared_data_range_size = 0;
4068 	}
4069 
4070 	/* Less any necessary allocation padding... */
4071 	data_range_size = kmem_allocation_to_claim_size(data_range_size);
4072 	shared_data_range_size = shared_data_range_size ?
4073 	    kmem_allocation_to_claim_size(shared_data_range_size) : 0;
4074 
4075 	/* Check: our allocations should all still fit in the free space */
4076 	assert(sprayqtn_allocation_size + ptr_total_allocation_size +
4077 	    kmem_claim_to_allocation_size(data_range_size, /* known_last */ false) +
4078 	    kmem_claim_to_allocation_size(shared_data_range_size, /* known_last */ false) <=
4079 	    largest_free_size);
4080 
4081 	struct kmem_range_startup_spec kmem_spec_sprayqtn = {
4082 		.kc_name = "kmem_sprayqtn_range",
4083 		.kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN],
4084 		.kc_size = sprayqtn_range_size,
4085 		.kc_flags = KC_NO_ENTRY,
4086 	};
4087 	kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn;
4088 
4089 	struct kmem_range_startup_spec kmem_spec_data_buffers = {
4090 		.kc_name = "kmem_data_buffers_range",
4091 		.kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
4092 		.kc_size = data_range_size,
4093 		.kc_flags = KC_NO_ENTRY,
4094 	};
4095 	kmem_claims[kmem_claim_count++] = kmem_spec_data_buffers;
4096 
4097 	if (kmem_needs_data_share_range()) {
4098 		struct kmem_range_startup_spec kmem_spec_data_shared = {
4099 			.kc_name = "kmem_data_shared_range",
4100 			.kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA_SHARED],
4101 			.kc_size = shared_data_range_size,
4102 			.kc_flags = KC_NO_ENTRY,
4103 		};
4104 		kmem_claims[kmem_claim_count++] = kmem_spec_data_shared;
4105 	}
4106 }
4107 
4108 __startup_func
4109 static void
4110 kmem_scramble_ranges(void)
4111 {
4112 	vm_map_offset_t va_alloc_head = 0;
4113 
4114 	/*
4115 	 * Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
4116 	 * the vm can find the requested ranges.
4117 	 */
4118 	kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
4119 	    VM_MAP_PAGE_SIZE(kernel_map));
4120 	kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;
4121 
4122 	/*
4123 	 * Allocate the g_kext_map prior to randomizing the remaining submaps, as
4124 	 * this map is 2G in size and starts at the end of kernel_text on x86. It
4125 	 * could overflow into the heap.
4126 	 */
4127 	kext_alloc_init();
4128 
4129 	/*
4130 	 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
4131 	 * stack addresses. (With a 4K page and 9 bits of randomness, this
4132 	 * eats about 2M of VA from the map)
4133 	 *
4134 	 * Note that we always need to slide by at least one page because the VM
4135 	 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
4136 	 * do not admit this address to be part of any zone submap.
4137 	 */
4138 	va_alloc_head = kmem_fuzz_start();
4139 
4140 	/*
4141 	 * Add claims for ptr and data kmem_ranges
4142 	 */
4143 	kmem_add_extra_claims();
4144 
4145 	/*
4146 	 * Minimally verify that our placer will be able to resolve the constraints
4147 	 * of all claims
4148 	 */
4149 	bool has_min_address = false;
4150 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4151 		struct kmem_range_startup_spec sp_i = kmem_claims[i];
4152 
4153 		/* Verify that we have only one claim with a min address constraint */
4154 		if (sp_i.kc_range->min_address) {
4155 			if (has_min_address) {
4156 				panic("Cannot place with multiple min_address constraints");
4157 			} else {
4158 				has_min_address = true;
4159 			}
4160 		}
4161 
4162 		if (sp_i.kc_range->max_address) {
4163 			panic("Cannot place with a max_address constraint");
4164 		}
4165 	}
4166 
4167 
4168 	/*
4169 	 * Shuffle registered claims
4170 	 */
4171 	assert(kmem_claim_count < UINT16_MAX);
4172 	kmem_shuffle_claims();
4173 
4174 	/*
4175 	 * Apply restrictions and determine range for each claim
4176 	 */
4177 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4178 		struct kmem_range_startup_spec sp = kmem_claims[i];
4179 		struct mach_vm_range *sp_range = sp.kc_range;
4180 
4181 		/*
4182 		 * Find space using the allocation size (rather than the claim size) in
4183 		 * order to ensure we provide any applicable padding.
4184 		 */
4185 		bool is_last = (i == kmem_claim_count - 1);
4186 		vm_map_offset_t sp_allocation_size =
4187 		    kmem_claim_to_allocation_size(sp.kc_size, is_last);
4188 
4189 		if (vm_map_locate_space_anywhere(kernel_map, sp_allocation_size, 0,
4190 		    VM_MAP_KERNEL_FLAGS_ANYWHERE(.vmkf_no_soft_limit = true),
4191 		    &va_alloc_head, NULL) != KERN_SUCCESS) {
4192 			panic("kmem_range_init: vm_map_locate_space failing for claim %s, "
4193 			    "size 0x%llx",
4194 			    sp.kc_name, sp_allocation_size);
4195 		}
4196 
4197 		/*
4198 		 * Re-adjust ranges if restriction not met
4199 		 */
4200 		if (sp_range->min_address && va_alloc_head > sp_range->min_address) {
4201 			kmem_readjust_ranges(i);
4202 		} else {
4203 			/*
4204 			 * Though the actual allocated space may be larger, provide only the
4205 			 * size requested by the original claim.
4206 			 */
4207 			sp_range->min_address = va_alloc_head;
4208 			sp_range->max_address = va_alloc_head + sp.kc_size;
4209 		}
4210 
4211 		va_alloc_head += sp_allocation_size;
4212 	}
4213 
4214 	/*
4215 	 * We have settled on the ranges, now create temporary entries for the
4216 	 * claims
4217 	 */
4218 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4219 		struct kmem_range_startup_spec sp = kmem_claims[i];
4220 		bool is_last = (i == kmem_claim_count - 1);
4221 		vm_map_offset_t sp_allocation_size =
4222 		    kmem_claim_to_allocation_size(sp.kc_size, is_last);
4223 		vm_map_entry_t entry = NULL;
4224 		if (sp.kc_flags & KC_NO_ENTRY) {
4225 			continue;
4226 		}
4227 
4228 
4229 		/*
4230 		 * We reserve the full allocation size (rather than the claim size) so
4231 		 * that nothing ends up placed in the padding space (if applicable).
4232 		 */
4233 		if (vm_map_find_space(kernel_map, sp.kc_range->min_address,
4234 		    sp_allocation_size, 0,
4235 		    VM_MAP_KERNEL_FLAGS_ANYWHERE(.vmkf_no_soft_limit = true),
4236 		    &entry) != KERN_SUCCESS) {
4237 			panic("kmem_range_init: vm_map_find_space failing for claim %s",
4238 			    sp.kc_name);
4239 		}
4240 		vm_object_reference(kernel_object_default);
4241 		VME_OBJECT_SET(entry, kernel_object_default, false, 0);
4242 		VME_OFFSET_SET(entry, entry->vme_start);
4243 		vm_map_unlock(kernel_map);
4244 	}
4245 
4246 	/*
4247 	 * Now that we are done assigning all the ranges, reset
4248 	 * kmem_ranges[KMEM_RANGE_ID_NONE]
4249 	 */
4250 	kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};
4251 
4252 #if DEBUG || DEVELOPMENT
4253 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4254 		struct kmem_range_startup_spec sp = kmem_claims[i];
4255 
4256 		printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
4257 		    (void *)sp.kc_range->min_address,
4258 		    (void *)sp.kc_range->max_address,
4259 		    mach_vm_size_pretty(sp.kc_size),
4260 		    mach_vm_size_unit(sp.kc_size));
4261 	}
4262 #endif /* DEBUG || DEVELOPMENT */
4263 
4264 #if MACH_ASSERT
4265 	/*
4266 	 * Since many parts of the claim infrastructure are marked as startup data
4267 	 * (and are thus unavailable post-lockdown), save off information our tests
4268 	 * need now.
4269 	 */
4270 	for (uint32_t i = 0; i < kmem_claim_count; i++) {
4271 		kmem_test_saved_ranges[i] = *(kmem_claims[i].kc_range);
4272 	}
4273 #endif /* MACH_ASSERT */
4274 }
4275 
4276 __startup_func
4277 static void
4278 kmem_range_init(void)
4279 {
4280 	vm_size_t range_adjustment;
4281 
4282 	kmem_scramble_ranges();
4283 
4284 	range_adjustment = sprayqtn_range_size >> 3;
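	/*
	 * kmem_large_ranges for the spray quarantine and data ranges cover the
	 * same VA as kmem_ranges, minus the leading eighth (range size >> 3)
	 * of each range.
	 */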
4285 	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address =
4286 	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment;
4287 	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address =
4288 	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address;
4289 
4290 	range_adjustment = data_range_size >> 3;
4291 	kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
4292 	    kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
4293 	kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
4294 	    kmem_ranges[KMEM_RANGE_ID_DATA].max_address;
4295 
4296 	if (kmem_needs_data_share_range()) {
4297 		range_adjustment = shared_data_range_size >> 3;
4298 		kmem_large_ranges[KMEM_RANGE_ID_DATA_SHARED].min_address =
4299 		    kmem_ranges[KMEM_RANGE_ID_DATA_SHARED].min_address + range_adjustment;
4300 		kmem_large_ranges[KMEM_RANGE_ID_DATA_SHARED].max_address =
4301 		    kmem_ranges[KMEM_RANGE_ID_DATA_SHARED].max_address;
4302 	}
4303 
4304 	pmap_init();
4305 	kmem_metadata_init();
4306 	kmem_sizeclass_init();
4307 
4308 #if DEBUG || DEVELOPMENT
4309 	for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) {
4310 		vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
4311 		printf("kmem_large_ranges[%d]    : %p - %p (%u%c)\n", i,
4312 		    (void *)kmem_large_ranges[i].min_address,
4313 		    (void *)kmem_large_ranges[i].max_address,
4314 		    mach_vm_size_pretty(range_size),
4315 		    mach_vm_size_unit(range_size));
4316 	}
4317 #endif
4318 }
4319 #ifndef __BUILDING_XNU_LIB_UNITTEST__ /* kernel map is not maintained in unit-test */
4320 STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
4321 #endif /* __BUILDING_XNU_LIB_UNITTEST__ */
4322 
4323 #if DEBUG || DEVELOPMENT
4324 __startup_func
4325 static void
4326 kmem_log_init(void)
4327 {
4328 	/*
4329 	 * The log can only be created after the kmem subsystem is initialized, as
4330 	 * btlog creation uses kmem
4331 	 */
4332 	kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0);
4333 }
4334 STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init);
4335 
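/*
 * Gather per-range accounting for the pointer ranges: VA covered by the
 * metadata high watermarks on both fronts, the metadata size itself, the VA
 * actually used by map entries in the range, and a rough page-table cost for
 * the reserved-but-unused VA (estimated below at 8 bytes per page).
 */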
4336 kmem_gobj_stats
4337 kmem_get_gobj_stats(void)
4338 {
4339 	vmlp_api_start(KMEM_GET_GOBJ_STATS);
4340 	kmem_gobj_stats stats = {};
4341 
4342 	vm_map_lock(kernel_map);
4343 	for (uint8_t i = 0; i < kmem_ptr_ranges; i++) {
4344 		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i;
4345 		struct mach_vm_range range = kmem_ranges[range_id];
4346 		struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)];
4347 		struct kmem_page_meta *meta_end;
4348 		uint64_t meta_idx = meta - kmem_meta_base[range_id];
4349 		vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0;
4350 		vm_map_offset_t addr;
4351 		vm_map_entry_t entry;
4352 
4353 		/*
4354 		 * Left front
4355 		 */
4356 		va = (meta_idx * KMEM_CHUNK_SIZE_MIN);
4357 		meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta));
4358 
4359 		/*
4360 		 * Right front
4361 		 */
4362 		meta = kmem_meta_hwm[kmem_get_front(range_id, 1)];
4363 		meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr,
4364 		    &meta_idx);
4365 		meta_idx = meta_end - meta;
4366 		meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta));
4367 		va += (meta_idx * KMEM_CHUNK_SIZE_MIN);
4368 
4369 		/*
4370 		 * Compute VA allocated in entire range
4371 		 */
4372 		if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) {
4373 			entry = entry->vme_next;
4374 		}
4375 
4376 		vmlp_range_event_entry(kernel_map, entry);
4377 
4378 		while (entry != vm_map_to_entry(kernel_map) &&
4379 		    entry->vme_start < range.max_address) {
4380 			used += (entry->vme_end - entry->vme_start);
4381 			entry = entry->vme_next;
4382 		}
4383 
4384 		pte_sz = round_page(atop(va - used) * 8);
4385 
4386 		stats.total_used += used;
4387 		stats.total_va += va;
4388 		stats.pte_sz += pte_sz;
4389 		stats.meta_sz += meta_sz;
4390 	}
4391 	vm_map_unlock(kernel_map);
4392 
4393 	vmlp_api_end(KMEM_GET_GOBJ_STATS, 0);
4394 	return stats;
4395 }
4396 
4397 #endif /* DEBUG || DEVELOPMENT */
4398 
4399 /*
4400  *	kmem_init:
4401  *
4402  *	Initialize the kernel's virtual memory map, taking
4403  *	into account all memory allocated up to this time.
4404  */
4405 __startup_func
4406 void
4407 kmem_init(
4408 	vm_offset_t     start,
4409 	vm_offset_t     end)
4410 {
4411 	vm_map_offset_t map_start;
4412 	vm_map_offset_t map_end;
4413 
4414 	map_start = vm_map_trunc_page(start,
4415 	    VM_MAP_PAGE_MASK(kernel_map));
4416 	map_end = vm_map_round_page(end,
4417 	    VM_MAP_PAGE_MASK(kernel_map));
4418 
4419 	vm_map_will_allocate_early_map(&kernel_map);
4420 #if defined(__arm64__)
4421 	kernel_map = vm_map_create_options(pmap_kernel(),
4422 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS,
4423 	    VM_MAX_KERNEL_ADDRESS,
4424 	    VM_MAP_CREATE_DEFAULT);
4425 	/*
4426 	 *	Reserve virtual memory allocated up to this time.
4427 	 */
4428 	{
4429 		unsigned int    region_select = 0;
4430 		vm_map_offset_t region_start;
4431 		vm_map_size_t   region_size;
4432 		vm_map_offset_t map_addr;
4433 		kern_return_t kr;
4434 
4435 		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
4436 			map_addr = region_start;
4437 			kr = vm_map_enter(kernel_map, &map_addr,
4438 			    vm_map_round_page(region_size,
4439 			    VM_MAP_PAGE_MASK(kernel_map)),
4440 			    (vm_map_offset_t) 0,
4441 			    VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(
4442 				    .vmkf_no_pmap_check = true,
4443 				    .vmkf_no_soft_limit = true),
4444 			    VM_OBJECT_NULL,
4445 			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
4446 			    VM_INHERIT_DEFAULT);
4447 
4448 			if (kr != KERN_SUCCESS) {
4449 				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
4450 				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
4451 				    (uint64_t) region_size, kr);
4452 			}
4453 
4454 			region_select++;
4455 		}
4456 	}
4457 #else
4458 	kernel_map = vm_map_create_options(pmap_kernel(),
4459 	    VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
4460 	    VM_MAP_CREATE_DEFAULT);
4461 	/*
4462 	 *	Reserve virtual memory allocated up to this time.
4463 	 */
4464 	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
4465 		vm_map_offset_t map_addr;
4466 		kern_return_t kr;
4467 
4468 		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
4469 		kr = vm_map_enter(kernel_map,
4470 		    &map_addr,
4471 		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
4472 		    (vm_map_offset_t) 0,
4473 		    VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true),
4474 		    VM_OBJECT_NULL,
4475 		    (vm_object_offset_t) 0, FALSE,
4476 		    VM_PROT_NONE, VM_PROT_NONE,
4477 		    VM_INHERIT_DEFAULT);
4478 
4479 		if (kr != KERN_SUCCESS) {
4480 			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
4481 			    (uint64_t) start, (uint64_t) end,
4482 			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
4483 			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
4484 			    kr);
4485 		}
4486 	}
4487 #endif
4488 
4489 	kmem_set_user_wire_limits();
4490 }
4491 
4492 
4493 #pragma mark map copyio
4494 
4495 /*
4496  * Note: semantic types aren't used as `copyio` already validates.
4497  */
4498 
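/*
 * Each helper below handles three cases: kernel pmap maps are copied with a
 * plain memcpy, the current map uses copyin/copyout directly, and any other
 * map is temporarily switched to (with security override) around the copy,
 * holding a reference on the map for the duration.
 */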
4499 kern_return_t
4500 copyinmap(
4501 	vm_map_t                map,
4502 	vm_map_offset_t         fromaddr,
4503 	void                   *todata,
4504 	vm_size_t               length)
4505 {
4506 	kern_return_t kr = KERN_SUCCESS;
4507 	vm_map_switch_context_t switch_ctx;
4508 
4509 	if (vm_map_pmap(map) == pmap_kernel()) {
4510 		/* assume a correct copy */
4511 		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
4512 	} else if (current_map() == map) {
4513 		if (copyin(fromaddr, todata, length) != 0) {
4514 			kr = KERN_INVALID_ADDRESS;
4515 		}
4516 	} else {
4517 		vm_map_reference(map);
4518 		switch_ctx = vm_map_switch_with_sec_override(map, TRUE);
4519 		if (copyin(fromaddr, todata, length) != 0) {
4520 			kr = KERN_INVALID_ADDRESS;
4521 		}
4522 		vm_map_switch_back(switch_ctx);
4523 		vm_map_deallocate(map);
4524 	}
4525 	return kr;
4526 }
4527 
4528 kern_return_t
4529 copyoutmap(
4530 	vm_map_t                map,
4531 	void                   *fromdata,
4532 	vm_map_address_t        toaddr,
4533 	vm_size_t               length)
4534 {
4535 	kern_return_t kr = KERN_SUCCESS;
4536 	vm_map_switch_context_t switch_ctx;
4537 
4538 	if (vm_map_pmap(map) == pmap_kernel()) {
4539 		/* assume a correct copy */
4540 		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
4541 	} else if (current_map() == map) {
4542 		if (copyout(fromdata, toaddr, length) != 0) {
4543 			ktriage_record(thread_tid(current_thread()),
4544 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
4545 			    KDBG_TRIAGE_RESERVED,
4546 			    KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR),
4547 			    KERN_INVALID_ADDRESS /* arg */);
4548 			kr = KERN_INVALID_ADDRESS;
4549 		}
4550 	} else {
4551 		vm_map_reference(map);
4552 		switch_ctx = vm_map_switch_with_sec_override(map, TRUE);
4553 		if (copyout(fromdata, toaddr, length) != 0) {
4554 			ktriage_record(thread_tid(current_thread()),
4555 			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
4556 			    KDBG_TRIAGE_RESERVED,
4557 			    KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR),
4558 			    KERN_INVALID_ADDRESS /* arg */);
4559 			kr = KERN_INVALID_ADDRESS;
4560 		}
4561 		vm_map_switch_back(switch_ctx);
4562 		vm_map_deallocate(map);
4563 	}
4564 	return kr;
4565 }
4566 
4567 kern_return_t
4568 copyoutmap_atomic32(
4569 	vm_map_t                map,
4570 	uint32_t                value,
4571 	vm_map_address_t        toaddr)
4572 {
4573 	kern_return_t kr = KERN_SUCCESS;
4574 	vm_map_switch_context_t switch_ctx;
4575 
4576 	if (vm_map_pmap(map) == pmap_kernel()) {
4577 		/* assume a correct toaddr */
4578 		*(uint32_t *)toaddr = value;
4579 	} else if (current_map() == map) {
4580 		if (copyout_atomic32(value, toaddr) != 0) {
4581 			kr = KERN_INVALID_ADDRESS;
4582 		}
4583 	} else {
4584 		vm_map_reference(map);
4585 		switch_ctx = vm_map_switch_with_sec_override(map, TRUE);
4586 		if (copyout_atomic32(value, toaddr) != 0) {
4587 			kr = KERN_INVALID_ADDRESS;
4588 		}
4589 		vm_map_switch_back(switch_ctx);
4590 		vm_map_deallocate(map);
4591 	}
4592 	return kr;
4593 }
4594 
4595 kern_return_t
4596 copyoutmap_atomic64(
4597 	vm_map_t                map,
4598 	uint64_t                value,
4599 	vm_map_address_t        toaddr)
4600 {
4601 	kern_return_t kr = KERN_SUCCESS;
4602 	vm_map_switch_context_t switch_ctx;
4603 
4604 	if (vm_map_pmap(map) == pmap_kernel()) {
4605 		/* assume a correct toaddr */
4606 		*(uint64_t *)toaddr = value;
4607 	} else if (current_map() == map) {
4608 		if (copyout_atomic64(value, toaddr) != 0) {
4609 			kr = KERN_INVALID_ADDRESS;
4610 		}
4611 	} else {
4612 		vm_map_reference(map);
4613 		switch_ctx = vm_map_switch_with_sec_override(map, TRUE);
4614 		if (copyout_atomic64(value, toaddr) != 0) {
4615 			kr = KERN_INVALID_ADDRESS;
4616 		}
4617 		vm_map_switch_back(switch_ctx);
4618 		vm_map_deallocate(map);
4619 	}
4620 	return kr;
4621 }
4622 
4623 
4624 #pragma mark pointer obfuscation / packing
4625 
4626 /*
4627  *
4628  *	The following two functions are to be used when exposing kernel
4629  *	addresses to userspace via any of the various debug or info
4630  *	facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
4631  *	and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
4632  *	are exported to KEXTs.
4633  *
4634  *	NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
4635  */
4636 
4637 vm_offset_t
4638 vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
4639 {
4640 	assert(salt != 0);
4641 
4642 	if (addr == 0) {
4643 		return 0ul;
4644 	}
4645 
4646 	if (VM_KERNEL_IS_SLID(addr)) {
4647 		return VM_KERNEL_UNSLIDE(addr);
4648 	}
4649 
4650 	addr = VM_KERNEL_STRIP_PTR(addr);
4651 
4652 	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
4653 	SHA256_CTX sha_ctx;
4654 
4655 	SHA256_Init(&sha_ctx);
4656 	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
4657 	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
4658 	SHA256_Final(sha_digest, &sha_ctx);
4659 
4660 	return sha_digest[0];
4661 }
4662 
4663 __exported vm_offset_t
4664 vm_kernel_addrhash_external(vm_offset_t addr);
4665 vm_offset_t
4666 vm_kernel_addrhash_external(vm_offset_t addr)
4667 {
4668 	return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
4669 }
4670 
4671 void
4672 vm_kernel_addrhide(
4673 	vm_offset_t addr,
4674 	vm_offset_t *hide_addr)
4675 {
4676 	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
4677 }
4678 
4679 void
4680 vm_kernel_addrperm_external(
4681 	vm_offset_t addr,
4682 	vm_offset_t *perm_addr)
4683 {
4684 	addr = VM_KERNEL_STRIP_UPTR(addr);
4685 
4686 	if (VM_KERNEL_IS_SLID(addr)) {
4687 		*perm_addr = VM_KERNEL_UNSLIDE(addr);
4688 	} else if (VM_KERNEL_ADDRESS(addr)) {
4689 		*perm_addr = ML_ADDRPERM(addr, vm_kernel_addrperm_ext);
4690 	} else {
4691 		*perm_addr = addr;
4692 	}
4693 }
4694 
4695 void
4696 vm_kernel_unslide_or_perm_external(
4697 	vm_offset_t addr,
4698 	vm_offset_t *up_addr)
4699 {
4700 	vm_kernel_addrperm_external(addr, up_addr);
4701 }
4702 
4703 void
4704 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
4705 {
4706 	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
4707 		panic("pointer %p can't be packed: low %d bits aren't 0",
4708 		    (void *)ptr, params.vmpp_shift);
4709 	} else if (ptr <= params.vmpp_base) {
4710 		panic("pointer %p can't be packed: below base %p",
4711 		    (void *)ptr, (void *)params.vmpp_base);
4712 	} else {
4713 		panic("pointer %p can't be packed: maximum encodable pointer is %p",
4714 		    (void *)ptr, (void *)vm_packing_max_packable(params));
4715 	}
4716 }
4717 
4718 void
4719 vm_packing_verify_range(
4720 	const char *subsystem,
4721 	vm_offset_t min_address,
4722 	vm_offset_t max_address,
4723 	vm_packing_params_t params)
4724 {
4725 	if (min_address > max_address) {
4726 		panic("%s: %s range invalid min:%p > max:%p",
4727 		    __func__, subsystem, (void *)min_address, (void *)max_address);
4728 	}
4729 
4730 	if (!params.vmpp_base_relative) {
4731 		return;
4732 	}
4733 
4734 	if (min_address <= params.vmpp_base) {
4735 		panic("%s: %s range invalid min:%p <= base:%p",
4736 		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
4737 	}
4738 
4739 	if (max_address > vm_packing_max_packable(params)) {
4740 		panic("%s: %s range invalid max:%p >= max packable:%p",
4741 		    __func__, subsystem, (void *)max_address,
4742 		    (void *)vm_packing_max_packable(params));
4743 	}
4744 }
4745 
4746 #pragma mark tests
4747 #if MACH_ASSERT
4748 #include <sys/errno.h>
4749 
4750 static void
4751 kmem_test_for_entry(
4752 	vm_map_t                map,
4753 	vm_offset_t             addr,
4754 	void                  (^block)(vm_map_entry_t))
4755 {
4756 	vm_map_entry_t entry;
4757 
4758 	vm_map_lock(map);
4759 	block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
4760 	vm_map_unlock(map);
4761 }
4762 
4763 #define kmem_test_assert_map(map, pg, entries) ({ \
4764 	assert3u((map)->size, ==, ptoa(pg)); \
4765 	assert3u((map)->hdr.nentries, ==, entries); \
4766 })
4767 
4768 static bool
4769 can_write_at(vm_offset_t offs, uint32_t page)
4770 {
4771 	static const int zero;
4772 
4773 	return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
4774 }
4775 #define assert_writeable(offs, page) \
4776 	assertf(can_write_at(offs, page), \
4777 	    "can write at %p + ptoa(%d)", (void *)offs, page)
4778 
4779 #define assert_faults(offs, page) \
4780 	assertf(!can_write_at(offs, page), \
4781 	    "can write at %p + ptoa(%d)", (void *)offs, page)
4782 
4783 #define peek(offs, page) \
4784 	(*(uint32_t *)((offs) + ptoa(page)))
4785 
4786 #define poke(offs, page, v) \
4787 	(*(uint32_t *)((offs) + ptoa(page)) = (v))
4788 
4789 #if CONFIG_SPTM
4790 __attribute__((noinline))
4791 static void
4792 kmem_test_verify_type_policy(vm_offset_t addr, kmem_flags_t flags)
4793 {
4794 	extern bool use_xnu_restricted;
4795 	pmap_mapping_type_t expected_type = PMAP_MAPPING_TYPE_RESTRICTED;
4796 
4797 	/* Explicitly state the expected policy */
4798 	if (flags & (KMEM_COMPRESSOR | KMEM_DATA_SHARED)) {
4799 		expected_type = PMAP_MAPPING_TYPE_DEFAULT;
4800 	} else if ((flags & KMEM_DATA) &&
4801 	    !kalloc_is_restricted_data_mode_enforced()) {
4802 		expected_type = PMAP_MAPPING_TYPE_DEFAULT;
4803 	}
4804 
4805 	/* If X_K_R is disabled, DEFAULT is the only possible mapping */
4806 	if (!use_xnu_restricted) {
4807 		expected_type = PMAP_MAPPING_TYPE_DEFAULT;
4808 	}
4809 
4810 	/* Verify if derived correctly */
4811 	assert3u(expected_type, ==, __kmem_mapping_type(flags));
4812 
4813 	pmap_paddr_t pa = kvtophys(addr);
4814 	if (pa == 0) {
4815 		return;
4816 	}
4817 
4818 	/* Verify if the mapped address actually got the expected type */
4819 	assert3u(expected_type, ==, sptm_get_frame_type(pa));
4820 }
4821 #endif /* CONFIG_SPTM */
4822 
4823 __attribute__((noinline))
4824 static void
4825 kmem_alloc_basic_test(vm_map_t map)
4826 {
4827 	kmem_guard_t guard = {
4828 		.kmg_tag = VM_KERN_MEMORY_DIAG,
4829 	};
4830 	vm_offset_t addr;
4831 
4832 	/*
4833 	 * Test wired basics:
4834 	 * - KMA_KOBJECT
4835 	 * - KMA_GUARD_FIRST, KMA_GUARD_LAST
4836 	 * - allocation alignment
4837 	 */
4838 	addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
4839 	    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
4840 	assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)", map);
4841 	assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
4842 	kmem_test_assert_map(map, 10, 1);
4843 
4844 	kmem_test_for_entry(map, addr, ^(__assert_only vm_map_entry_t e){
4845 		assertf(e, "unable to find address %p in map %p", (void *)addr, map);
4846 		assert(e->vme_kernel_object);
4847 		assert(!e->vme_atomic);
4848 		assert3u(e->vme_start, <=, addr);
4849 		assert3u(addr + ptoa(10), <=, e->vme_end);
4850 	});
4851 
4852 	assert_faults(addr, 0);
4853 	for (int i = 1; i < 9; i++) {
4854 		assert_writeable(addr, i);
4855 	}
4856 	assert_faults(addr, 9);
4857 
4858 	kmem_free(map, addr, ptoa(10));
4859 	kmem_test_assert_map(map, 0, 0);
4860 
4861 	/*
4862 	 * Test pageable basics.
4863 	 */
4864 	addr = kmem_alloc_guard(map, ptoa(10), 0,
4865 	    KMA_PAGEABLE, guard).kmr_address;
4866 	assertf(addr != 0ull, "kma(%p, 10p, 0, KO | PG)", map);
4867 	kmem_test_assert_map(map, 10, 1);
4868 
4869 	for (int i = 0; i < 9; i++) {
4870 		assert_faults(addr, i);
4871 		poke(addr, i, 42);
4872 		assert_writeable(addr, i);
4873 	}
4874 
4875 	kmem_free_guard(map, addr, ptoa(10),
4876 	    KMF_GUARD_FIRST | KMF_GUARD_LAST, guard);
4877 	kmem_test_assert_map(map, 0, 0);
4878 }
4879 
4880 __attribute__((noinline))
4881 static void
4882 kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
4883 {
4884 	kmem_guard_t guard = {
4885 		.kmg_atomic  = !(kind & (KMR_DATA | KMR_DATA_SHARED)),
4886 		.kmg_tag     = VM_KERN_MEMORY_DIAG,
4887 		.kmg_context = 0xefface,
4888 	};
4889 	vm_offset_t addr, newaddr;
4890 	const int N = 10;
4891 
4892 	/*
4893 	 *	This isn't something kmem_realloc_guard() _needs_ to do,
4894 	 *	we could conceive an implementation where it grows in place
4895 	 *	if there's space after it.
4896 	 *
4897 	 *	However, this is what the implementation does today.
4898 	 */
4899 	bool realloc_growth_changes_address = true;
4900 	bool GF = (kind & KMR_GUARD_FIRST);
4901 	bool GL = (kind & KMR_GUARD_LAST);
4902 
4903 	/*
4904 	 *	Initial N page allocation
4905 	 */
4906 	addr = kmem_alloc_guard(map, ptoa(N), 0,
4907 	    (kind & ~KMEM_FREEOLD) | KMA_ZERO, guard).kmr_address;
4908 	assert3u(addr, !=, 0);
4909 
4910 	kmem_test_assert_map(map, N, 1);
4911 	for (int pg = GF; pg < N - GL; pg++) {
4912 		poke(addr, pg, 42 + pg);
4913 	}
4914 	for (int pg = N - GL; pg < N; pg++) {
4915 		assert_faults(addr, pg);
4916 	}
4917 
4918 #if CONFIG_SPTM
4919 	kmem_test_verify_type_policy(addr, ANYF(kind));
4920 #endif /* CONFIG_SPTM */
4921 	/*
4922 	 *	Grow to N + 3 pages
4923 	 */
4924 	newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
4925 	    kind | KMR_ZERO, guard).kmr_address;
4926 	assert3u(newaddr, !=, 0);
4927 	if (realloc_growth_changes_address) {
4928 		assert3u(addr, !=, newaddr);
4929 	}
4930 	if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
4931 		kmem_test_assert_map(map, N + 3, 1);
4932 	} else {
4933 		kmem_test_assert_map(map, 2 * N + 3, 2);
4934 	}
4935 	for (int pg = GF; pg < N - GL; pg++) {
4936 		assert3u(peek(newaddr, pg), ==, 42 + pg);
4937 	}
4938 	if ((kind & KMR_FREEOLD) == 0) {
4939 		for (int pg = GF; pg < N - GL; pg++) {
4940 			assert3u(peek(addr, pg), ==, 42 + pg);
4941 		}
4942 		/* check for true sharing */
4943 		poke(addr + 16, 0, 1234);
4944 		assert3u(peek(newaddr + 16, 0), ==, 1234);
4945 		kmem_free_guard(map, addr, ptoa(N),
4946 		    kind & (KMF_TAG | KMF_GUARD_FIRST | KMF_GUARD_LAST), guard);
4947 		kmem_test_assert_map(map, N + 3, 1);
4948 	}
4949 	if (addr != newaddr) {
4950 		for (int pg = GF; pg < N - GL; pg++) {
4951 			assert_faults(addr, pg);
4952 		}
4953 	}
4954 	for (int pg = N - GL; pg < N + 3 - GL; pg++) {
4955 		assert3u(peek(newaddr, pg), ==, 0);
4956 	}
4957 	for (int pg = N + 3 - GL; pg < N + 3; pg++) {
4958 		assert_faults(newaddr, pg);
4959 	}
4960 	addr = newaddr;
4961 
4962 
4963 	/*
4964 	 *	Shrink to N - 2 pages
4965 	 */
4966 	newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
4967 	    kind | KMR_ZERO, guard).kmr_address;
4968 	assert3u(map->size, ==, ptoa(N - 2));
4969 	assert3u(newaddr, ==, addr);
4970 	kmem_test_assert_map(map, N - 2, 1);
4971 
4972 	for (int pg = GF; pg < N - 2 - GL; pg++) {
4973 		assert3u(peek(addr, pg), ==, 42 + pg);
4974 	}
4975 	for (int pg = N - 2 - GL; pg < N + 3; pg++) {
4976 		assert_faults(addr, pg);
4977 	}
4978 
4979 	kmem_free_guard(map, addr, ptoa(N - 2),
4980 	    kind & (KMF_TAG | KMF_GUARD_FIRST | KMF_GUARD_LAST), guard);
4981 	kmem_test_assert_map(map, 0, 0);
4982 }
4983 
4984 static int
4985 kmem_basic_test(__unused int64_t in, int64_t *out)
4986 {
4987 	mach_vm_offset_t addr;
4988 	vm_map_t map;
4989 
4990 	printf("%s: test running\n", __func__);
4991 
4992 	map = kmem_suballoc(kernel_map, &addr, 64U << 20,
4993 	        VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
4994 	        KMS_NOFAIL | KMS_DATA_SHARED, VM_KERN_MEMORY_DIAG).kmr_submap;
4995 
4996 	printf("%s: kmem_alloc ...\n", __func__);
4997 	kmem_alloc_basic_test(map);
4998 	printf("%s:     PASS\n", __func__);
4999 
5000 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
5001 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
5002 	printf("%s:     PASS\n", __func__);
5003 
5004 	printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
5005 	kmem_realloc_basic_test(map, KMR_FREEOLD);
5006 	printf("%s:     PASS\n", __func__);
5007 
5008 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
5009 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST);
5010 	printf("%s:     PASS\n", __func__);
5011 
5012 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
5013 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
5014 	printf("%s:     PASS\n", __func__);
5015 
5016 	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
5017 	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
5018 	printf("%s:     PASS\n", __func__);
5019 
5020 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
5021 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST);
5022 	printf("%s:     PASS\n", __func__);
5023 
5024 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
5025 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
5026 	printf("%s:     PASS\n", __func__);
5027 
5028 	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
5029 	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
5030 	printf("%s:     PASS\n", __func__);
5031 
5032 
5033 	/* using KMR_DATA signals the test to exercise the non-atomic realloc path */
5034 	printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
5035 	kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
5036 	printf("%s:     PASS\n", __func__);
5037 
5038 	/*
5039 	 * Using KMR_DATA without KMR_FREEOLD violates the
5040 	 * single-mappability of RESTRICTED pages.
5041 	 */
5042 
5043 	/* test KMR_DATA_SHARED (the new shared kheap) with KMR_FREEOLD */
5044 	printf("%s: kmem_realloc (KMR_DATA_SHARED | KMR_FREEOLD) ...\n", __func__);
5045 	kmem_realloc_basic_test(map, KMR_DATA_SHARED | KMR_FREEOLD);
5046 	printf("%s:     PASS\n", __func__);
5047 
5048 	/* test KMR_DATA_SHARED (the new shared kheap) without KMR_FREEOLD */
5049 	printf("%s: kmem_realloc (KMR_DATA_SHARED) ...\n", __func__);
5050 	kmem_realloc_basic_test(map, KMR_DATA_SHARED);
5051 	printf("%s:     PASS\n", __func__);
5052 
5053 	kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
5054 	vm_map_deallocate(map);
5055 
5056 	printf("%s: test passed\n", __func__);
5057 	*out = 1;
5058 	return 0;
5059 }
5060 SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);
5061 
5062 static void
5063 kmem_test_get_size_idx_for_chunks(uint32_t chunks)
5064 {
5065 	__assert_only uint32_t idx = kmem_get_size_idx_for_chunks(chunks);
5066 
5067 	assert(chunks >= kmem_size_array[idx].ks_num_chunk);
5068 }
5069 
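/*
 *	Exercise the chunk-count to size-class lookup just below, at, and
 *	just above every size class' own chunk count.
 */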
5070 __attribute__((noinline))
5071 static void
5072 kmem_test_get_size_idx_for_all_chunks()
5073 {
5074 	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
5075 		uint32_t chunks = kmem_size_array[i].ks_num_chunk;
5076 
5077 		if (chunks != 1) {
5078 			kmem_test_get_size_idx_for_chunks(chunks - 1);
5079 		}
5080 		kmem_test_get_size_idx_for_chunks(chunks);
5081 		kmem_test_get_size_idx_for_chunks(chunks + 1);
5082 	}
5083 }
5084 
5085 static int
5086 kmem_guard_obj_test(__unused int64_t in, int64_t *out)
5087 {
5088 	printf("%s: test running\n", __func__);
5089 
5090 	printf("%s: kmem_get_size_idx_for_chunks\n", __func__);
5091 	kmem_test_get_size_idx_for_all_chunks();
5092 	printf("%s:     PASS\n", __func__);
5093 
5094 	printf("%s: test passed\n", __func__);
5095 	*out = 1;
5096 	return 0;
5097 }
5098 SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test);
5099 
5100 
5101 #endif /* MACH_ASSERT */
5102