/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_kern.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern_internal.h>
#include <vm/vm_map_internal.h>
#include <vm/vm_object_internal.h>
#include <vm/vm_page_internal.h>
#include <vm/vm_compressor_xnu.h>
#include <vm/vm_pageout_xnu.h>
#include <vm/vm_init_xnu.h>
#include <vm/vm_fault.h>
#include <vm/vm_memtag.h>
#include <kern/misc_protos.h>
#include <vm/cpm_internal.h>
#include <kern/ledger.h>
#include <kern/bits.h>
#include <kern/startup.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <libkern/crypto/sha2.h>
#include <libkern/section_keywords.h>
#include <sys/kdebug.h>
#include <sys/kdebug_triage.h>

#include <san/kasan.h>
#include <kern/kext_alloc.h>
#include <kern/backtrace.h>
#include <os/hash.h>
#include <kern/zalloc_internal.h>
#include <libkern/crypto/rand.h>
/*
 * Variables exported by this module.
 */

SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT];
SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT];

static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges",
    KMEM_RANGE_ID_NUM_PTR);
#define KMEM_GOBJ_THRESHOLD     (32ULL << 20)
#if DEBUG || DEVELOPMENT
#define KMEM_OUTLIER_LOG_SIZE   (16ULL << 10)
#define KMEM_OUTLIER_SIZE       0
#define KMEM_OUTLIER_ALIGN      1
btlog_t kmem_outlier_log;
#endif /* DEBUG || DEVELOPMENT */

__startup_data static vm_map_size_t data_range_size;
__startup_data static vm_map_size_t ptr_range_size;
__startup_data static vm_map_size_t sprayqtn_range_size;

#pragma mark helpers

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kma_flags_t flags)
{
    return (kmem_flags_t)flags;
}

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kmr_flags_t flags)
{
    return (kmem_flags_t)flags;
}

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kmf_flags_t flags)
{
    return (kmem_flags_t)flags;
}

__abortlike
static void
__kmem_invalid_size_panic(
    vm_map_t map,
    vm_size_t size,
    uint32_t flags)
{
    panic("kmem(map=%p, flags=0x%x): invalid size %zd",
        map, flags, (size_t)size);
}

__abortlike
static void
__kmem_invalid_arguments_panic(
    const char *what,
    vm_map_t map,
    vm_address_t address,
    vm_size_t size,
    uint32_t flags)
{
    panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
        "invalid arguments passed",
        what, map, (void *)address, (size_t)size, flags);
}

__abortlike
static void
__kmem_failed_panic(
    vm_map_t map,
    vm_size_t size,
    uint32_t flags,
    kern_return_t kr,
    const char *what)
{
    panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
        what, map, (size_t)size, flags, kr);
}

__abortlike
static void
__kmem_entry_not_found_panic(
    vm_map_t map,
    vm_offset_t addr)
{
    panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
}

static inline vm_object_t
__kmem_object(kmem_flags_t flags)
{
    if (flags & KMEM_COMPRESSOR) {
        if (flags & KMEM_KOBJECT) {
            panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified");
        }
        return compressor_object;
    }
    if (!(flags & KMEM_KOBJECT)) {
        panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
    }
    return kernel_object_default;
}

static inline pmap_mapping_type_t
__kmem_mapping_type(kmem_flags_t flags)
{
    if (flags & (KMEM_DATA | KMEM_COMPRESSOR)) {
        return PMAP_MAPPING_TYPE_DEFAULT;
    } else {
        return PMAP_MAPPING_TYPE_RESTRICTED;
    }
}

static inline vm_size_t
__kmem_guard_left(kmem_flags_t flags)
{
    return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
}

static inline vm_size_t
__kmem_guard_right(kmem_flags_t flags)
{
    return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
}

static inline vm_size_t
__kmem_guard_size(kmem_flags_t flags)
{
    return __kmem_guard_left(flags) + __kmem_guard_right(flags);
}
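
/*
 * Worked example (editor's note): with both KMEM_GUARD_FIRST and
 * KMEM_GUARD_LAST set, __kmem_guard_size() reports 2 * PAGE_SIZE, which
 * the minimum-size checks in kmem_alloc_guard_internal() below require
 * the caller's requested size to cover.
 */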

__pure2
static inline vm_size_t
__kmem_entry_orig_size(vm_map_entry_t entry)
{
    vm_object_t object = VME_OBJECT(entry);

    if (entry->vme_kernel_object) {
        return entry->vme_end - entry->vme_start -
               entry->vme_object_or_delta;
    } else {
        return object->vo_size - object->vo_size_delta;
    }
}


#pragma mark kmem range methods

#if __arm64__
// <rdar://problem/48304934> arm64 doesn't use ldp when I'd expect it to
#define mach_vm_range_load(r, r_min, r_max) \
    asm("ldp %[rmin], %[rmax], [%[range]]" \
        : [rmin] "=r"(r_min), [rmax] "=r"(r_max) \
        : [range] "r"(r), "m"((r)->min_address), "m"((r)->max_address))
#else
#define mach_vm_range_load(r, rmin, rmax) \
    ({ rmin = (r)->min_address; rmax = (r)->max_address; })
#endif

__abortlike
static void
__mach_vm_range_overflow(
    mach_vm_offset_t addr,
    mach_vm_offset_t size)
{
    panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
        addr, addr, size);
}

__abortlike
static void
__mach_vm_range_invalid(
    mach_vm_offset_t min_address,
    mach_vm_offset_t max_address)
{
    panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
        min_address, max_address);
}

__header_always_inline mach_vm_size_t
mach_vm_range_size(const struct mach_vm_range *r)
{
    mach_vm_offset_t rmin, rmax;

    mach_vm_range_load(r, rmin, rmax);
    return rmax - rmin;
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
{
    mach_vm_offset_t rmin, rmax;

#if CONFIG_KERNEL_TAGGING
    if (VM_KERNEL_ADDRESS(addr)) {
        addr = vm_memtag_canonicalize_address(addr);
    }
#endif /* CONFIG_KERNEL_TAGGING */

    /*
     * The `&` is not a typo: we really expect the check to pass,
     * so encourage the compiler to eagerly load and test without branches
     */
    mach_vm_range_load(r, rmin, rmax);
    return (addr >= rmin) & (addr < rmax);
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_contains(
    const struct mach_vm_range *r,
    mach_vm_offset_t addr,
    mach_vm_offset_t size)
{
    mach_vm_offset_t rmin, rmax;

#if CONFIG_KERNEL_TAGGING
    if (VM_KERNEL_ADDRESS(addr)) {
        addr = vm_memtag_canonicalize_address(addr);
    }
#endif /* CONFIG_KERNEL_TAGGING */

    /*
     * The `&` is not a typo: we really expect the check to pass,
     * so encourage the compiler to eagerly load and test without branches
     */
    mach_vm_range_load(r, rmin, rmax);
    return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax);
}
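
/*
 * Illustrative use of the two overloads above (editor's sketch; the
 * in-tree caller is kmem_range_id_contains() below):
 *
 *      const struct mach_vm_range *r = &kmem_ranges[KMEM_RANGE_ID_DATA];
 *
 *      (void)mach_vm_range_contains(r, addr);        // single address
 *      (void)mach_vm_range_contains(r, addr, size);  // whole [addr, addr+size)
 */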

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_intersects(
    const struct mach_vm_range *r1,
    const struct mach_vm_range *r2)
{
    mach_vm_offset_t r1_min, r1_max;
    mach_vm_offset_t r2_min, r2_max;

    mach_vm_range_load(r1, r1_min, r1_max);
    r2_min = r2->min_address;
    r2_max = r2->max_address;

    if (r1_min > r1_max) {
        __mach_vm_range_invalid(r1_min, r1_max);
    }

    if (r2_min > r2_max) {
        __mach_vm_range_invalid(r2_min, r2_max);
    }

    return r1_max > r2_min && r1_min < r2_max;
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_intersects(
    const struct mach_vm_range *r1,
    mach_vm_offset_t addr,
    mach_vm_offset_t size)
{
    struct mach_vm_range r2;

    addr = VM_KERNEL_STRIP_UPTR(addr);
    r2.min_address = addr;
    if (os_add_overflow(addr, size, &r2.max_address)) {
        __mach_vm_range_overflow(addr, size);
    }

    return mach_vm_range_intersects(r1, &r2);
}

bool
kmem_range_id_contains(
    kmem_range_id_t range_id,
    vm_map_offset_t addr,
    vm_map_size_t size)
{
    return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
}

__abortlike
static void
kmem_range_invalid_panic(
    kmem_range_id_t range_id,
    vm_map_offset_t addr,
    vm_map_size_t size)
{
    const struct mach_vm_range *r = &kmem_ranges[range_id];
    mach_vm_offset_t rmin, rmax;

    mach_vm_range_load(r, rmin, rmax);
    if (addr + size < rmin) {
        panic("addr %p + size %llu overflows %p", (void *)addr, size,
            (void *)(addr + size));
    }
    panic("addr %p + size %llu doesn't fit in one range (id: %u min: %p max: %p)",
        (void *)addr, size, range_id, (void *)rmin, (void *)rmax);
}

/*
 * Return whether the entire allocation is contained in the given range
 */
static bool
kmem_range_contains_fully(
    kmem_range_id_t range_id,
    vm_map_offset_t addr,
    vm_map_size_t size)
{
    const struct mach_vm_range *r = &kmem_ranges[range_id];
    mach_vm_offset_t rmin, rmax;
    bool result = false;

    if (VM_KERNEL_ADDRESS(addr)) {
        addr = vm_memtag_canonicalize_address(addr);
    }

    /*
     * The `&` is not a typo: we really expect the check to pass,
     * so encourage the compiler to eagerly load and test without branches
     */
    mach_vm_range_load(r, rmin, rmax);
    result = (addr >= rmin) & (addr < rmax);
    if (__improbable(result
        && ((addr + size < rmin) || (addr + size > rmax)))) {
        kmem_range_invalid_panic(range_id, addr, size);
    }
    return result;
}

vm_map_size_t
kmem_range_id_size(kmem_range_id_t range_id)
{
    return mach_vm_range_size(&kmem_ranges[range_id]);
}

kmem_range_id_t
kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
{
    kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;

    for (; range_id < KMEM_RANGE_COUNT; range_id++) {
        if (kmem_range_contains_fully(range_id, addr, size)) {
            return range_id;
        }
    }
    return KMEM_RANGE_ID_NONE;
}
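
/*
 * Example (editor's sketch): classifying an arbitrary kernel allocation.
 * KMEM_RANGE_ID_NONE is returned when no single range fully contains it,
 * which callers must treat as "unclassified":
 *
 *      kmem_range_id_t rid = kmem_addr_get_range(addr, size);
 *      if (rid == KMEM_RANGE_ID_NONE) {
 *              // not carved out of any kmem range
 *      }
 */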

bool
kmem_is_ptr_range(vm_map_range_id_t range_id)
{
    return (range_id >= KMEM_RANGE_ID_FIRST) &&
           (range_id <= KMEM_RANGE_ID_NUM_PTR);
}

__abortlike
static void
kmem_range_invalid_for_overwrite(vm_map_offset_t addr)
{
    panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges",
        (void *)addr);
}

mach_vm_range_t
kmem_validate_range_for_overwrite(
    vm_map_offset_t addr,
    vm_map_size_t size)
{
    vm_map_range_id_t range_id = kmem_addr_get_range(addr, size);

    if (kmem_is_ptr_range(range_id)) {
        kmem_range_invalid_for_overwrite(addr);
    }

    return &kmem_ranges[range_id];
}


#pragma mark entry parameters


__abortlike
static void
__kmem_entry_validate_panic(
    vm_map_t map,
    vm_map_entry_t entry,
    vm_offset_t addr,
    vm_size_t size,
    uint32_t flags,
    kmem_guard_t guard)
{
    const char *what = "???";

    if (entry->vme_atomic != guard.kmg_atomic) {
        what = "atomicity";
    } else if (entry->is_sub_map != guard.kmg_submap) {
        what = "objectness";
    } else if (addr != entry->vme_start) {
        what = "left bound";
    } else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
        what = "right bound";
    } else if (guard.kmg_context != entry->vme_context) {
        what = "guard";
    }

    panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
        "entry:%p %s mismatch guard(0x%08x)",
        map, (void *)addr, size, flags, entry,
        what, guard.kmg_context);
}

static bool
__kmem_entry_validate_guard(
    vm_map_entry_t entry,
    vm_offset_t addr,
    vm_size_t size,
    kmem_flags_t flags,
    kmem_guard_t guard)
{
    if (entry->vme_atomic != guard.kmg_atomic) {
        return false;
    }

    if (!guard.kmg_atomic) {
        return true;
    }

    if (entry->is_sub_map != guard.kmg_submap) {
        return false;
    }

    if (addr != entry->vme_start) {
        return false;
    }

    if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
        return false;
    }

    if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
        return false;
    }

    return true;
}

void
kmem_entry_validate_guard(
    vm_map_t map,
    vm_map_entry_t entry,
    vm_offset_t addr,
    vm_size_t size,
    kmem_guard_t guard)
{
    if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
        __kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
    }
}

__abortlike
static void
__kmem_entry_validate_object_panic(
    vm_map_t map,
    vm_map_entry_t entry,
    kmem_flags_t flags)
{
    const char *what;
    const char *verb;

    if (entry->is_sub_map) {
        panic("kmem(map=%p) entry %p is a submap", map, entry);
    }

    if (flags & KMEM_KOBJECT) {
        what = "kernel";
        verb = "isn't";
    } else if (flags & KMEM_COMPRESSOR) {
        what = "compressor";
        verb = "isn't";
    } else if (entry->vme_kernel_object) {
        what = "kernel";
        verb = "is unexpectedly";
    } else {
        what = "compressor";
        verb = "is unexpectedly";
    }

    panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
        map, flags, entry, verb, what);
}

static bool
__kmem_entry_validate_object(
    vm_map_entry_t entry,
    kmem_flags_t flags)
{
    if (entry->is_sub_map) {
        return false;
    }
    if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
        return false;
    }

    return (bool)(flags & KMEM_COMPRESSOR) ==
           (VME_OBJECT(entry) == compressor_object);
}

vm_size_t
kmem_size_guard(
    vm_map_t map,
    vm_offset_t addr,
    kmem_guard_t guard)
{
    kmem_flags_t flags = KMEM_GUESS_SIZE;
    vm_map_entry_t entry;
    vm_size_t size;

    vm_map_lock_read(map);

#if KASAN_CLASSIC
    addr -= PAGE_SIZE;
#endif /* KASAN_CLASSIC */
    addr = vm_memtag_canonicalize_address(addr);

    if (!vm_map_lookup_entry(map, addr, &entry)) {
        __kmem_entry_not_found_panic(map, addr);
    }

    if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
        __kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
    }

    size = __kmem_entry_orig_size(entry);

    vm_map_unlock_read(map);

    return size;
}
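
/*
 * Illustrative caller of kmem_size_guard() (editor's sketch): recover the
 * original size of an atomic allocation when only its address and guard
 * are known, e.g. before a kmem_realloc_guard() that must be handed the
 * old size back:
 *
 *      vm_size_t oldsize = kmem_size_guard(kernel_map, addr, guard);
 */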

static inline uint16_t
kmem_hash_backtrace(
    void *fp)
{
    uint64_t bt_count;
    uintptr_t bt[8] = {};

    struct backtrace_control ctl = {
        .btc_frame_addr = (uintptr_t)fp,
    };

    bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
    return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
}

static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK,
    "Insufficient bits to represent ptr ranges");

kmem_range_id_t
kmem_adjust_range_id(
    uint32_t hash)
{
    return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
           (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
}
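
/*
 * Worked example (editor's note): with kmem_ptr_ranges pointer ranges
 * configured, a hash is folded as
 *
 *      range_id = KMEM_RANGE_ID_PTR_0 + (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges
 *
 * so every hash lands on exactly one pointer range, and the
 * KMEM_DIRECTION_MASK bit of the same hash later picks the allocation
 * direction (see kmem_apply_security_policy() below).
 */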

static bool
kmem_use_sprayqtn(
    kma_flags_t kma_flags,
    vm_map_size_t map_size,
    vm_offset_t mask)
{
    /*
     * Pointer allocations that are above the guard objects threshold or have
     * leading guard pages with non-standard alignment requests are redirected
     * to the sprayqtn range.
     */
#if DEBUG || DEVELOPMENT
    btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ?
        BTREF_GET_NOWAIT : 0;

    if ((kma_flags & KMA_SPRAYQTN) == 0) {
        if (map_size > KMEM_GOBJ_THRESHOLD) {
            btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE,
                btref_get(__builtin_frame_address(0), flags));
        } else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) {
            btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN,
                btref_get(__builtin_frame_address(0), flags));
        }
    }
#endif /* DEBUG || DEVELOPMENT */

    return (kma_flags & KMA_SPRAYQTN) ||
           (map_size > KMEM_GOBJ_THRESHOLD) ||
           ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK));
}

static void
kmem_apply_security_policy(
    vm_map_t map,
    kma_flags_t kma_flags,
    kmem_guard_t guard,
    vm_map_size_t map_size,
    vm_offset_t mask,
    vm_map_kernel_flags_t *vmk_flags,
    bool assert_dir __unused)
{
    kmem_range_id_t range_id;
    bool from_right;
    uint16_t type_hash = guard.kmg_type_hash;

    if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
        return;
    }

    /*
     * A non-zero type-hash must be passed by krealloc_type
     */
#if (DEBUG || DEVELOPMENT)
    if (assert_dir && !(kma_flags & KMA_DATA)) {
        assert(type_hash != 0);
    }
#endif

    if (kma_flags & KMA_DATA) {
        range_id = KMEM_RANGE_ID_DATA;
        /*
         * As an optimization in KMA_DATA to avoid fragmentation,
         * allocate static carveouts at the end of the DATA range.
         */
        from_right = (bool)(kma_flags & KMA_PERMANENT);
    } else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) {
        range_id = KMEM_RANGE_ID_SPRAYQTN;
        from_right = (bool)(kma_flags & KMA_PERMANENT);
    } else if (type_hash) {
        range_id = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK);
        from_right = type_hash & KMEM_DIRECTION_MASK;
    } else {
        /*
         * Range id needs to correspond to one of the PTR ranges
         */
        type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
        range_id = kmem_adjust_range_id(type_hash);
        from_right = type_hash & KMEM_DIRECTION_MASK;
    }

    vmk_flags->vmkf_range_id = range_id;
    vmk_flags->vmkf_last_free = from_right;
}
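
/*
 * Summary of the placement policy above (editor's recap):
 *
 *      KMA_DATA                  -> KMEM_RANGE_ID_DATA (KMA_PERMANENT from the right)
 *      oversized / odd-aligned   -> KMEM_RANGE_ID_SPRAYQTN
 *      non-zero type hash        -> pointer range selected by the hash
 *      otherwise                 -> pointer range selected by a backtrace hash
 */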

#pragma mark allocation

static kmem_return_t
kmem_alloc_guard_internal(
    vm_map_t map,
    vm_size_t size,
    vm_offset_t mask,
    kma_flags_t flags,
    kmem_guard_t guard,
    kern_return_t (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *))
{
    vm_object_t object;
    vm_offset_t delta = 0;
    vm_map_entry_t entry = NULL;
    vm_map_offset_t map_addr, fill_start;
    vm_map_size_t map_size, fill_size;
    vm_page_t guard_left = VM_PAGE_NULL;
    vm_page_t guard_right = VM_PAGE_NULL;
    vm_page_t wired_page_list = VM_PAGE_NULL;
    vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
    bool skip_guards;
    kmem_return_t kmr = { };

    assert(kernel_map && map->pmap == kernel_pmap);

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
        size, 0, 0, 0);
#endif

    if (size == 0 ||
        (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
        (size < __kmem_guard_size(ANYF(flags)))) {
        __kmem_invalid_size_panic(map, size, flags);
    }

    /*
     * limit the size of a single extent of wired memory
     * to try and limit the damage to the system if
     * too many pages get wired down
     * limit raised to 2GB with 128GB max physical limit,
     * but scaled by installed memory above this
     *
     * Note: kmem_alloc_contig_guard() is immune to this check.
     */
    if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
        alloc_pages == NULL &&
        size > MAX(1ULL << 31, sane_size / 64))) {
        kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
        goto out_error;
    }

    /*
     * Guard pages:
     *
     * Guard pages are implemented as fictitious pages.
     *
     * However, some maps, and some objects are known
     * to manage their memory explicitly, and do not need
     * those to be materialized, which saves memory.
     *
     * By placing guard pages on either end of a stack,
     * they can help detect cases where a thread walks
     * off either end of its stack.
     *
     * They are allocated and set up here and attempts
     * to access those pages are trapped in vm_fault_page().
     *
     * The map_size we were passed may include extra space for
     * guard pages. fill_size represents the actual size to populate.
     * Similarly, fill_start indicates where the actual pages
     * will begin in the range.
     */

    map_size = round_page(size);
    fill_start = 0;
    fill_size = map_size - __kmem_guard_size(ANYF(flags));

#if KASAN_CLASSIC
    if (flags & KMA_KASAN_GUARD) {
        assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0);
        flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
        delta = ptoa(2);
        map_size += delta;
    }
#else
    (void)delta;
#endif /* KASAN_CLASSIC */

    skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) ||
        map->never_faults;

    if (flags & KMA_GUARD_FIRST) {
        vmk_flags.vmkf_guard_before = true;
        fill_start += PAGE_SIZE;
    }
    if ((flags & KMA_GUARD_FIRST) && !skip_guards) {
        guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
        if (__improbable(guard_left == VM_PAGE_NULL)) {
            kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
            goto out_error;
        }
    }
    if ((flags & KMA_GUARD_LAST) && !skip_guards) {
        guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
        if (__improbable(guard_right == VM_PAGE_NULL)) {
            kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
            goto out_error;
        }
    }

    if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
        if (alloc_pages) {
            kmr.kmr_return = alloc_pages(fill_size, flags,
                &wired_page_list);
        } else {
            kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
                &wired_page_list);
        }
        if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
            goto out_error;
        }
    }

    /*
     * Allocate a new object (if necessary). We must do this before
     * locking the map, or risk deadlock with the default pager.
     */
    if (flags & KMA_KOBJECT) {
        object = kernel_object_default;
        vm_object_reference(object);
    } else if (flags & KMA_COMPRESSOR) {
        object = compressor_object;
        vm_object_reference(object);
    } else {
        object = vm_object_allocate(map_size);
        vm_object_lock(object);
        vm_object_set_size(object, map_size, size);
        /* stabilize the object to prevent shadowing */
        object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
        VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
        vm_object_unlock(object);
    }

    if (flags & KMA_LAST_FREE) {
        vmk_flags.vmkf_last_free = true;
    }
    if (flags & KMA_PERMANENT) {
        vmk_flags.vmf_permanent = true;
    }
    kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
        false);

    kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
        vmk_flags, &entry);
    if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
        vm_object_deallocate(object);
        goto out_error;
    }

    map_addr = entry->vme_start;
    VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
    VME_ALIAS_SET(entry, guard.kmg_tag);
    if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
        VME_OFFSET_SET(entry, map_addr);
    }

#if KASAN
    if ((flags & KMA_KOBJECT) && guard.kmg_atomic) {
        entry->vme_object_or_delta = (-size & PAGE_MASK) + delta;
    }
#endif /* KASAN */

    if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
        entry->wired_count = 1;
        vme_btref_consider_and_set(entry, __builtin_frame_address(0));
    }

    if (guard_left || guard_right || wired_page_list) {
        vm_object_offset_t offset = 0ull;

        vm_object_lock(object);
        vm_map_unlock(map);

        if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
            offset = map_addr;
        }

        if (guard_left) {
            vm_page_insert(guard_left, object, offset);
            guard_left->vmp_busy = FALSE;
            guard_left = VM_PAGE_NULL;
        }

        if (guard_right) {
            vm_page_insert(guard_right, object,
                offset + fill_start + fill_size);
            guard_right->vmp_busy = FALSE;
            guard_right = VM_PAGE_NULL;
        }

        if (wired_page_list) {
            kernel_memory_populate_object_and_unlock(object,
                map_addr + fill_start, offset + fill_start, fill_size,
                wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
                __kmem_mapping_type(ANYF(flags)));
        } else {
            vm_object_unlock(object);
        }
    } else {
        vm_map_unlock(map);
    }

    /*
     * now that the pages are wired, we no longer have to fear coalesce
     */
    if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
        vm_map_simplify(map, map_addr);
    }

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
        atop(fill_size), 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
    kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);

#if KASAN
    if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) {
        /*
         * We need to allow the range for pageable memory,
         * or faulting will not be allowed.
         */
        kasan_notify_address(map_addr, map_size);
    }
#endif /* KASAN */
#if KASAN_CLASSIC
    if (flags & KMA_KASAN_GUARD) {
        kmr.kmr_address += PAGE_SIZE;
        kasan_alloc_large(kmr.kmr_address, size);
    }
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
    if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) {
        kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, size);
        vm_memtag_set_tag((vm_offset_t)kmr.kmr_address, size);
#if KASAN_TBI
        kasan_tbi_retag_unused_space((vm_offset_t)kmr.kmr_address, map_size, size);
#endif /* KASAN_TBI */
    }
#endif /* CONFIG_KERNEL_TAGGING */
    return kmr;

out_error:
    if (flags & KMA_NOFAIL) {
        __kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
    }
    if (guard_left) {
        guard_left->vmp_snext = wired_page_list;
        wired_page_list = guard_left;
    }
    if (guard_right) {
        guard_right->vmp_snext = wired_page_list;
        wired_page_list = guard_right;
    }
    if (wired_page_list) {
        vm_page_free_list(wired_page_list, FALSE);
    }

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
        0, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */

    return kmr;
}

kmem_return_t
kmem_alloc_guard(
    vm_map_t map,
    vm_size_t size,
    vm_offset_t mask,
    kma_flags_t flags,
    kmem_guard_t guard)
{
    return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL);
}
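
/*
 * Typical call (editor's sketch; the flags, tag and context are
 * illustrative, not prescriptive):
 *
 *      kmem_guard_t guard = {
 *              .kmg_atomic  = true,
 *              .kmg_tag     = VM_KERN_MEMORY_KALLOC,
 *              .kmg_context = ctx,
 *      };
 *      kmem_return_t kmr = kmem_alloc_guard(kernel_map, size, 0,
 *          KMA_KOBJECT | KMA_NOFAIL, guard);
 *      // with KMA_NOFAIL the call panics instead of failing,
 *      // so kmr.kmr_address is always valid here
 */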

kmem_return_t
kmem_alloc_contig_guard(
    vm_map_t map,
    vm_size_t size,
    vm_offset_t mask,
    ppnum_t max_pnum,
    ppnum_t pnum_mask,
    kma_flags_t flags,
    kmem_guard_t guard)
{
    __auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) {
        return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
    };

    return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages);
}
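
/*
 * Illustrative use (editor's sketch): physically contiguous memory with
 * every page below 4GB, e.g. for DMA-limited hardware. max_pnum bounds
 * the highest acceptable physical page and pnum_mask constrains its
 * alignment:
 *
 *      kmr = kmem_alloc_contig_guard(kernel_map, size, 0,
 *          (ppnum_t)atop(1ULL << 32), 0, flags, guard);
 */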

kmem_return_t
kmem_suballoc(
    vm_map_t parent,
    mach_vm_offset_t *addr,
    vm_size_t size,
    vm_map_create_options_t vmc_options,
    int vm_flags,
    kms_flags_t flags,
    vm_tag_t tag)
{
    vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    vm_map_offset_t map_addr = 0;
    kmem_return_t kmr = { };
    vm_map_t map;

    assert(page_aligned(size));
    assert(parent->pmap == kernel_pmap);

    vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);

    if (parent == kernel_map) {
        assert(vmk_flags.vmf_overwrite || (flags & KMS_DATA));
    }

    if (vmk_flags.vmf_fixed) {
        map_addr = trunc_page(*addr);
    }

    pmap_reference(vm_map_pmap(parent));
    map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);

    /*
     * 1. vm_map_enter() will consume one ref on success.
     *
     * 2. make the entry atomic as kernel submaps should never be split.
     *
     * 3. instruct vm_map_enter() that it is a fresh submap
     *    that needs to be taught its bounds as it inserted.
     */
    vm_map_reference(map);

    vmk_flags.vmkf_submap = true;
    if ((flags & KMS_DATA) == 0) {
        /* FIXME: IOKit submaps get fragmented and can't be atomic */
        vmk_flags.vmkf_submap_atomic = true;
    }
    vmk_flags.vmkf_submap_adjust = true;
    if (flags & KMS_LAST_FREE) {
        vmk_flags.vmkf_last_free = true;
    }
    if (flags & KMS_PERMANENT) {
        vmk_flags.vmf_permanent = true;
    }
    if (flags & KMS_DATA) {
        vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
    }

    kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
        vmk_flags, (vm_object_t)map, 0, FALSE,
        VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

    if (kmr.kmr_return != KERN_SUCCESS) {
        if (flags & KMS_NOFAIL) {
            panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
                parent, size, kmr.kmr_return);
        }
        assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
        vm_map_deallocate(map);
        vm_map_deallocate(map); /* also removes ref to pmap */
        return kmr;
    }

    /*
     * For kmem_suballocs that register a claim and are assigned a range, ensure
     * that the exact same range is returned.
     */
    if (*addr != 0 && parent == kernel_map &&
        startup_phase > STARTUP_SUB_KMEM) {
        assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
    } else {
        *addr = map_addr;
    }

    kmr.kmr_submap = map;
    return kmr;
}
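
/*
 * Example (editor's sketch): carving a data submap out of the kernel map,
 * as subsystem init code typically does. The option, flag and tag values
 * are illustrative:
 *
 *      mach_vm_offset_t addr = 0;      // anywhere
 *      kmem_return_t kmr = kmem_suballoc(kernel_map, &addr, size,
 *          VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
 *          KMS_DATA | KMS_NOFAIL, VM_KERN_MEMORY_OSFMK);
 *      // kmr.kmr_submap is the new map; one parent entry covers it
 */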

/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

__exported kern_return_t
kmem_alloc_external(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size);
kern_return_t
kmem_alloc_external(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size)
{
    if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
        return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
    }
    /* Maintain ABI compatibility: invalid sizes used to be allowed */
    return size ? KERN_NO_SPACE : KERN_INVALID_ARGUMENT;
}


/*
 *	kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

__exported kern_return_t
kmem_alloc_kobject_external(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size);
kern_return_t
kmem_alloc_kobject_external(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size)
{
    if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
        return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
    }
    /* Maintain ABI compatibility: invalid sizes used to be allowed */
    return size ? KERN_NO_SPACE : KERN_INVALID_ARGUMENT;
}

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

__exported kern_return_t
kmem_alloc_pageable_external(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size);
kern_return_t
kmem_alloc_pageable_external(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size)
{
    if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
        return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA, vm_tag_bt());
    }
    /* Maintain ABI compatibility: invalid sizes used to be allowed */
    return size ? KERN_NO_SPACE : KERN_INVALID_ARGUMENT;
}

static __attribute__((always_inline, warn_unused_result))
kern_return_t
mach_vm_allocate_kernel_sanitize(
    vm_map_t map,
    mach_vm_offset_ut addr_u,
    mach_vm_size_ut size_u,
    vm_map_kernel_flags_t vmk_flags,
    vm_map_offset_t *map_addr,
    vm_map_size_t *map_size)
{
    kern_return_t result;
    vm_map_offset_t map_end;

    if (vmk_flags.vmf_fixed) {
        result = vm_sanitize_addr_size(addr_u, size_u,
            VM_SANITIZE_CALLER_VM_ALLOCATE_FIXED,
            map,
            VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS | VM_SANITIZE_FLAGS_REALIGN_START,
            map_addr, &map_end, map_size);
        if (__improbable(result != KERN_SUCCESS)) {
            return result;
        }
    } else {
        *map_addr = 0;
        result = vm_sanitize_size(0, size_u,
            VM_SANITIZE_CALLER_VM_ALLOCATE_ANYWHERE, map,
            VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS,
            map_size);
        if (__improbable(result != KERN_SUCCESS)) {
            return result;
        }
    }

    return KERN_SUCCESS;
}

kern_return_t
mach_vm_allocate_kernel(
    vm_map_t map,
    mach_vm_offset_ut *addr_u,
    mach_vm_size_ut size_u,
    vm_map_kernel_flags_t vmk_flags)
{
    vm_map_offset_t map_addr;
    vm_map_size_t map_size;
    kern_return_t result;

    if (map == VM_MAP_NULL) {
        ktriage_record(thread_tid(current_thread()),
            KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
            KDBG_TRIAGE_RESERVED,
            KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADMAP_ERROR),
            KERN_INVALID_ARGUMENT /* arg */);
        return KERN_INVALID_ARGUMENT;
    }

    if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
        VM_FLAGS_USER_ALLOCATE)) {
        return KERN_INVALID_ARGUMENT;
    }

    result = mach_vm_allocate_kernel_sanitize(map,
        *addr_u,
        size_u,
        vmk_flags,
        &map_addr,
        &map_size);
    if (__improbable(result != KERN_SUCCESS)) {
        result = vm_sanitize_get_kr(result);
        if (result == KERN_SUCCESS) {
            *addr_u = vm_sanitize_wrap_addr(0);
        } else {
            ktriage_record(thread_tid(current_thread()),
                KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
                KDBG_TRIAGE_RESERVED,
                KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADSIZE_ERROR),
                KERN_INVALID_ARGUMENT /* arg */);
        }
        return result;
    }

    vm_map_kernel_flags_update_range_id(&vmk_flags, map, map_size);

    result = vm_map_enter(
        map,
        &map_addr,
        map_size,
        (vm_map_offset_t)0,
        vmk_flags,
        VM_OBJECT_NULL,
        (vm_object_offset_t)0,
        FALSE,
        VM_PROT_DEFAULT,
        VM_PROT_ALL,
        VM_INHERIT_DEFAULT);

    if (result == KERN_SUCCESS) {
#if KASAN
        if (map->pmap == kernel_pmap) {
            kasan_notify_address(map_addr, map_size);
        }
#endif
        *addr_u = vm_sanitize_wrap_addr(map_addr);
    } else {
        ktriage_record(thread_tid(current_thread()),
            KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
            KDBG_TRIAGE_RESERVED,
            KDBG_TRIAGE_VM_ALLOCATE_KERNEL_VMMAPENTER_ERROR),
            result /* arg */);
    }
    return result;
}

#pragma mark population

static void
kernel_memory_populate_pmap_enter(
    vm_object_t object,
    vm_address_t addr,
    vm_object_offset_t offset,
    vm_page_t mem,
    vm_prot_t prot,
    int pe_flags,
    pmap_mapping_type_t mapping_type)
{
    kern_return_t pe_result;
    int pe_options;

    if (VMP_ERROR_GET(mem)) {
        panic("VM page %p should not have an error", mem);
    }

    pe_options = PMAP_OPTIONS_NOWAIT;
    if (object->internal) {
        pe_options |= PMAP_OPTIONS_INTERNAL;
    }
    if (mem->vmp_reusable || object->all_reusable) {
        pe_options |= PMAP_OPTIONS_REUSABLE;
    }

    pe_result = pmap_enter_options(kernel_pmap, addr + offset,
        VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
        pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);

    if (pe_result == KERN_RESOURCE_SHORTAGE) {
        vm_object_unlock(object);

        pe_options &= ~PMAP_OPTIONS_NOWAIT;

        pe_result = pmap_enter_options(kernel_pmap, addr + offset,
            VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
            pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);

        vm_object_lock(object);
    }

    assert(pe_result == KERN_SUCCESS);
}

void
kernel_memory_populate_object_and_unlock(
    vm_object_t object, /* must be locked */
    vm_address_t addr,
    vm_offset_t offset,
    vm_size_t size,
    vm_page_t page_list,
    kma_flags_t flags,
    vm_tag_t tag,
    vm_prot_t prot,
    pmap_mapping_type_t mapping_type)
{
    vm_page_t mem;
    int pe_flags;
    bool gobbled_list = page_list && page_list->vmp_gobbled;

    assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0));
    assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);

    if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
        assert3u(offset, ==, addr);
    } else {
        /*
         * kernel_memory_populate_pmap_enter() might drop the object
         * lock, and the caller might not own a reference anymore
         * and rely on holding the vm object lock for liveness.
         */
        vm_object_reference_locked(object);
    }

    if (flags & KMA_KSTACK) {
        pe_flags = VM_MEM_STACK;
    } else {
        pe_flags = 0;
    }

    for (vm_object_offset_t pg_offset = 0;
        pg_offset < size;
        pg_offset += PAGE_SIZE_64) {
        if (page_list == NULL) {
            panic("%s: page_list too short", __func__);
        }

        mem = page_list;
        page_list = mem->vmp_snext;
        mem->vmp_snext = NULL;

        assert(mem->vmp_wire_count == 0);
        assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
        assert(!mem->vmp_fictitious && !mem->vmp_private);

        if (flags & KMA_COMPRESSOR) {
            mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
            /*
             * Background processes doing I/O accounting can call
             * into NVME driver to do some work which results in
             * an allocation here and so we want to make sure
             * that the pages used by compressor, regardless of
             * process context, are never on the special Q.
             */
            mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;

            vm_page_insert(mem, object, offset + pg_offset);
        } else {
            mem->vmp_q_state = VM_PAGE_IS_WIRED;
            mem->vmp_wire_count = 1;

            vm_page_insert_wired(mem, object, offset + pg_offset, tag);
        }

        mem->vmp_gobbled = false;
        mem->vmp_busy = false;
        mem->vmp_pmapped = true;
        mem->vmp_wpmapped = true;

        /*
         * Manual PMAP_ENTER_OPTIONS() with shortcuts
         * for the kernel and compressor objects.
         */
        kernel_memory_populate_pmap_enter(object, addr, pg_offset,
            mem, prot, pe_flags, mapping_type);

        if (flags & KMA_NOENCRYPT) {
            pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
        }
    }

    if (page_list) {
        panic("%s: page_list too long", __func__);
    }

    vm_object_unlock(object);
    if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) {
        vm_object_deallocate(object);
    }

    /*
     * Update the accounting:
     * - the compressor "wired" pages don't really count as wired
     * - kmem_alloc_contig_guard() gives gobbled pages,
     *   which already count as wired but need to be ungobbled.
     */
    if (gobbled_list) {
        vm_page_lockspin_queues();
        if (flags & KMA_COMPRESSOR) {
            vm_page_wire_count -= atop(size);
        }
        vm_page_gobble_count -= atop(size);
        vm_page_unlock_queues();
    } else if ((flags & KMA_COMPRESSOR) == 0) {
        vm_page_lockspin_queues();
        vm_page_wire_count += atop(size);
        vm_page_unlock_queues();
    }

    if (flags & KMA_KOBJECT) {
        /* vm_page_insert_wired() handles regular objects already */
        vm_tag_update_size(tag, size, NULL);
    }

#if KASAN
    if (flags & KMA_COMPRESSOR) {
        kasan_notify_address_nopoison(addr, size);
    } else {
        kasan_notify_address(addr, size);
    }
#endif /* KASAN */
}


kern_return_t
kernel_memory_populate(
    vm_offset_t addr,
    vm_size_t size,
    kma_flags_t flags,
    vm_tag_t tag)
{
    kern_return_t kr = KERN_SUCCESS;
    vm_page_t page_list = NULL;
    vm_size_t page_count = atop_64(size);
    vm_object_t object = __kmem_object(ANYF(flags));

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
        size, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */

    kr = vm_page_alloc_list(page_count, flags, &page_list);
    if (kr == KERN_SUCCESS) {
        vm_object_lock(object);
        kernel_memory_populate_object_and_unlock(object, addr,
            addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
            __kmem_mapping_type(ANYF(flags)));
    }

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
        page_count, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
    return kr;
}

void
kernel_memory_depopulate(
    vm_offset_t addr,
    vm_size_t size,
    kma_flags_t flags,
    vm_tag_t tag)
{
    vm_object_t object = __kmem_object(ANYF(flags));
    vm_object_offset_t offset = addr;
    vm_page_t mem;
    vm_page_t local_freeq = NULL;
    unsigned int pages_unwired = 0;

    vm_object_lock(object);

    pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);

    for (vm_object_offset_t pg_offset = 0;
        pg_offset < size;
        pg_offset += PAGE_SIZE_64) {
        mem = vm_page_lookup(object, offset + pg_offset);

        assert(mem);

        if (flags & KMA_COMPRESSOR) {
            assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
        } else {
            assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
            pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
            pages_unwired++;
        }

        mem->vmp_busy = TRUE;

        assert(mem->vmp_tabled);
        vm_page_remove(mem, TRUE);
        assert(mem->vmp_busy);

        assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);

        mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
        mem->vmp_snext = local_freeq;
        local_freeq = mem;
    }

    vm_object_unlock(object);

    vm_page_free_list(local_freeq, TRUE);

    if (!(flags & KMA_COMPRESSOR)) {
        vm_page_lockspin_queues();
        vm_page_wire_count -= pages_unwired;
        vm_page_unlock_queues();
    }

    if (flags & KMA_KOBJECT) {
        /* vm_page_remove() handles regular objects already */
        vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
    }
}
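
/*
 * The two routines above pair up around VA-only allocations (editor's
 * sketch): reserve the virtual range once, then commit and release the
 * backing pages as needed:
 *
 *      kmr = kmem_alloc_guard(kernel_map, size, 0,
 *          KMA_KOBJECT | KMA_VAONLY, guard);
 *      kernel_memory_populate(kmr.kmr_address, size, KMA_KOBJECT, tag);
 *      ...
 *      kernel_memory_depopulate(kmr.kmr_address, size, KMA_KOBJECT, tag);
 */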

#pragma mark reallocation

__abortlike
static void
__kmem_realloc_invalid_object_size_panic(
    vm_map_t map,
    vm_address_t address,
    vm_size_t size,
    vm_map_entry_t entry)
{
    vm_object_t object = VME_OBJECT(entry);
    vm_size_t objsize = __kmem_entry_orig_size(entry);

    panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
        "object %p has unexpected size %ld",
        map, (void *)address, (size_t)size, entry, object, objsize);
}

__abortlike
static void
__kmem_realloc_invalid_pager_panic(
    vm_map_t map,
    vm_address_t address,
    vm_size_t size,
    vm_map_entry_t entry)
{
    vm_object_t object = VME_OBJECT(entry);
    memory_object_t pager = object->pager;
    bool pager_created = object->pager_created;
    bool pager_initialized = object->pager_initialized;
    bool pager_ready = object->pager_ready;

    panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
        "object %p has unexpected pager %p (%d,%d,%d)",
        map, (void *)address, (size_t)size, entry, object,
        pager, pager_created, pager_initialized, pager_ready);
}

static kmem_return_t
kmem_realloc_shrink_guard(
    vm_map_t map,
    vm_offset_t req_oldaddr,
    vm_size_t req_oldsize,
    vm_size_t req_newsize,
    kmr_flags_t flags,
    kmem_guard_t guard,
    vm_map_entry_t entry)
{
    vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
    vm_object_t object;
    vm_offset_t delta = 0;
    kmem_return_t kmr;
    bool was_atomic;
    vm_size_t oldsize = round_page(req_oldsize);
    vm_size_t newsize = round_page(req_newsize);
    vm_address_t oldaddr = req_oldaddr;

#if KASAN_CLASSIC
    if (flags & KMR_KASAN_GUARD) {
        assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0);
        flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
        oldaddr -= PAGE_SIZE;
        delta = ptoa(2);
        oldsize += delta;
        newsize += delta;
    }
#endif /* KASAN_CLASSIC */

    if (flags & KMR_TAG) {
        oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
    }

    vm_map_lock_assert_exclusive(map);

    if ((flags & KMR_KOBJECT) == 0) {
        object = VME_OBJECT(entry);
        vm_object_reference(object);
    }

    /*
     * Shrinking an atomic entry starts with splitting it,
     * and removing the second half.
     */
    was_atomic = entry->vme_atomic;
    entry->vme_atomic = false;
    vm_map_clip_end(map, entry, entry->vme_start + newsize);
    entry->vme_atomic = was_atomic;

#if KASAN
    if (entry->vme_kernel_object && was_atomic) {
        entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta;
    }
#if KASAN_CLASSIC
    if (flags & KMR_KASAN_GUARD) {
        kasan_poison_range(oldaddr + newsize, oldsize - newsize,
            ASAN_VALID);
    }
#endif
#if KASAN_TBI
    if (flags & KMR_TAG) {
        kasan_tbi_mark_free_space(req_oldaddr + newsize, oldsize - newsize);
    }
#endif /* KASAN_TBI */
#endif /* KASAN */
    (void)vm_map_remove_and_unlock(map,
        oldaddr + newsize, oldaddr + oldsize,
        vmr_flags, KMEM_GUARD_NONE);

    /*
     * Lastly, if there are guard pages, deal with them.
     *
     * The kernel object just needs to depopulate,
     * regular objects require freeing the last page
     * and replacing it with a guard.
     */
    if (flags & KMR_KOBJECT) {
        if (flags & KMR_GUARD_LAST) {
            kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
                PAGE_SIZE, KMA_KOBJECT, guard.kmg_tag);
        }
    } else {
        vm_page_t guard_right = VM_PAGE_NULL;
        vm_offset_t remove_start = newsize;

        if (flags & KMR_GUARD_LAST) {
            if (!map->never_faults) {
                guard_right = vm_page_grab_guard(true);
            }
            remove_start -= PAGE_SIZE;
        }

        vm_object_lock(object);

        if (object->vo_size != oldsize) {
            __kmem_realloc_invalid_object_size_panic(map,
                req_oldaddr, req_oldsize + delta, entry);
        }
        vm_object_set_size(object, newsize, req_newsize);

        vm_object_page_remove(object, remove_start, oldsize);

        if (guard_right) {
            vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
            guard_right->vmp_busy = false;
        }
        vm_object_unlock(object);
        vm_object_deallocate(object);
    }

    kmr.kmr_address = req_oldaddr;
    kmr.kmr_return = 0;
#if KASAN_CLASSIC
    if (flags & KMA_KASAN_GUARD) {
        kasan_alloc_large(kmr.kmr_address, req_newsize);
    }
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
    if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
        kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
        vm_memtag_set_tag(kmr.kmr_address, req_newsize);
        kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
    }
#endif /* KASAN_TBI */

    return kmr;
}
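/*
 * kmem_realloc_guard() degenerate cases (editor's recap of the checks
 * below): a zero old address behaves like kmem_alloc_guard(), a zero new
 * size behaves like kmem_free_guard(), and shrinking requests are
 * forwarded to kmem_realloc_shrink_guard() above.
 */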
1781 kmem_return_t
kmem_realloc_guard(vm_map_t map,vm_offset_t req_oldaddr,vm_size_t req_oldsize,vm_size_t req_newsize,kmr_flags_t flags,kmem_guard_t guard)1782 kmem_realloc_guard(
1783 vm_map_t map,
1784 vm_offset_t req_oldaddr,
1785 vm_size_t req_oldsize,
1786 vm_size_t req_newsize,
1787 kmr_flags_t flags,
1788 kmem_guard_t guard)
1789 {
1790 vm_object_t object;
1791 vm_size_t oldsize;
1792 vm_size_t newsize;
1793 vm_offset_t delta = 0;
1794 vm_map_offset_t oldaddr;
1795 vm_map_offset_t newaddr;
1796 vm_object_offset_t newoffs;
1797 vm_map_entry_t oldentry;
1798 vm_map_entry_t newentry;
1799 vm_page_t page_list = NULL;
1800 bool needs_wakeup = false;
1801 kmem_return_t kmr = { };
1802 unsigned int last_timestamp;
1803 vm_map_kernel_flags_t vmk_flags = {
1804 .vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
1805 };
1806
1807 assert(KMEM_REALLOC_FLAGS_VALID(flags));
1808 if (!guard.kmg_atomic && (flags & (KMR_DATA | KMR_KOBJECT)) != KMR_DATA) {
1809 __kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
1810 req_oldsize, flags);
1811 }
1812
1813 if (req_oldaddr == 0ul) {
1814 return kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
1815 }
1816
1817 if (req_newsize == 0ul) {
1818 kmem_free_guard(map, req_oldaddr, req_oldsize,
1819 (kmf_flags_t)flags, guard);
1820 return kmr;
1821 }
1822
1823 if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
1824 __kmem_invalid_size_panic(map, req_newsize, flags);
1825 }
1826 if (req_newsize < __kmem_guard_size(ANYF(flags))) {
1827 __kmem_invalid_size_panic(map, req_newsize, flags);
1828 }
1829
1830 oldsize = round_page(req_oldsize);
1831 newsize = round_page(req_newsize);
1832 oldaddr = req_oldaddr;
1833 #if KASAN_CLASSIC
1834 if (flags & KMR_KASAN_GUARD) {
1835 flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1836 oldaddr -= PAGE_SIZE;
1837 delta = ptoa(2);
1838 oldsize += delta;
1839 newsize += delta;
1840 }
1841 #endif /* KASAN_CLASSIC */
1842 #if CONFIG_KERNEL_TAGGING
1843 if (flags & KMR_TAG) {
1844 vm_memtag_verify_tag(req_oldaddr);
1845 oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
1846 }
1847 #endif /* CONFIG_KERNEL_TAGGING */
1848
1849 #if !KASAN
1850 /*
1851 * If not on a KASAN variant and no difference in requested size,
1852 * just return.
1853 *
1854 * Otherwise we want to validate the size and re-tag for KASAN_TBI.
1855 */
1856 if (oldsize == newsize) {
1857 kmr.kmr_address = req_oldaddr;
1858 return kmr;
1859 }
1860 #endif /* !KASAN */
1861
1862 /*
1863 * If we're growing the allocation,
1864 * then reserve the pages we'll need,
1865 * and find a spot for the new mapping.
1866 */
1867 if (oldsize < newsize) {
1868 #if DEBUG || DEVELOPMENT
1869 VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1870 DBG_VM_KERN_REQUEST, DBG_FUNC_START,
1871 newsize - oldsize, 0, 0, 0);
1872 #endif /* DEBUG || DEVELOPMENT */
1873 kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
1874 (kma_flags_t)flags, &page_list);
1875 if (kmr.kmr_return == KERN_SUCCESS) {
1876 kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
1877 newsize, 0, &vmk_flags, true);
1878 kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
1879 vmk_flags, &newentry);
1880 }
1881 if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
1882 if (flags & KMR_REALLOCF) {
1883 kmem_free_guard(map, req_oldaddr, req_oldsize,
1884 KMF_NONE, guard);
1885 }
1886 if (page_list) {
1887 vm_page_free_list(page_list, FALSE);
1888 }
1889 #if DEBUG || DEVELOPMENT
1890 VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1891 DBG_VM_KERN_REQUEST, DBG_FUNC_END,
1892 0, 0, 0, 0);
1893 #endif /* DEBUG || DEVELOPMENT */
1894 return kmr;
1895 }
1896
1897 /* map is locked */
1898 } else {
1899 vm_map_lock(map);
1900 }
1901
1902
1903 /*
1904 * Locate the entry:
1905 * - wait for it to quiesce,
1906 * - validate its guard,
1907 * - learn its correct tag.
1908 */
1909 again:
1910 if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
1911 __kmem_entry_not_found_panic(map, req_oldaddr);
1912 }
1913 if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
1914 oldentry->needs_wakeup = true;
1915 vm_map_entry_wait(map, THREAD_UNINT);
1916 goto again;
1917 }
1918 kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
1919 if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
1920 __kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
1921 }
1922 /*
1923 * TODO: We should validate for non-atomic entries that the range
1924 * we are acting on is what we expect here.
1925 */
1926 #if KASAN
1927 if (__kmem_entry_orig_size(oldentry) != req_oldsize) {
1928 __kmem_realloc_invalid_object_size_panic(map,
1929 req_oldaddr, req_oldsize + delta, oldentry);
1930 }
1931
1932 if (oldsize == newsize) {
1933 kmr.kmr_address = req_oldaddr;
1934 if (oldentry->vme_kernel_object) {
1935 oldentry->vme_object_or_delta = delta +
1936 (-req_newsize & PAGE_MASK);
1937 } else {
1938 object = VME_OBJECT(oldentry);
1939 vm_object_lock(object);
1940 vm_object_set_size(object, newsize, req_newsize);
1941 vm_object_unlock(object);
1942 }
1943 vm_map_unlock(map);
1944
1945 #if KASAN_CLASSIC
1946 if (flags & KMA_KASAN_GUARD) {
1947 kasan_alloc_large(kmr.kmr_address, req_newsize);
1948 }
1949 #endif /* KASAN_CLASSIC */
1950 #if KASAN_TBI
1951 if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1952 kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
1953 vm_memtag_set_tag(kmr.kmr_address, req_newsize);
1954 kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
1955 }
1956 #endif /* KASAN_TBI */
1957 return kmr;
1958 }
1959 #endif /* KASAN */
1960
1961 guard.kmg_tag = VME_ALIAS(oldentry);
1962
1963 if (newsize < oldsize) {
1964 return kmem_realloc_shrink_guard(map, req_oldaddr,
1965 req_oldsize, req_newsize, flags, guard, oldentry);
1966 }
1967
1968
1969 /*
1970 * We are growing the entry.
1971 *
1972 * For regular objects, we use the object `vo_size` update
1973 * as a guarantee that no two kmem_realloc() calls can happen
1974 * concurrently (by doing it before the map is unlocked).
1975 *
1976 * For the kernel object, prevent the entry from being
1977 * reallocated or changed by marking it "in_transition".
1978 */
1979
1980 object = VME_OBJECT(oldentry);
1981 vm_object_lock(object);
1982 vm_object_reference_locked(object);
1983
1984 newaddr = newentry->vme_start;
1985 newoffs = oldsize;
1986
1987 VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
1988 VME_ALIAS_SET(newentry, guard.kmg_tag);
1989 if (flags & KMR_KOBJECT) {
1990 oldentry->in_transition = true;
1991 VME_OFFSET_SET(newentry, newaddr);
1992 newentry->wired_count = 1;
1993 vme_btref_consider_and_set(newentry, __builtin_frame_address(0));
1994 newoffs = newaddr + oldsize;
1995 #if KASAN
1996 newentry->vme_object_or_delta = delta +
1997 (-req_newsize & PAGE_MASK);
1998 #endif /* KASAN */
1999 } else {
2000 if (object->pager_created || object->pager) {
2001 /*
2002 * We can't "realloc/grow" the pager, so pageable
2003 * allocations should not go through this path.
2004 */
2005 __kmem_realloc_invalid_pager_panic(map,
2006 req_oldaddr, req_oldsize + delta, oldentry);
2007 }
2008 if (object->vo_size != oldsize) {
2009 __kmem_realloc_invalid_object_size_panic(map,
2010 req_oldaddr, req_oldsize + delta, oldentry);
2011 }
2012 vm_object_set_size(object, newsize, req_newsize);
2013 }
2014
2015 last_timestamp = map->timestamp;
2016 vm_map_unlock(map);
2017
2018
2019 /*
2020 * Now proceed with the population of pages.
2021 *
2022 * Kernel objects can use the kmem population helpers.
2023 *
2024 * Regular objects will insert pages manually,
2025 * then wire the memory into the new range.
2026 */
2027
2028 vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));
2029
2030 if (flags & KMR_KOBJECT) {
2031 pmap_mapping_type_t mapping_type = __kmem_mapping_type(ANYF(flags));
2032
2033 pmap_protect(kernel_pmap,
2034 oldaddr, oldaddr + oldsize - guard_right_size,
2035 VM_PROT_NONE);
2036
2037 for (vm_object_offset_t offset = 0;
2038 offset < oldsize - guard_right_size;
2039 offset += PAGE_SIZE_64) {
2040 vm_page_t mem;
2041
2042 mem = vm_page_lookup(object, oldaddr + offset);
2043 if (mem == VM_PAGE_NULL) {
2044 continue;
2045 }
2046
2047 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
2048
2049 mem->vmp_busy = true;
2050 vm_page_remove(mem, true);
2051 vm_page_insert_wired(mem, object, newaddr + offset,
2052 guard.kmg_tag);
2053 mem->vmp_busy = false;
2054
2055 kernel_memory_populate_pmap_enter(object, newaddr,
2056 offset, mem, VM_PROT_DEFAULT, 0, mapping_type);
2057 }
2058
2059 kernel_memory_populate_object_and_unlock(object,
2060 newaddr + oldsize - guard_right_size,
2061 newoffs - guard_right_size,
2062 newsize - oldsize,
2063 page_list, (kma_flags_t)flags,
2064 guard.kmg_tag, VM_PROT_DEFAULT, mapping_type);
2065 } else {
2066 vm_page_t guard_right = VM_PAGE_NULL;
2067
2068 /*
2069 * Note: we are borrowing the new entry reference
2070 * on the object for the duration of this code,
2071 * which works because we keep the object locked
2072 * throughout.
2073 */
2074 if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
2075 guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
2076 assert(guard_right->vmp_fictitious);
2077 guard_right->vmp_busy = true;
2078 vm_page_remove(guard_right, true);
2079 }
2080
2081 if (flags & KMR_FREEOLD) {
2082 /*
2083 * Freeing the old mapping will make
2084 * the old pages become pageable until
2085 * the new mapping makes them wired again.
2086 * Let's take an extra "wire_count" to
2087 * prevent any accidental "page out".
2088 * We'll have to undo that after wiring
2089 * the new mapping.
2090 */
2091 vm_object_reference_locked(object); /* keep object alive */
2092 for (vm_object_offset_t offset = 0;
2093 offset < oldsize - guard_right_size;
2094 offset += PAGE_SIZE_64) {
2095 vm_page_t mem;
2096
2097 mem = vm_page_lookup(object, offset);
2098 assert(mem != VM_PAGE_NULL);
2099 assertf(!VM_PAGE_PAGEABLE(mem),
2100 "mem %p qstate %d",
2101 mem, mem->vmp_q_state);
2102 if (VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr) {
2103 /* guard pages are not wired */
2104 } else {
2105 assertf(VM_PAGE_WIRED(mem),
2106 "mem %p qstate %d wirecount %d",
2107 mem,
2108 mem->vmp_q_state,
2109 mem->vmp_wire_count);
2110 assertf(mem->vmp_wire_count >= 1,
2111 "mem %p wirecount %d",
2112 mem, mem->vmp_wire_count);
2113 mem->vmp_wire_count++;
2114 }
2115 }
2116 }
2117
2118 for (vm_object_offset_t offset = oldsize - guard_right_size;
2119 offset < newsize - guard_right_size;
2120 offset += PAGE_SIZE_64) {
2121 vm_page_t mem = page_list;
2122
2123 page_list = mem->vmp_snext;
2124 mem->vmp_snext = VM_PAGE_NULL;
2125 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
2126 assert(!VM_PAGE_PAGEABLE(mem));
2127
2128 vm_page_insert(mem, object, offset);
2129 mem->vmp_busy = false;
2130 }
2131
2132 if (guard_right) {
2133 vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
2134 guard_right->vmp_busy = false;
2135 }
2136
2137 vm_object_unlock(object);
2138 }
2139
2140 /*
2141 * Mark the entry as idle again,
2142 * and honor KMR_FREEOLD if needed.
2143 */
2144
2145 vm_map_lock(map);
2146 if (last_timestamp + 1 != map->timestamp &&
2147 !vm_map_lookup_entry(map, oldaddr, &oldentry)) {
2148 __kmem_entry_not_found_panic(map, req_oldaddr);
2149 }
2150
2151 if (flags & KMR_KOBJECT) {
2152 assert(oldentry->in_transition);
2153 oldentry->in_transition = false;
2154 if (oldentry->needs_wakeup) {
2155 needs_wakeup = true;
2156 oldentry->needs_wakeup = false;
2157 }
2158 }
2159
2160 if (flags & KMR_FREEOLD) {
2161 vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2162
2163 #if KASAN_CLASSIC
2164 if (flags & KMR_KASAN_GUARD) {
2165 kasan_poison_range(oldaddr, oldsize, ASAN_VALID);
2166 }
2167 #endif
2168 #if KASAN_TBI
2169 if (flags & KMR_TAG) {
2170 kasan_tbi_mark_free_space(req_oldaddr, oldsize);
2171 }
2172 #endif /* KASAN_TBI */
2173 if (flags & KMR_GUARD_LAST) {
2174 vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST;
2175 }
2176 (void)vm_map_remove_and_unlock(map,
2177 oldaddr, oldaddr + oldsize,
2178 vmr_flags, guard);
2179 } else {
2180 vm_map_unlock(map);
2181 }
2182
2183 if ((flags & KMR_KOBJECT) == 0) {
2184 kern_return_t kr;
2185 /*
2186 * This must happen _after_ we do the KMR_FREEOLD,
2187 * because wiring the pages will call into the pmap,
2188 * and if the pages are typed XNU_KERNEL_RESTRICTED,
2189 * this would cause a second mapping of the page and panic.
2190 */
2191 kr = vm_map_wire_kernel(map,
2192 vm_sanitize_wrap_addr(newaddr),
2193 vm_sanitize_wrap_addr(newaddr + newsize),
2194 vm_sanitize_wrap_prot(VM_PROT_DEFAULT),
2195 guard.kmg_tag, FALSE);
2196 assert(kr == KERN_SUCCESS);
2197
2198 if (flags & KMR_FREEOLD) {
2199 /*
2200 * Undo the extra "wiring" we made above
2201 * and release the extra reference we took
2202 * on the object.
2203 */
2204 vm_object_lock(object);
2205 for (vm_object_offset_t offset = 0;
2206 offset < oldsize - guard_right_size;
2207 offset += PAGE_SIZE_64) {
2208 vm_page_t mem;
2209
2210 mem = vm_page_lookup(object, offset);
2211 assert(mem != VM_PAGE_NULL);
2212 assertf(!VM_PAGE_PAGEABLE(mem),
2213 "mem %p qstate %d",
2214 mem, mem->vmp_q_state);
2215 if (VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr) {
2216 /* guard pages are not wired */
2217 } else {
2218 assertf(VM_PAGE_WIRED(mem),
2219 "mem %p qstate %d wirecount %d",
2220 mem,
2221 mem->vmp_q_state,
2222 mem->vmp_wire_count);
2223 assertf(mem->vmp_wire_count >= 2,
2224 "mem %p wirecount %d",
2225 mem, mem->vmp_wire_count);
2226 mem->vmp_wire_count--;
2227 assert(VM_PAGE_WIRED(mem));
2228 assert(mem->vmp_wire_count >= 1);
2229 }
2230 }
2231 vm_object_unlock(object);
2232 vm_object_deallocate(object); /* release extra ref */
2233 }
2234 }
2235
2236 if (needs_wakeup) {
2237 vm_map_entry_wakeup(map);
2238 }
2239
2240 #if DEBUG || DEVELOPMENT
2241 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
2242 atop(newsize - oldsize), 0, 0, 0);
2243 #endif /* DEBUG || DEVELOPMENT */
2244 kmr.kmr_address = newaddr;
2245
2246 #if KASAN
2247 kasan_notify_address(kmr.kmr_address, newsize);
2248 #endif /* KASAN */
2249 #if KASAN_CLASSIC
2250 if (flags & KMR_KASAN_GUARD) {
2251 kmr.kmr_address += PAGE_SIZE;
2252 kasan_alloc_large(kmr.kmr_address, req_newsize);
2253 }
2254 #endif /* KASAN_CLASSIC */
2255 #if KASAN_TBI
2256 if (flags & KMR_TAG) {
2257 kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
2258 vm_memtag_set_tag(kmr.kmr_address, req_newsize);
2259 kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
2260 }
2261 #endif /* KASAN_TBI */
2262
2263 return kmr;
2264 }
2265
2266 #pragma mark map/remap/wire
2267
2268 kern_return_t
2269 mach_vm_map_kernel(
2270 vm_map_t target_map,
2271 mach_vm_offset_ut *address,
2272 mach_vm_size_ut initial_size,
2273 mach_vm_offset_ut mask,
2274 vm_map_kernel_flags_t vmk_flags,
2275 ipc_port_t port,
2276 memory_object_offset_ut offset,
2277 boolean_t copy,
2278 vm_prot_ut cur_protection,
2279 vm_prot_ut max_protection,
2280 vm_inherit_ut inheritance)
2281 {
2282 /* range_id is set by vm_map_enter_mem_object */
2283 return vm_map_enter_mem_object(target_map,
2284 address,
2285 initial_size,
2286 mask,
2287 vmk_flags,
2288 port,
2289 offset,
2290 copy,
2291 cur_protection,
2292 max_protection,
2293 inheritance,
2294 NULL,
2295 0);
2296 }
2297
2298 kern_return_t
2299 mach_vm_remap_new_kernel(
2300 vm_map_t target_map,
2301 mach_vm_offset_ut *address,
2302 mach_vm_size_ut size,
2303 mach_vm_offset_ut mask,
2304 vm_map_kernel_flags_t vmk_flags,
2305 vm_map_t src_map,
2306 mach_vm_offset_ut memory_address,
2307 boolean_t copy,
2308 vm_prot_ut *cur_protection, /* IN/OUT */
2309 vm_prot_ut *max_protection, /* IN/OUT */
2310 vm_inherit_ut inheritance)
2311 {
2312 if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
2313 VM_FLAGS_USER_REMAP)) {
2314 return KERN_INVALID_ARGUMENT;
2315 }
2316
2317
2318 vmk_flags.vmf_return_data_addr = true;
2319
2320 /* range_id is set by vm_map_remap */
2321 return vm_map_remap(target_map,
2322 address,
2323 size,
2324 mask,
2325 vmk_flags,
2326 src_map,
2327 memory_address,
2328 copy,
2329 cur_protection,
2330 max_protection,
2331 inheritance);
2332 }
2333
2334 #pragma mark free
2335
2336 #if KASAN
2337
2338 __abortlike
2339 static void
2340 __kmem_free_invalid_object_size_panic(
2341 vm_map_t map,
2342 vm_address_t address,
2343 vm_size_t size,
2344 vm_map_entry_t entry)
2345 {
2346 vm_object_t object = VME_OBJECT(entry);
2347 vm_size_t objsize = __kmem_entry_orig_size(entry);
2348
2349 panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): "
2350 "object %p has unexpected size %ld",
2351 map, (void *)address, (size_t)size, entry, object, objsize);
2352 }
2353
2354 #endif /* KASAN */
2355
2356 vm_size_t
2357 kmem_free_guard(
2358 vm_map_t map,
2359 vm_offset_t req_addr,
2360 vm_size_t req_size,
2361 kmf_flags_t flags,
2362 kmem_guard_t guard)
2363 {
2364 vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2365 vm_address_t addr = req_addr;
2366 vm_offset_t delta = 0;
2367 vm_size_t size;
2368 #if KASAN
2369 vm_map_entry_t entry;
2370 #endif /* KASAN */
2371
2372 assert(map->pmap == kernel_pmap);
2373
2374 #if KASAN_CLASSIC
2375 if (flags & KMF_KASAN_GUARD) {
2376 addr -= PAGE_SIZE;
2377 delta = ptoa(2);
2378 }
2379 #endif /* KASAN_CLASSIC */
2380 #if CONFIG_KERNEL_TAGGING
2381 if (flags & KMF_TAG) {
2382 vm_memtag_verify_tag(req_addr);
2383 addr = vm_memtag_canonicalize_address(req_addr);
2384 }
2385 #endif /* CONFIG_KERNEL_TAGGING */
2386
2387 if (flags & KMF_GUESS_SIZE) {
2388 vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
2389 size = PAGE_SIZE;
2390 } else if (req_size == 0) {
2391 __kmem_invalid_size_panic(map, req_size, flags);
2392 } else {
2393 size = round_page(req_size) + delta;
2394 }
2395
2396 vm_map_lock(map);
2397
2398 #if KASAN
2399 if (!vm_map_lookup_entry(map, addr, &entry)) {
2400 __kmem_entry_not_found_panic(map, req_addr);
2401 }
2402 if (flags & KMF_GUESS_SIZE) {
2403 vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
2404 req_size = __kmem_entry_orig_size(entry);
2405 size = round_page(req_size + delta);
2406 } else if (guard.kmg_atomic && entry->vme_kernel_object &&
2407 __kmem_entry_orig_size(entry) != req_size) {
2408 /*
2409 * We can't make a strict check for regular
2410 * VM objects because it could be:
2411 *
2412 * - the kmem_free_guard() of a kmem_realloc_guard() without
2413 * KMR_FREEOLD, and in that case the object size won't match.
2414 *
2415 * - a submap, in which case there is no "orig size".
2416 */
2417 __kmem_free_invalid_object_size_panic(map,
2418 req_addr, req_size + delta, entry);
2419 }
2420 #endif /* KASAN */
2421 #if KASAN_CLASSIC
2422 if (flags & KMR_KASAN_GUARD) {
2423 kasan_poison_range(addr, size, ASAN_VALID);
2424 }
2425 #endif
2426 #if KASAN_TBI
2427 if (flags & KMF_TAG) {
2428 kasan_tbi_mark_free_space(req_addr, size);
2429 }
2430 #endif /* KASAN_TBI */
2431
2432 /*
2433 * vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which
2434 * unwires the kernel mapping. The page won't be mapped any longer, so
2435 * no extra step is required for memory tagging to "clear"
2436 * it -- the page will be laundered later when reused.
2437 */
2438 return vm_map_remove_and_unlock(map, addr, addr + size,
2439 vmr_flags, guard).kmr_size - delta;
2440 }
2441
2442 __exported void
2443 kmem_free_external(
2444 vm_map_t map,
2445 vm_offset_t addr,
2446 vm_size_t size);
2447 void
2448 kmem_free_external(
2449 vm_map_t map,
2450 vm_offset_t addr,
2451 vm_size_t size)
2452 {
2453 if (size) {
2454 kmem_free(map, trunc_page(addr), size);
2455 #if MACH_ASSERT
2456 } else {
2457 printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
2458 map, (void *)addr, __builtin_return_address(0));
2459 #endif
2460 }
2461 }
2462
2463 #pragma mark kmem metadata
2464
2465 /*
2466 * Guard objects for kmem pointer allocation:
2467 *
2468 * Guard objects introduce size slabs to kmem pointer allocations, which are
2469 * allocated in chunks of n * sizeclass. When an allocation of a specific
2470 * sizeclass is requested, a random slot from [0, n) is returned.
2471 * Allocations are served from that chunk until only m slots are left. The
2472 * remaining m slots are referred to as guard objects: they don't get
2473 * allocated, and the chunk is then considered full. When an allocation is
2474 * freed back to the chunk, m + 1 slots are free, so one slot becomes
2475 * available again for the next allocation of that sizeclass.
2476 *
2477 * Guard objects are intended to make exploitation of use-after-frees harder,
2478 * as allocations that are freed can no longer be reliably reallocated.
2479 * They also make exploitation of OOBs harder, as overflowing out of an
2480 * allocation can no longer be done safely even with sufficient spraying.
2481 */
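/*
 * An illustrative sketch of the scheme above (hypothetical values),
 * assuming a chunk with n = 8 slots and m = KMEM_NUM_GUARDS = 2:
 *
 *	km_bitmap = 0xff;	// fresh chunk: 8 free slots
 *	...			// allocations clear random set bits
 *	km_bitmap = 0x24;	// 2 slots left: the chunk is "full" and
 *				// the two survivors act as guard objects
 *	km_bitmap = 0x2c;	// one slot freed back: 3 free slots, so
 *				// the chunk is "partial" again
 */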
2482
2483 #define KMEM_META_PRIMARY UINT8_MAX
2484 #define KMEM_META_START (UINT8_MAX - 1)
2485 #define KMEM_META_FREE (UINT8_MAX - 2)
2486 #if __ARM_16K_PG__
2487 #define KMEM_MIN_SIZE PAGE_SIZE
2488 #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 16)
2489 #else /* __ARM_16K_PG__ */
2490 /*
2491 * PAGE_SIZE isn't a compile time constant on some arm64 devices. Those
2492 * devices use 4k page size when their RAM is <= 1GB and 16k otherwise.
2493 * Therefore populate sizeclasses from 4k for those devices.
2494 */
2495 #define KMEM_MIN_SIZE (4 * 1024)
2496 #define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 32)
2497 #endif /* __ARM_16K_PG__ */
2498 #define KMEM_MAX_SIZE (32ULL << 20)
2499 #define KMEM_START_IDX (kmem_log2down(KMEM_MIN_SIZE))
2500 #define KMEM_LAST_IDX (kmem_log2down(KMEM_MAX_SIZE))
2501 #define KMEM_NUM_SIZECLASS (KMEM_LAST_IDX - KMEM_START_IDX + 1)
2502 #define KMEM_FRONTS (KMEM_RANGE_ID_NUM_PTR * 2)
2503 #define KMEM_NUM_GUARDS 2
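/*
 * Worked example, assuming the 4k KMEM_MIN_SIZE configuration:
 * KMEM_START_IDX = log2(4k) = 12 and KMEM_LAST_IDX = log2(32MB) = 25,
 * so KMEM_NUM_SIZECLASS = 14 sizeclasses (4k, 8k, 16k, ..., 32MB),
 * carved out of chunks of KMEM_CHUNK_SIZE_MIN = 128k.
 */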
2504
2505 typedef struct kmem_page_meta {
2506 union {
2507 /*
2508 * On primary allocated chunk with KMEM_META_PRIMARY marker
2509 */
2510 uint32_t km_bitmap;
2511 /*
2512 * On start and end of free chunk with KMEM_META_FREE marker
2513 */
2514 uint32_t km_free_chunks;
2515 };
2516 /*
2517 * KMEM_META_PRIMARY: Start meta of allocated chunk
2518 * KMEM_META_FREE : Start and end meta of free chunk
2519 * KMEM_META_START : Meta region start and end
2520 */
2521 uint8_t km_page_marker;
2522 uint8_t km_sizeclass;
2523 union {
2524 /*
2525 * On primary allocated chunk with KMEM_META_PRIMARY marker
2526 */
2527 uint16_t km_chunk_len;
2528 /*
2529 * On secondary allocated chunks
2530 */
2531 uint16_t km_page_idx;
2532 };
2533 LIST_ENTRY(kmem_page_meta) km_link;
2534 } kmem_page_meta_t;
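/*
 * Illustrative metadata layout for one allocated 3-chunk run
 * (a sketch with hypothetical values):
 *
 *	meta[0]: km_page_marker = KMEM_META_PRIMARY, km_chunk_len = 3,
 *	         km_bitmap tracks the free slots of the whole run
 *	meta[1]: km_page_marker = 0, km_page_idx = 1, km_bitmap = 0
 *	meta[2]: km_page_marker = 0, km_page_idx = 2, km_bitmap = 0
 */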
2535
2536 typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t;
2537 struct kmem_sizeclass {
2538 vm_map_size_t ks_size;
2539 uint32_t ks_num_chunk;
2540 uint32_t ks_num_elem;
2541 crypto_random_ctx_t __zpercpu ks_rng_ctx;
2542 kmem_list_head_t ks_allfree_head[KMEM_FRONTS];
2543 kmem_list_head_t ks_partial_head[KMEM_FRONTS];
2544 kmem_list_head_t ks_full_head[KMEM_FRONTS];
2545 };
2546
2547 static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS];
2548
2549 /*
2550 * Locks to synchronize metadata population
2551 */
2552 static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks");
2553 static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp);
2554 #define kmem_meta_lock() lck_mtx_lock(&kmem_meta_region_lck)
2555 #define kmem_meta_unlock() lck_mtx_unlock(&kmem_meta_region_lck)
2556
2557 static SECURITY_READ_ONLY_LATE(struct mach_vm_range)
2558 kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1];
2559 static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *)
2560 kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1];
2561 /*
2562 * Keeps track of metadata high water mark for each front
2563 */
2564 static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS];
2565 static SECURITY_READ_ONLY_LATE(vm_map_t)
2566 kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1];
2567 static vm_map_size_t kmem_meta_size;
2568
2569 static uint32_t
2570 kmem_get_front(
2571 kmem_range_id_t range_id,
2572 bool from_right)
2573 {
2574 assert((range_id >= KMEM_RANGE_ID_FIRST) &&
2575 (range_id <= KMEM_RANGE_ID_NUM_PTR));
2576 return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right;
2577 }
2578
2579 static inline uint32_t
2580 kmem_slot_idx_to_bit(
2581 uint32_t slot_idx,
2582 uint32_t size_idx __unused)
2583 {
2584 assert(slot_idx < kmem_size_array[size_idx].ks_num_elem);
2585 return 1ull << slot_idx;
2586 }
2587
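/*
 * Maps a size to its sizeclass index, rounding up to the next power
 * of two: e.g. with KMEM_START_IDX = 12 (4k configuration),
 * kmem_get_idx_from_size(4096) is 0 (the 4k class) while
 * kmem_get_idx_from_size(4097) is 1 (the 8k class), courtesy of the
 * `size - 1`.
 */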
2588 static uint32_t
2589 kmem_get_idx_from_size(vm_map_size_t size)
2590 {
2591 assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE);
2592 return kmem_log2down(size - 1) - KMEM_START_IDX + 1;
2593 }
2594
2595 __abortlike
2596 static void
2597 kmem_invalid_size_idx(uint32_t idx)
2598 {
2599 panic("Invalid sizeclass idx %u", idx);
2600 }
2601
2602 static vm_map_size_t
2603 kmem_get_size_from_idx(uint32_t idx)
2604 {
2605 if (__improbable(idx >= KMEM_NUM_SIZECLASS)) {
2606 kmem_invalid_size_idx(idx);
2607 }
2608 return 1ul << (idx + KMEM_START_IDX);
2609 }
2610
2611 static inline uint16_t
2612 kmem_get_page_idx(struct kmem_page_meta *meta)
2613 {
2614 uint8_t page_marker = meta->km_page_marker;
2615
2616 return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx;
2617 }
2618
2619 __abortlike
2620 static void
2621 kmem_invalid_chunk_len(struct kmem_page_meta *meta)
2622 {
2623 panic("Reading free chunks for meta %p where marker != KMEM_META_PRIMARY",
2624 meta);
2625 }
2626
2627 static inline uint16_t
2628 kmem_get_chunk_len(struct kmem_page_meta *meta)
2629 {
2630 if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) {
2631 kmem_invalid_chunk_len(meta);
2632 }
2633
2634 return meta->km_chunk_len;
2635 }
2636
2637 __abortlike
2638 static void
2639 kmem_invalid_free_chunk_len(struct kmem_page_meta *meta)
2640 {
2641 panic("Reading free chunks for meta %p where marker != KMEM_META_FREE",
2642 meta);
2643 }
2644
2645 static inline uint32_t
2646 kmem_get_free_chunk_len(struct kmem_page_meta *meta)
2647 {
2648 if (__improbable(meta->km_page_marker != KMEM_META_FREE)) {
2649 kmem_invalid_free_chunk_len(meta);
2650 }
2651
2652 return meta->km_free_chunks;
2653 }
2654
2655 /*
2656 * Return the metadata corresponding to the specified address
2657 */
2658 static struct kmem_page_meta *
2659 kmem_addr_to_meta(
2660 vm_map_offset_t addr,
2661 vm_map_range_id_t range_id,
2662 vm_map_offset_t *range_start,
2663 uint64_t *meta_idx)
2664 {
2665 struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
2666
2667 *range_start = kmem_ranges[range_id].min_address;
2668 *meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN;
2669 return &meta_base[*meta_idx];
2670 }
2671
2672 /*
2673 * Return the metadata start of the chunk that the address belongs to
2674 */
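/*
 * Example: if `addr` lands in the third chunk of an allocated run,
 * its metadata has km_page_idx == 2, so we step back two metadata
 * entries (and two chunks) to reach the KMEM_META_PRIMARY metadata
 * and the start address of the run.
 */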
2675 static struct kmem_page_meta *
2676 kmem_addr_to_meta_start(
2677 vm_address_t addr,
2678 vm_map_range_id_t range_id,
2679 vm_map_offset_t *chunk_start)
2680 {
2681 vm_map_offset_t range_start;
2682 uint64_t meta_idx;
2683 struct kmem_page_meta *meta;
2684
2685 meta = kmem_addr_to_meta(addr, range_id, &range_start, &meta_idx);
2686 meta_idx -= kmem_get_page_idx(meta);
2687 meta -= kmem_get_page_idx(meta);
2688 assert(meta->km_page_marker == KMEM_META_PRIMARY);
2689 *chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN);
2690 return meta;
2691 }
2692
2693 __startup_func
2694 static void
2695 kmem_init_meta_front(
2696 struct kmem_page_meta *meta,
2697 kmem_range_id_t range_id,
2698 bool from_right)
2699 {
2700 kernel_memory_populate(trunc_page((vm_map_offset_t) meta), PAGE_SIZE,
2701 KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK);
2702 meta->km_page_marker = KMEM_META_START;
2703 if (!from_right) {
2704 meta++;
2705 kmem_meta_base[range_id] = meta;
2706 }
2707 kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta;
2708 }
2709
2710 __startup_func
2711 static void
2712 kmem_metadata_init(void)
2713 {
2714 for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) {
2715 vm_map_offset_t addr = kmem_meta_range[i].min_address;
2716 struct kmem_page_meta *meta;
2717 uint64_t meta_idx;
2718
2719 vm_map_will_allocate_early_map(&kmem_meta_map[i]);
2720 kmem_meta_map[i] = kmem_suballoc(kernel_map, &addr, kmem_meta_size,
2721 VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST,
2722 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, KMS_PERMANENT | KMS_NOFAIL,
2723 VM_KERN_MEMORY_OSFMK).kmr_submap;
2724
2725 kmem_meta_range[i].min_address = addr;
2726 kmem_meta_range[i].max_address = addr + kmem_meta_size;
2727
2728 meta = (struct kmem_page_meta *) kmem_meta_range[i].min_address;
2729 kmem_init_meta_front(meta, i, 0);
2730
2731 meta = kmem_addr_to_meta(kmem_ranges[i].max_address, i, &addr,
2732 &meta_idx);
2733 kmem_init_meta_front(meta, i, 1);
2734 }
2735 }
2736
2737 __startup_func
2738 static void
2739 kmem_init_front_head(
2740 struct kmem_sizeclass *ks,
2741 uint32_t front)
2742 {
2743 LIST_INIT(&ks->ks_allfree_head[front]);
2744 LIST_INIT(&ks->ks_partial_head[front]);
2745 LIST_INIT(&ks->ks_full_head[front]);
2746 }
2747
2748 __startup_func
2749 static void
2750 kmem_sizeclass_init(void)
2751 {
2752 for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2753 struct kmem_sizeclass *ks = &kmem_size_array[i];
2754 kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
2755
2756 ks->ks_size = kmem_get_size_from_idx(i);
2757 ks->ks_num_chunk = roundup(8 * ks->ks_size, KMEM_CHUNK_SIZE_MIN) /
2758 KMEM_CHUNK_SIZE_MIN;
2759 ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size;
2760 assert(ks->ks_num_elem <=
2761 (sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8));
2762 for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) {
2763 kmem_init_front_head(ks, kmem_get_front(range_id, 0));
2764 kmem_init_front_head(ks, kmem_get_front(range_id, 1));
2765 }
2766 }
2767 }
2768
2769 /*
2770 * This is done during EARLY_BOOT as it needs the corecrypto module to be
2771 * set up.
2772 */
2773 __startup_func
2774 static void
2775 kmem_crypto_init(void)
2776 {
2777 vm_size_t ctx_size = crypto_random_kmem_ctx_size();
2778
2779 for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2780 struct kmem_sizeclass *ks = &kmem_size_array[i];
2781
2782 ks->ks_rng_ctx = zalloc_percpu_permanent(ctx_size, ZALIGN_PTR);
2783 zpercpu_foreach(ctx, ks->ks_rng_ctx) {
2784 crypto_random_kmem_init(ctx);
2785 }
2786 }
2787 }
2788 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init);
2789
2790 __abortlike
2791 static void
2792 kmem_validate_slot_panic(
2793 vm_map_offset_t addr,
2794 struct kmem_page_meta *meta,
2795 uint32_t slot_idx,
2796 uint32_t size_idx)
2797 {
2798 if (meta->km_page_marker != KMEM_META_PRIMARY) {
2799 panic("Metadata (%p) for addr (%p) not primary", meta, (void *)addr);
2800 }
2801 if (meta->km_sizeclass != size_idx) {
2802 panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion",
2803 meta, meta->km_sizeclass, size_idx);
2804 }
2805 panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free",
2806 slot_idx, meta, (void *)addr);
2807 }
2808
2809 __abortlike
2810 static void
2811 kmem_invalid_slot_for_addr(
2812 mach_vm_range_t slot,
2813 vm_map_offset_t start,
2814 vm_map_offset_t end)
2815 {
2816 panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]",
2817 (void *)slot->min_address, (void *)slot->max_address,
2818 (void *)start, (void *)end);
2819 }
2820
2821 void
2822 kmem_validate_slot(
2823 vm_map_offset_t addr,
2824 struct kmem_page_meta *meta,
2825 uint32_t size_idx,
2826 uint32_t slot_idx)
2827 {
2828 if ((meta->km_page_marker != KMEM_META_PRIMARY) ||
2829 (meta->km_sizeclass != size_idx) ||
2830 ((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) {
2831 kmem_validate_slot_panic(addr, meta, slot_idx, size_idx);
2832 }
2833 }
2834
2835 static void
2836 kmem_validate_slot_initial(
2837 mach_vm_range_t slot,
2838 vm_map_offset_t start,
2839 vm_map_offset_t end,
2840 struct kmem_page_meta *meta,
2841 uint32_t size_idx,
2842 uint32_t slot_idx)
2843 {
2844 if ((slot->min_address == 0) || (slot->max_address == 0) ||
2845 (start < slot->min_address) || (start >= slot->max_address) ||
2846 (end > slot->max_address)) {
2847 kmem_invalid_slot_for_addr(slot, start, end);
2848 }
2849
2850 kmem_validate_slot(start, meta, size_idx, slot_idx);
2851 }
2852
2853 uint32_t
2854 kmem_addr_get_slot_idx(
2855 vm_map_offset_t start,
2856 vm_map_offset_t end,
2857 vm_map_range_id_t range_id,
2858 struct kmem_page_meta **meta,
2859 uint32_t *size_idx,
2860 mach_vm_range_t slot)
2861 {
2862 vm_map_offset_t chunk_start;
2863 vm_map_size_t slot_size;
2864 uint32_t slot_idx;
2865
2866 *meta = kmem_addr_to_meta_start(start, range_id, &chunk_start);
2867 *size_idx = (*meta)->km_sizeclass;
2868 slot_size = kmem_get_size_from_idx(*size_idx);
2869 slot_idx = (start - chunk_start) / slot_size;
2870 slot->min_address = chunk_start + slot_idx * slot_size;
2871 slot->max_address = slot->min_address + slot_size;
2872
2873 kmem_validate_slot_initial(slot, start, end, *meta, *size_idx, slot_idx);
2874
2875 return slot_idx;
2876 }
2877
2878 static bool
2879 kmem_populate_needed(vm_offset_t from, vm_offset_t to)
2880 {
2881 #if KASAN
2882 #pragma unused(from, to)
2883 return true;
2884 #else
2885 vm_offset_t page_addr = trunc_page(from);
2886
2887 for (; page_addr < to; page_addr += PAGE_SIZE) {
2888 /*
2889 * This can race with another thread doing a populate on the same metadata
2890 * page, where we see an updated pmap but unmapped KASan shadow, causing a
2891 * fault in the shadow when we first access the metadata page. Avoid this
2892 * by always synchronizing on the kmem_meta_lock with KASan.
2893 */
2894 if (!pmap_find_phys(kernel_pmap, page_addr)) {
2895 return true;
2896 }
2897 }
2898
2899 return false;
2900 #endif /* !KASAN */
2901 }
2902
2903 static void
2904 kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to)
2905 {
2906 vm_offset_t page_addr = trunc_page(from);
2907
2908 vm_map_unlock(kernel_map);
2909
2910 for (; page_addr < to; page_addr += PAGE_SIZE) {
2911 for (;;) {
2912 kern_return_t ret = KERN_SUCCESS;
2913
2914 /*
2915 * All updates to kmem metadata are done under the kmem_meta_lock
2916 */
2917 kmem_meta_lock();
2918 if (0 == pmap_find_phys(kernel_pmap, page_addr)) {
2919 ret = kernel_memory_populate(page_addr,
2920 PAGE_SIZE, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
2921 VM_KERN_MEMORY_OSFMK);
2922 }
2923 kmem_meta_unlock();
2924
2925 if (ret == KERN_SUCCESS) {
2926 break;
2927 }
2928
2929 /*
2930 * We can't pass KMA_NOPAGEWAIT under a global lock as it leads
2931 * to bad system deadlocks, so if the allocation failed,
2932 * we need to do the VM_PAGE_WAIT() outside of the lock.
2933 */
2934 VM_PAGE_WAIT();
2935 }
2936 }
2937
2938 vm_map_lock(kernel_map);
2939 }
2940
2941 __abortlike
2942 static void
2943 kmem_invalid_meta_panic(
2944 struct kmem_page_meta *meta,
2945 uint32_t slot_idx,
2946 struct kmem_sizeclass sizeclass)
2947 {
2948 uint32_t size_idx = kmem_get_idx_from_size(sizeclass.ks_size);
2949
2950 if (slot_idx >= sizeclass.ks_num_elem) {
2951 panic("Invalid slot idx %u [0:%u] for meta %p", slot_idx,
2952 sizeclass.ks_num_elem, meta);
2953 }
2954 if (meta->km_sizeclass != size_idx) {
2955 panic("Invalid size_idx (%u != %u) in meta %p", size_idx,
2956 meta->km_sizeclass, meta);
2957 }
2958 panic("page_marker %u not primary in meta %p", meta->km_page_marker, meta);
2959 }
2960
2961 __abortlike
2962 static void
2963 kmem_slot_has_entry_panic(
2964 vm_map_entry_t entry,
2965 vm_map_offset_t addr)
2966 {
2967 panic("Entry (%p) already exists for addr (%p) being returned",
2968 entry, (void *)addr);
2969 }
2970
2971 __abortlike
2972 static void
2973 kmem_slot_not_found(
2974 struct kmem_page_meta *meta,
2975 uint32_t slot_idx)
2976 {
2977 panic("%uth free slot not found for meta %p bitmap %u", slot_idx, meta,
2978 meta->km_bitmap);
2979 }
2980
2981 /*
2982 * Returns a 16-bit random number between 0 and
2983 * upper_limit (inclusive)
2984 */
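/*
 * Note: the 64 bits of early entropy are consumed 32 bits at a time
 * (reseeding from early_random() once exhausted), and the final
 * `% (upper_limit + 1)` carries a slight modulo bias. Both are
 * acceptable here since this path only runs during early startup,
 * before the corecrypto RNG is available.
 */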
2985 __startup_func
2986 uint16_t
2987 kmem_get_random16(
2988 uint16_t upper_limit)
2989 {
2990 static uint64_t random_entropy;
2991 assert(upper_limit < UINT16_MAX);
2992 if (random_entropy == 0) {
2993 random_entropy = early_random();
2994 }
2995 uint32_t result = random_entropy & UINT32_MAX;
2996 random_entropy >>= 32;
2997 return (uint16_t)(result % (upper_limit + 1));
2998 }
2999
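/*
 * Returns the bit position of the n-th (0-based) set bit in `bitmap`.
 * Worked example: bitmap = 0b1101 (slots 0, 2 and 3 free), n = 1:
 * the first pass accounts for the free slot at bit 0 (ones_seen = 1),
 * the second skips the allocated bit 1 (zeros_seen = 1) and, as the
 * following free run covers the n-th slot, returns zeros_seen + n = 2,
 * i.e. slot 2.
 */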
3000 static uint32_t
3001 kmem_get_nth_free_slot(
3002 struct kmem_page_meta *meta,
3003 uint32_t n,
3004 uint32_t bitmap)
3005 {
3006 uint32_t zeros_seen = 0, ones_seen = 0;
3007
3008 while (bitmap) {
3009 uint32_t count = __builtin_ctz(bitmap);
3010
3011 zeros_seen += count;
3012 bitmap >>= count;
3013 if (__probable(~bitmap)) {
3014 count = __builtin_ctz(~bitmap);
3015 } else {
3016 count = 32;
3017 }
3018 if (count + ones_seen > n) {
3019 return zeros_seen + n;
3020 }
3021 ones_seen += count;
3022 bitmap >>= count;
3023 }
3024
3025 kmem_slot_not_found(meta, n);
3026 }
3027
3028
3029 static uint32_t
3030 kmem_get_next_slot(
3031 struct kmem_page_meta *meta,
3032 struct kmem_sizeclass sizeclass,
3033 uint32_t bitmap)
3034 {
3035 uint32_t num_slots = __builtin_popcount(bitmap);
3036 uint64_t slot_idx = 0;
3037
3038 assert(num_slots > 0);
3039 if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
3040 /*
3041 * Use early random prior to early boot, as the ks_rng_ctx requires
3042 * the corecrypto module to be set up before it is initialized and
3043 * used.
3044 *
3045 * num_slots can't be 0 as we take this path when we have more than
3046 * one slot left.
3047 */
3048 slot_idx = kmem_get_random16((uint16_t)num_slots - 1);
3049 } else {
3050 crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), num_slots,
3051 &slot_idx);
3052 }
3053
3054 return kmem_get_nth_free_slot(meta, slot_idx, bitmap);
3055 }
3056
3057 /*
3058 * Returns an unallocated slot from the given metadata
3059 */
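/*
 * Address math sketch, assuming the 4k configuration
 * (KMEM_CHUNK_SIZE_MIN = 128k): for a primary metadata at
 * meta_idx = 3 and the 16k sizeclass, slot 2 maps to
 * range_start + 3 * 128k + 2 * 16k = range_start + 416k.
 */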
3060 static vm_map_offset_t
3061 kmem_get_addr_from_meta(
3062 struct kmem_page_meta *meta,
3063 vm_map_range_id_t range_id,
3064 struct kmem_sizeclass sizeclass,
3065 vm_map_entry_t *entry)
3066 {
3067 vm_map_offset_t addr;
3068 vm_map_size_t size = sizeclass.ks_size;
3069 uint32_t size_idx = kmem_get_idx_from_size(size);
3070 uint64_t meta_idx = meta - kmem_meta_base[range_id];
3071 mach_vm_offset_t range_start = kmem_ranges[range_id].min_address;
3072 uint32_t slot_bit;
3073 uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, meta->km_bitmap);
3074
3075 if ((slot_idx >= sizeclass.ks_num_elem) ||
3076 (meta->km_sizeclass != size_idx) ||
3077 (meta->km_page_marker != KMEM_META_PRIMARY)) {
3078 kmem_invalid_meta_panic(meta, slot_idx, sizeclass);
3079 }
3080
3081 slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx);
3082 meta->km_bitmap &= ~slot_bit;
3083
3084 addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size);
3085 assert(kmem_range_contains_fully(range_id, addr, size));
3086 if (vm_map_lookup_entry(kernel_map, addr, entry)) {
3087 kmem_slot_has_entry_panic(*entry, addr);
3088 }
3089 if ((*entry != vm_map_to_entry(kernel_map)) &&
3090 ((*entry)->vme_next != vm_map_to_entry(kernel_map)) &&
3091 ((*entry)->vme_next->vme_start < (addr + size))) {
3092 kmem_slot_has_entry_panic(*entry, addr);
3093 }
3094 return addr;
3095 }
3096
3097 __abortlike
3098 static void
3099 kmem_range_out_of_va(
3100 kmem_range_id_t range_id,
3101 uint32_t num_chunks)
3102 {
3103 panic("No more VA to allocate %u chunks in range %u", num_chunks, range_id);
3104 }
3105
3106 static void
3107 kmem_init_allocated_chunk(
3108 struct kmem_page_meta *meta,
3109 struct kmem_sizeclass sizeclass,
3110 uint32_t size_idx)
3111 {
3112 uint32_t meta_num = sizeclass.ks_num_chunk;
3113 uint32_t num_elem = sizeclass.ks_num_elem;
3114
3115 meta->km_bitmap = (1ull << num_elem) - 1;
3116 meta->km_chunk_len = (uint16_t)meta_num;
3117 assert(LIST_NEXT(meta, km_link) == NULL);
3118 assert(meta->km_link.le_prev == NULL);
3119 meta->km_sizeclass = (uint8_t)size_idx;
3120 meta->km_page_marker = KMEM_META_PRIMARY;
3121 meta++;
3122 for (uint32_t i = 1; i < meta_num; i++) {
3123 meta->km_page_idx = (uint16_t)i;
3124 meta->km_sizeclass = (uint8_t)size_idx;
3125 meta->km_page_marker = 0;
3126 meta->km_bitmap = 0;
3127 meta++;
3128 }
3129 }
3130
3131 static uint32_t
3132 kmem_get_additional_meta(
3133 struct kmem_page_meta *meta,
3134 uint32_t meta_req,
3135 bool from_right,
3136 struct kmem_page_meta **adj_free_meta)
3137 {
3138 struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1);
3139
3140 if (meta_prev->km_page_marker == KMEM_META_FREE) {
3141 uint32_t chunk_len = kmem_get_free_chunk_len(meta_prev);
3142
3143 *adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1);
3144 meta_req -= chunk_len;
3145 } else {
3146 *adj_free_meta = NULL;
3147 }
3148
3149 return meta_req;
3150 }
3151
3152
3153 static struct kmem_page_meta *
3154 kmem_get_new_chunk(
3155 vm_map_range_id_t range_id,
3156 bool from_right,
3157 uint32_t size_idx)
3158 {
3159 struct kmem_sizeclass sizeclass = kmem_size_array[size_idx];
3160 struct kmem_page_meta *start, *end, *meta_update;
3161 struct kmem_page_meta *adj_free_meta = NULL;
3162 uint32_t meta_req = sizeclass.ks_num_chunk;
3163
3164 for (;;) {
3165 struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3166 struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3167 struct kmem_page_meta *meta;
3168 vm_offset_t start_addr, end_addr;
3169 uint32_t meta_num;
3170
3171 meta = from_right ? metab : metaf;
3172 meta_num = kmem_get_additional_meta(meta, meta_req, from_right,
3173 &adj_free_meta);
3174
3175 if (metaf + meta_num >= metab) {
3176 kmem_range_out_of_va(range_id, meta_num);
3177 }
3178
3179 start = from_right ? (metab - meta_num) : metaf;
3180 end = from_right ? metab : (metaf + meta_num);
3181
3182 start_addr = (vm_offset_t)start;
3183 end_addr = (vm_offset_t)end;
3184
3185 /*
3186 * If the new high watermark stays on the same page,
3187 * there is no need to populate, nor to drop the lock.
3188 */
3189 if (!page_aligned(from_right ? end_addr : start_addr) &&
3190 trunc_page(start_addr) == trunc_page(end_addr - 1)) {
3191 break;
3192 }
3193 if (!kmem_populate_needed(start_addr, end_addr)) {
3194 break;
3195 }
3196
3197 kmem_populate_meta_locked(start_addr, end_addr);
3198
3199 /*
3200 * Since we dropped the lock, reassess whether the conditions still hold:
3201 * - the HWM we are changing must not have moved
3202 * - the other HWM must not intersect with ours
3203 * - in case of coalescing, the adjacent free meta must still
3204 * be free and of the same size.
3205 *
3206 * If we failed to grow, reevaluate whether freelists have
3207 * entries now by returning NULL.
3208 */
3209 metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3210 metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3211 if (meta != (from_right ? metab : metaf)) {
3212 return NULL;
3213 }
3214 if (metaf + meta_num >= metab) {
3215 kmem_range_out_of_va(range_id, meta_num);
3216 }
3217 if (adj_free_meta) {
3218 if (adj_free_meta->km_page_marker != KMEM_META_FREE ||
3219 kmem_get_free_chunk_len(adj_free_meta) !=
3220 meta_req - meta_num) {
3221 return NULL;
3222 }
3223 }
3224
3225 break;
3226 }
3227
3228 /*
3229 * If there is an adjacent free chunk, remove it from the free list
3230 */
3231 if (adj_free_meta) {
3232 LIST_REMOVE(adj_free_meta, km_link);
3233 LIST_NEXT(adj_free_meta, km_link) = NULL;
3234 adj_free_meta->km_link.le_prev = NULL;
3235 }
3236
3237 /*
3238 * Update hwm
3239 */
3240 meta_update = from_right ? start : end;
3241 kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update;
3242
3243 /*
3244 * Initialize metadata
3245 */
3246 start = from_right ? start : (end - meta_req);
3247 kmem_init_allocated_chunk(start, sizeclass, size_idx);
3248
3249 return start;
3250 }
3251
3252 static void
3253 kmem_requeue_meta(
3254 struct kmem_page_meta *meta,
3255 struct kmem_list_head *head)
3256 {
3257 LIST_REMOVE(meta, km_link);
3258 LIST_INSERT_HEAD(head, meta, km_link);
3259 }
3260
3261 /*
3262 * Return corresponding sizeclass to stash free chunks in
3263 */
3264 __abortlike
3265 static void
3266 kmem_invalid_chunk_num(uint32_t chunks)
3267 {
3268 panic("Invalid number of chunks %u", chunks);
3269 }
3270
3271 static uint32_t
3272 kmem_get_size_idx_for_chunks(uint32_t chunks)
3273 {
3274 for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) {
3275 if (chunks >= kmem_size_array[i].ks_num_chunk) {
3276 return i;
3277 }
3278 }
3279 kmem_invalid_chunk_num(chunks);
3280 }
3281
3282 static void
3283 kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count)
3284 {
3285 bzero(meta, count * sizeof(struct kmem_page_meta));
3286 }
3287
3288 static void
3289 kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count)
3290 {
3291 #if MACH_ASSERT
3292 size_t size = count * sizeof(struct kmem_page_meta);
3293
3294 assert(memcmp_zero_ptr_aligned(meta, size) == 0);
3295 #else
3296 #pragma unused(meta, count)
3297 #endif
3298 }
3299
3300 /*!
3301 * @function kmem_init_free_chunk()
3302 *
3303 * @discussion
3304 * This function prepares a range of chunks to be put on a free list.
3305 * The first and last metadata might be dirty, but the "inner" ones
3306 * must be zero filled by the caller prior to calling this function.
3307 */
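/*
 * For example, freeing a 5-chunk run yields:
 *
 *	meta[0]: KMEM_META_FREE, km_free_chunks = 5
 *	meta[1..3]: zero-filled
 *	meta[4]: KMEM_META_FREE, km_free_chunks = 5
 *
 * so a neighbor can learn the run's length from whichever end it
 * touches when coalescing.
 */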
3308 static void
3309 kmem_init_free_chunk(
3310 struct kmem_page_meta *meta,
3311 uint32_t num_chunks,
3312 uint32_t front)
3313 {
3314 struct kmem_sizeclass *sizeclass;
3315 uint32_t size_idx = kmem_get_size_idx_for_chunks(num_chunks);
3316
3317 if (num_chunks > 2) {
3318 kmem_check_meta_range_is_clear(meta + 1, num_chunks - 2);
3319 }
3320
3321 meta[0] = (struct kmem_page_meta){
3322 .km_free_chunks = num_chunks,
3323 .km_page_marker = KMEM_META_FREE,
3324 .km_sizeclass = (uint8_t)size_idx,
3325 };
3326 if (num_chunks > 1) {
3327 meta[num_chunks - 1] = (struct kmem_page_meta){
3328 .km_free_chunks = num_chunks,
3329 .km_page_marker = KMEM_META_FREE,
3330 .km_sizeclass = (uint8_t)size_idx,
3331 };
3332 }
3333
3334 sizeclass = &kmem_size_array[size_idx];
3335 LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link);
3336 }
3337
3338 static struct kmem_page_meta *
3339 kmem_get_free_chunk_from_list(
3340 struct kmem_sizeclass *org_sizeclass,
3341 uint32_t size_idx,
3342 uint32_t front)
3343 {
3344 struct kmem_sizeclass *sizeclass;
3345 uint32_t num_chunks = org_sizeclass->ks_num_chunk;
3346 struct kmem_page_meta *meta;
3347 uint32_t idx = size_idx;
3348
3349 while (idx < KMEM_NUM_SIZECLASS) {
3350 sizeclass = &kmem_size_array[idx];
3351 meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]);
3352 if (meta) {
3353 break;
3354 }
3355 idx++;
3356 }
3357
3358 /*
3359 * Trim if larger in size
3360 */
3361 if (meta) {
3362 uint32_t num_chunks_free = kmem_get_free_chunk_len(meta);
3363
3364 assert(meta->km_page_marker == KMEM_META_FREE);
3365 LIST_REMOVE(meta, km_link);
3366 LIST_NEXT(meta, km_link) = NULL;
3367 meta->km_link.le_prev = NULL;
3368 if (num_chunks_free > num_chunks) {
3369 num_chunks_free -= num_chunks;
3370 kmem_init_free_chunk(meta + num_chunks, num_chunks_free, front);
3371 }
3372
3373 kmem_init_allocated_chunk(meta, *org_sizeclass, size_idx);
3374 }
3375
3376 return meta;
3377 }
3378
3379 kern_return_t
3380 kmem_locate_space(
3381 vm_map_size_t size,
3382 vm_map_range_id_t range_id,
3383 bool from_right,
3384 vm_map_offset_t *start_inout,
3385 vm_map_entry_t *entry_out)
3386 {
3387 vm_map_entry_t entry;
3388 uint32_t size_idx = kmem_get_idx_from_size(size);
3389 uint32_t front = kmem_get_front(range_id, from_right);
3390 struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx];
3391 struct kmem_page_meta *meta;
3392
3393 assert(size <= sizeclass->ks_size);
3394 again:
3395 if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) {
3396 *start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3397 /*
3398 * Requeue to full if necessary
3399 */
3400 assert(meta->km_page_marker == KMEM_META_PRIMARY);
3401 if (__builtin_popcount(meta->km_bitmap) == KMEM_NUM_GUARDS) {
3402 kmem_requeue_meta(meta, &sizeclass->ks_full_head[front]);
3403 }
3404 } else if ((meta = kmem_get_free_chunk_from_list(sizeclass, size_idx,
3405 front)) != NULL) {
3406 *start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3407 /*
3408 * Queue to partial
3409 */
3410 assert(meta->km_page_marker == KMEM_META_PRIMARY);
3411 assert(__builtin_popcount(meta->km_bitmap) > KMEM_NUM_GUARDS);
3412 LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3413 } else {
3414 meta = kmem_get_new_chunk(range_id, from_right, size_idx);
3415 if (meta == NULL) {
3416 goto again;
3417 }
3418 *start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
3419 assert(meta->km_page_marker == KMEM_META_PRIMARY);
3420 LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3421 }
3422
3423 if (entry_out) {
3424 *entry_out = entry;
3425 }
3426
3427 return KERN_SUCCESS;
3428 }
3429
3430 /*
3431 * Determine whether the given metadata was allocated from the right
3432 */
3433 static bool
3434 kmem_meta_is_from_right(
3435 kmem_range_id_t range_id,
3436 struct kmem_page_meta *meta)
3437 {
3438 struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3439 __assert_only struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3440 struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
3441 struct kmem_page_meta *meta_end;
3442
3443 meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address;
3444
3445 if ((meta >= meta_base) && (meta < metaf)) {
3446 return false;
3447 }
3448
3449 assert(meta >= metab && meta < meta_end);
3450 return true;
3451 }
3452
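/*
 * Coalescing example (hypothetical indices): freeing the 2-chunk run
 * [4..5] while [2..3] and [6..8] are already free merges everything
 * into a single 7-chunk free run starting at meta[2], which
 * kmem_init_free_chunk() then queues on the matching all-free list.
 */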
3453 static void
3454 kmem_free_chunk(
3455 kmem_range_id_t range_id,
3456 struct kmem_page_meta *meta,
3457 bool from_right)
3458 {
3459 struct kmem_page_meta *meta_coalesce = meta - 1;
3460 struct kmem_page_meta *meta_start = meta;
3461 uint32_t num_chunks = kmem_get_chunk_len(meta);
3462 uint32_t add_chunks;
3463 struct kmem_page_meta *meta_end = meta + num_chunks;
3464 struct kmem_page_meta *meta_hwm_l, *meta_hwm_r;
3465 uint32_t front = kmem_get_front(range_id, from_right);
3466
3467 meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, 0)];
3468 meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3469
3470 LIST_REMOVE(meta, km_link);
3471 kmem_clear_meta_range(meta, num_chunks);
3472
3473 /*
3474 * Coalesce left
3475 */
3476 if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) &&
3477 (meta_coalesce->km_page_marker == KMEM_META_FREE)) {
3478 meta_start = meta_coalesce - kmem_get_free_chunk_len(meta_coalesce) + 1;
3479 add_chunks = kmem_get_free_chunk_len(meta_start);
3480 num_chunks += add_chunks;
3481 LIST_REMOVE(meta_start, km_link);
3482 kmem_clear_meta_range(meta_start + add_chunks - 1, 1);
3483 }
3484
3485 /*
3486 * Coalesce right
3487 */
3488 if (((!from_right && (meta_end < meta_hwm_l)) || from_right) &&
3489 (meta_end->km_page_marker == KMEM_META_FREE)) {
3490 add_chunks = kmem_get_free_chunk_len(meta_end);
3491 LIST_REMOVE(meta_end, km_link);
3492 kmem_clear_meta_range(meta_end, 1);
3493 meta_end = meta_end + add_chunks;
3494 num_chunks += add_chunks;
3495 }
3496
3497 kmem_init_free_chunk(meta_start, num_chunks, front);
3498 }
3499
3500 static void
3501 kmem_free_slot(
3502 kmem_range_id_t range_id,
3503 mach_vm_range_t slot)
3504 {
3505 struct kmem_page_meta *meta;
3506 vm_map_offset_t chunk_start;
3507 uint32_t size_idx, chunk_elem, slot_idx, num_elem;
3508 struct kmem_sizeclass *sizeclass;
3509 vm_map_size_t slot_size;
3510
3511 meta = kmem_addr_to_meta_start(slot->min_address, range_id, &chunk_start);
3512 size_idx = meta->km_sizeclass;
3513 slot_size = kmem_get_size_from_idx(size_idx);
3514 slot_idx = (slot->min_address - chunk_start) / slot_size;
3515 assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0);
3516 meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx);
3517
3518 sizeclass = &kmem_size_array[size_idx];
3519 chunk_elem = sizeclass->ks_num_elem;
3520 num_elem = __builtin_popcount(meta->km_bitmap);
3521
3522 if (num_elem == chunk_elem) {
3523 /*
3524 * If the entire chunk is empty, add it to the all-free list
3525 */
3526 bool from_right = kmem_meta_is_from_right(range_id, meta);
3527
3528 kmem_free_chunk(range_id, meta, from_right);
3529 } else if (num_elem == KMEM_NUM_GUARDS + 1) {
3530 /*
3531 * If we freed a slot of a full chunk, move it to the partial list
3532 */
3533 uint32_t front = kmem_get_front(range_id,
3534 kmem_meta_is_from_right(range_id, meta));
3535
3536 kmem_requeue_meta(meta, &sizeclass->ks_partial_head[front]);
3537 }
3538 }
3539
3540 void
3541 kmem_free_space(
3542 vm_map_offset_t start,
3543 vm_map_offset_t end,
3544 vm_map_range_id_t range_id,
3545 mach_vm_range_t slot)
3546 {
3547 bool entry_present = false;
3548 vm_map_entry_t prev_entry;
3549 vm_map_entry_t next_entry;
3550
3551 if ((slot->min_address == start) && (slot->max_address == end)) {
3552 /*
3553 * Entire slot is being freed at once
3554 */
3555 return kmem_free_slot(range_id, slot);
3556 }
3557
3558 entry_present = vm_map_lookup_entry(kernel_map, start, &prev_entry);
3559 assert(!entry_present);
3560 next_entry = prev_entry->vme_next;
3561
3562 if (((prev_entry == vm_map_to_entry(kernel_map) ||
3563 prev_entry->vme_end <= slot->min_address)) &&
3564 (next_entry == vm_map_to_entry(kernel_map) ||
3565 (next_entry->vme_start >= slot->max_address))) {
3566 /*
3567 * Free entire slot
3568 */
3569 kmem_free_slot(range_id, slot);
3570 }
3571 }
3572
3573 #pragma mark kmem init
3574
3575 /*
3576 * The default percentage of memory that can be mlocked is scaled based on the total
3577 * amount of memory in the system. These percentages are calculated
3578 * offline and stored in this table. We index this table by
3579 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index to the range
3580 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t)).
3581 *
3582 * Note that these values were picked for the Mac.
3583 * If we ever have ARM devices with very large memory configurations, we may want
3584 * to revisit, since the kernel overhead is smaller there due to the larger page size.
3585 */
3586
3587 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
3588 #define VM_USER_WIREABLE_MIN_CONFIG 32
3589 #if CONFIG_JETSAM
3590 /* Systems with jetsam can wire a bit more because the system can relieve wired
3591 * pressure.
3592 */
3593 static vm_map_size_t wire_limit_percents[] =
3594 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
3595 #else
3596 static vm_map_size_t wire_limit_percents[] =
3597 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
3598 #endif /* CONFIG_JETSAM */
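/*
 * Worked example (a sketch, assuming bit_floor() yields the index of
 * the most significant bit): a 16GB (2^34 bytes) system without
 * jetsam indexes entry 34 - 32 = 2, so max_wire_percent = 76 and the
 * global user wire limit comes out around 12.2GB, subject to the
 * VM_NOT_USER_WIREABLE_MAX cap applied below.
 */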
3599
3600 /*
3601 * Sets the default global user wire limit which limits the amount of
3602 * memory that can be locked via mlock() based on the above algorithm.
3603 * This can be overridden via a sysctl.
3604 */
3605 static void
3606 kmem_set_user_wire_limits(void)
3607 {
3608 uint64_t available_mem_log;
3609 uint64_t max_wire_percent;
3610 size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
3611 sizeof(vm_map_size_t);
3612 vm_map_size_t limit;
3613 uint64_t config_memsize = max_mem;
3614 #if defined(XNU_TARGET_OS_OSX)
3615 config_memsize = max_mem_actual;
3616 #endif /* defined(XNU_TARGET_OS_OSX) */
3617
3618 available_mem_log = bit_floor(config_memsize);
3619
3620 if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
3621 available_mem_log = 0;
3622 } else {
3623 available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
3624 }
3625 if (available_mem_log >= wire_limit_percents_length) {
3626 available_mem_log = wire_limit_percents_length - 1;
3627 }
3628 max_wire_percent = wire_limit_percents[available_mem_log];
3629
3630 limit = config_memsize * max_wire_percent / 100;
3631 /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
3632 if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
3633 limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
3634 }
3635
3636 vm_global_user_wire_limit = limit;
3637 /* the default per task limit is the same as the global limit */
3638 vm_per_task_user_wire_limit = limit;
3639 vm_add_wire_count_over_global_limit = 0;
3640 vm_add_wire_count_over_user_limit = 0;
3641 }
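
/*
 * Worked example (illustrative numbers only): on a hypothetical 16GB
 * (2^34 byte) configuration without jetsam, the table index is
 * 34 - VM_USER_WIREABLE_MIN_CONFIG = 2, so wire_limit_percents[2] = 76
 * and the default global user wire limit is 16GB * 76 / 100, before the
 * VM_NOT_USER_WIREABLE_MAX cap is applied.
 */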
3642
3643 #define KMEM_MAX_CLAIMS 50
3644 __startup_data
3645 struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
3646 __startup_data
3647 uint32_t kmem_claim_count = 0;
3648
3649 __startup_func
3650 void
3651 kmem_range_startup_init(
3652 struct kmem_range_startup_spec *sp)
3653 {
3654 assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
3655 if (sp->kc_calculate_sz) {
3656 sp->kc_size = (sp->kc_calculate_sz)();
3657 }
3658 if (sp->kc_size) {
3659 kmem_claims[kmem_claim_count] = *sp;
3660 kmem_claim_count++;
3661 }
3662 }
3663
3664 static vm_offset_t
3665 kmem_fuzz_start(void)
3666 {
3667 vm_offset_t kmapoff_kaddr = 0;
3668 uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
3669 vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);
3670
3671 kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
3672 KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
3673 VM_KERN_MEMORY_OSFMK);
3674 return kmapoff_kaddr + kmapoff_size;
3675 }
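
/*
 * Note (illustrative): with 4K pages, (early_random() & 0x1ff) + 1 yields
 * 1 to 512 pages, so the fuzz consumes between 4K and 2M of VA from the map.
 */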
3676
3677 /*
3678 * Generate a randomly shuffled array of indices from 0 to count - 1
3679 */
3680 __startup_func
3681 void
3682 kmem_shuffle(
3683 uint16_t *shuffle_buf,
3684 uint16_t count)
3685 {
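	/*
	 * Inside-out Fisher-Yates shuffle: each element is placed as it is
	 * generated, so shuffle_buf does not need to be pre-initialized.
	 */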
3686 for (uint16_t i = 0; i < count; i++) {
3687 uint16_t j = kmem_get_random16(i);
3688 if (j != i) {
3689 shuffle_buf[i] = shuffle_buf[j];
3690 }
3691 shuffle_buf[j] = i;
3692 }
3693 }
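
#if 0
/*
 * Minimal usage sketch (illustrative only, not compiled): produce a random
 * visit order for 8 items; each index in [0, 8) appears exactly once.
 */
static void
kmem_shuffle_example(void)
{
	uint16_t order[8];

	kmem_shuffle(order, 8);
	for (uint16_t i = 0; i < 8; i++) {
		/* process item order[i] */
	}
}
#endif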
3694
3695 __startup_func
3696 static void
3697 kmem_shuffle_claims(void)
3698 {
3699 uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
3700 uint16_t limit = (uint16_t)kmem_claim_count;
3701
3702 kmem_shuffle(&shuffle_buf[0], limit);
3703 for (uint16_t i = 0; i < limit; i++) {
3704 struct kmem_range_startup_spec tmp = kmem_claims[i];
3705 kmem_claims[i] = kmem_claims[shuffle_buf[i]];
3706 kmem_claims[shuffle_buf[i]] = tmp;
3707 }
3708 }
3709
3710 __startup_func
3711 static void
3712 kmem_readjust_ranges(
3713 uint32_t cur_idx)
3714 {
3715 assert(cur_idx != 0);
3716 uint32_t j = cur_idx - 1, random;
3717 struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
3718 struct mach_vm_range *sp_range = sp.kc_range;
3719
3720 /*
3721 * Find max index where restriction is met
3722 */
3723 for (; j > 0; j--) {
3724 struct kmem_range_startup_spec spj = kmem_claims[j];
3725 vm_map_offset_t max_start = spj.kc_range->min_address;
3726 if (spj.kc_flags & KC_NO_MOVE) {
3727 panic("kmem_range_init: Can't scramble with multiple constraints");
3728 }
3729 if (max_start <= sp_range->min_address) {
3730 break;
3731 }
3732 }
3733
3734 /*
3735 * Pick a random index from 0 to max index and shift claims to the right
3736 * to make room for restricted claim
3737 */
3738 random = kmem_get_random16((uint16_t)j);
3739 assert(random <= j);
3740
3741 sp_range->min_address = kmem_claims[random].kc_range->min_address;
3742 sp_range->max_address = sp_range->min_address + sp.kc_size;
3743
3744 for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
3745 struct kmem_range_startup_spec spj = kmem_claims[j];
3746 struct mach_vm_range *range = spj.kc_range;
3747 range->min_address += sp.kc_size;
3748 range->max_address += sp.kc_size;
3749 kmem_claims[j + 1] = spj;
3750 }
3751
3752 sp.kc_flags = KC_NO_MOVE;
3753 kmem_claims[random] = sp;
3754 }
3755
3756 __startup_func
3757 static vm_map_size_t
3758 kmem_add_ptr_claims(void)
3759 {
3760 uint64_t kmem_meta_num, kmem_ptr_chunks;
3761 vm_map_size_t org_ptr_range_size = ptr_range_size;
3762
3763 ptr_range_size -= PAGE_SIZE;
3764 ptr_range_size *= KMEM_CHUNK_SIZE_MIN;
3765 ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta));
3766
3767 kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN;
3768 ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN;
3769
3770 kmem_meta_num = kmem_ptr_chunks + 2;
3771 kmem_meta_size = round_page(kmem_meta_num * sizeof(struct kmem_page_meta));
3772
3773 assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size);
3774 /*
3775 * Add claims for kmem's ranges
3776 */
3777 for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
3778 struct kmem_range_startup_spec kmem_spec = {
3779 .kc_name = "kmem_ptr_range",
3780 .kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
3781 .kc_size = ptr_range_size,
3782 .kc_flags = KC_NO_ENTRY,
3783 };
3784 kmem_claims[kmem_claim_count++] = kmem_spec;
3785
3786 struct kmem_range_startup_spec kmem_meta_spec = {
3787 .kc_name = "kmem_ptr_range_meta",
3788 .kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i],
3789 .kc_size = kmem_meta_size,
3790 .kc_flags = KC_NONE,
3791 };
3792 kmem_claims[kmem_claim_count++] = kmem_meta_spec;
3793 }
3794 return (org_ptr_range_size - ptr_range_size - kmem_meta_size) *
3795 kmem_ptr_ranges;
3796 }
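
/*
 * Sizing sketch (illustrative): the original ptr_range_size S is split
 * between chunk VA and per-chunk metadata roughly as
 *
 *   usable ~= (S - PAGE_SIZE) * KMEM_CHUNK_SIZE_MIN /
 *             (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta))
 *
 * rounded down to whole KMEM_CHUNK_SIZE_MIN chunks, with two additional
 * metadata entries reserved; the assert above verifies that chunk VA plus
 * metadata never exceeds the original claim.
 */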
3797
3798 __startup_func
3799 static void
3800 kmem_add_extra_claims(void)
3801 {
3802 vm_map_size_t largest_free_size = 0, total_claims = 0;
3803
3804 vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
3805 largest_free_size = trunc_page(largest_free_size);
3806
3807 /*
3808 * KASAN and configs w/o *TRR need to have just one ptr range due to
3809 * resource constraints.
3810 */
3811 #if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
3812 kmem_ptr_ranges = 1;
3813 #endif
3814 /*
3815 * Determine size of data and pointer kmem_ranges
3816 */
3817 for (uint32_t i = 0; i < kmem_claim_count; i++) {
3818 total_claims += kmem_claims[i].kc_size;
3819 }
3820 assert((total_claims & PAGE_MASK) == 0);
3821 largest_free_size -= total_claims;
3822
3823 /*
3824 * Use half the total available VA for all pointer allocations (this
3825 * includes the kmem_sprayqtn range). Given that we have 4 total
3826 * ranges divide the available VA by 8.
3827 */
3828 ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2);
3829 sprayqtn_range_size = ptr_range_size;
3830
3831 if (sprayqtn_range_size > (sane_size / 2)) {
3832 sprayqtn_range_size = sane_size / 2;
3833 }
3834
3835 ptr_range_size = round_page(ptr_range_size);
3836 sprayqtn_range_size = round_page(sprayqtn_range_size);
3837
3838
3839 data_range_size = largest_free_size
3840 - (ptr_range_size * kmem_ptr_ranges)
3841 - sprayqtn_range_size;
3842
3843 /*
3844 * Add claims for kmem's ranges
3845 */
3846 data_range_size += kmem_add_ptr_claims();
3847 assert(data_range_size + sprayqtn_range_size +
3848 ((ptr_range_size + kmem_meta_size) * kmem_ptr_ranges) <=
3849 largest_free_size);
3850
3851 struct kmem_range_startup_spec kmem_spec_sprayqtn = {
3852 .kc_name = "kmem_sprayqtn_range",
3853 .kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN],
3854 .kc_size = sprayqtn_range_size,
3855 .kc_flags = KC_NO_ENTRY,
3856 };
3857 kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn;
3858
3859 struct kmem_range_startup_spec kmem_spec_data = {
3860 .kc_name = "kmem_data_range",
3861 .kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
3862 .kc_size = data_range_size,
3863 .kc_flags = KC_NO_ENTRY,
3864 };
3865 kmem_claims[kmem_claim_count++] = kmem_spec_data;
3866 }
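
/*
 * Example split (illustrative): with kmem_ptr_ranges == 3, half the
 * remaining free VA is spread across the 3 pointer ranges plus the
 * sprayqtn range, i.e. each gets largest_free_size / 8; whatever is left
 * (including VA returned by kmem_add_ptr_claims()) becomes the data range.
 */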
3867
3868 __startup_func
3869 static void
3870 kmem_scramble_ranges(void)
3871 {
3872 vm_map_offset_t start = 0;
3873
3874 /*
3875 * Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
3876 * the VM can find the requested ranges.
3877 */
3878 kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
3879 VM_MAP_PAGE_SIZE(kernel_map));
3880 kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;
3881
3882 /*
3883 * Allocate the g_kext_map prior to randomizing the remaining submaps, as
3884 * this map is 2G in size and starts at the end of kernel_text on x86; it
3885 * could otherwise overflow into the heap.
3886 */
3887 kext_alloc_init();
3888
3889 /*
3890 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
3891 * stack addresses. (With a 4K page and 9 bits of randomness, this
3892 * eats about 2M of VA from the map)
3893 *
3894 * Note that we always need to slide by at least one page because the VM
3895 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
3896 * do not admit this address to be part of any zone submap.
3897 */
3898 start = kmem_fuzz_start();
3899
3900 /*
3901 * Add claims for ptr and data kmem_ranges
3902 */
3903 kmem_add_extra_claims();
3904
3905 /*
3906 * Shuffle registered claims
3907 */
3908 assert(kmem_claim_count < UINT16_MAX);
3909 kmem_shuffle_claims();
3910
3911 /*
3912 * Apply restrictions and determine range for each claim
3913 */
3914 for (uint32_t i = 0; i < kmem_claim_count; i++) {
3915 vm_map_offset_t end = 0;
3916 struct kmem_range_startup_spec sp = kmem_claims[i];
3917 struct mach_vm_range *sp_range = sp.kc_range;
3918
3919 if (vm_map_locate_space_anywhere(kernel_map, sp.kc_size, 0,
3920 VM_MAP_KERNEL_FLAGS_ANYWHERE(), &start, NULL) != KERN_SUCCESS) {
3921 panic("kmem_range_init: vm_map_locate_space failing for claim %s",
3922 sp.kc_name);
3923 }
3924
3925 end = start + sp.kc_size;
3926 /*
3927 * Re-adjust ranges if restriction not met
3928 */
3929 if (sp_range->min_address && start > sp_range->min_address) {
3930 kmem_readjust_ranges(i);
3931 } else {
3932 sp_range->min_address = start;
3933 sp_range->max_address = end;
3934 }
3935 start = end;
3936 }
3937
3938 /*
3939 * We have settled on the ranges, now create temporary entries for the
3940 * claims
3941 */
3942 for (uint32_t i = 0; i < kmem_claim_count; i++) {
3943 struct kmem_range_startup_spec sp = kmem_claims[i];
3944 vm_map_entry_t entry = NULL;
3945 if (sp.kc_flags & KC_NO_ENTRY) {
3946 continue;
3947 }
3948 if (vm_map_find_space(kernel_map, sp.kc_range->min_address, sp.kc_size, 0,
3949 VM_MAP_KERNEL_FLAGS_ANYWHERE(), &entry) != KERN_SUCCESS) {
3950 panic("kmem_range_init: vm_map_find_space failing for claim %s",
3951 sp.kc_name);
3952 }
3953 vm_object_reference(kernel_object_default);
3954 VME_OBJECT_SET(entry, kernel_object_default, false, 0);
3955 VME_OFFSET_SET(entry, entry->vme_start);
3956 vm_map_unlock(kernel_map);
3957 }
3958 /*
3959 * Now that we are done assigning all the ranges, reset
3960 * kmem_ranges[KMEM_RANGE_ID_NONE]
3961 */
3962 kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};
3963
3964 #if DEBUG || DEVELOPMENT
3965 for (uint32_t i = 0; i < kmem_claim_count; i++) {
3966 struct kmem_range_startup_spec sp = kmem_claims[i];
3967
3968 printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
3969 (void *)sp.kc_range->min_address,
3970 (void *)sp.kc_range->max_address,
3971 mach_vm_size_pretty(sp.kc_size),
3972 mach_vm_size_unit(sp.kc_size));
3973 }
3974 #endif /* DEBUG || DEVELOPMENT */
3975 }
3976
3977 __startup_func
3978 static void
3979 kmem_range_init(void)
3980 {
3981 vm_size_t range_adjustment;
3982
3983 kmem_scramble_ranges();
3984
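	/*
	 * Restrict large allocations to the upper 7/8 of the sprayqtn and
	 * data ranges, keeping the front eighth of each range free of
	 * large allocations.
	 */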
3985 range_adjustment = sprayqtn_range_size >> 3;
3986 kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address =
3987 kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment;
3988 kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address =
3989 kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address;
3990
3991 range_adjustment = data_range_size >> 3;
3992 kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
3993 kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
3994 kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
3995 kmem_ranges[KMEM_RANGE_ID_DATA].max_address;
3996
3997 pmap_init();
3998 kmem_metadata_init();
3999 kmem_sizeclass_init();
4000
4001 #if DEBUG || DEVELOPMENT
4002 for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) {
4003 vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
4004 printf("kmem_large_ranges[%d] : %p - %p (%u%c)\n", i,
4005 (void *)kmem_large_ranges[i].min_address,
4006 (void *)kmem_large_ranges[i].max_address,
4007 mach_vm_size_pretty(range_size),
4008 mach_vm_size_unit(range_size));
4009 }
4010 #endif
4011 }
4012 STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
4013
4014 #if DEBUG || DEVELOPMENT
4015 __startup_func
4016 static void
4017 kmem_log_init(void)
4018 {
4019 /*
4020 * The log can only be created after the kmem subsystem is initialized, as
4021 * btlog creation itself uses kmem.
4022 */
4023 kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0);
4024 }
4025 STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init);
4026
4027 kmem_gobj_stats
4028 kmem_get_gobj_stats(void)
4029 {
4030 kmem_gobj_stats stats = {};
4031
4032 vm_map_lock(kernel_map);
4033 for (uint8_t i = 0; i < kmem_ptr_ranges; i++) {
4034 kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i;
4035 struct mach_vm_range range = kmem_ranges[range_id];
4036 struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)];
4037 struct kmem_page_meta *meta_end;
4038 uint64_t meta_idx = meta - kmem_meta_base[range_id];
4039 vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0;
4040 vm_map_offset_t addr;
4041 vm_map_entry_t entry;
4042
4043 /*
4044 * Left front
4045 */
4046 va = (meta_idx * KMEM_CHUNK_SIZE_MIN);
4047 meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta));
4048
4049 /*
4050 * Right front
4051 */
4052 meta = kmem_meta_hwm[kmem_get_front(range_id, 1)];
4053 meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr,
4054 &meta_idx);
4055 meta_idx = meta_end - meta;
4056 meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta));
4057 va += (meta_idx * KMEM_CHUNK_SIZE_MIN);
4058
4059 /*
4060 * Compute VA allocated in entire range
4061 */
4062 if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) {
4063 entry = entry->vme_next;
4064 }
4065 while (entry != vm_map_to_entry(kernel_map) &&
4066 entry->vme_start < range.max_address) {
4067 used += (entry->vme_end - entry->vme_start);
4068 entry = entry->vme_next;
4069 }
4070
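		/*
		 * Estimate page-table overhead for the unallocated VA:
		 * one 8-byte PTE per unmapped page.
		 */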
4071 pte_sz = round_page(atop(va - used) * 8);
4072
4073 stats.total_used += used;
4074 stats.total_va += va;
4075 stats.pte_sz += pte_sz;
4076 stats.meta_sz += meta_sz;
4077 }
4078 vm_map_unlock(kernel_map);
4079
4080 return stats;
4081 }
4082
4083 #endif /* DEBUG || DEVELOPMENT */
4084
4085 /*
4086 * kmem_init:
4087 *
4088 * Initialize the kernel's virtual memory map, taking
4089 * into account all memory allocated up to this time.
4090 */
4091 __startup_func
4092 void
4093 kmem_init(
4094 vm_offset_t start,
4095 vm_offset_t end)
4096 {
4097 vm_map_offset_t map_start;
4098 vm_map_offset_t map_end;
4099
4100 map_start = vm_map_trunc_page(start,
4101 VM_MAP_PAGE_MASK(kernel_map));
4102 map_end = vm_map_round_page(end,
4103 VM_MAP_PAGE_MASK(kernel_map));
4104
4105 vm_map_will_allocate_early_map(&kernel_map);
4106 #if defined(__arm64__)
4107 kernel_map = vm_map_create_options(pmap_kernel(),
4108 VM_MIN_KERNEL_AND_KEXT_ADDRESS,
4109 VM_MAX_KERNEL_ADDRESS,
4110 VM_MAP_CREATE_DEFAULT);
4111 /*
4112 * Reserve virtual memory allocated up to this time.
4113 */
4114 {
4115 unsigned int region_select = 0;
4116 vm_map_offset_t region_start;
4117 vm_map_size_t region_size;
4118 vm_map_offset_t map_addr;
4119 kern_return_t kr;
4120
4121 while (pmap_virtual_region(region_select, &region_start, &region_size)) {
4122 map_addr = region_start;
4123 kr = vm_map_enter(kernel_map, &map_addr,
4124 vm_map_round_page(region_size,
4125 VM_MAP_PAGE_MASK(kernel_map)),
4126 (vm_map_offset_t) 0,
4127 VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(.vmkf_no_pmap_check = true),
4128 VM_OBJECT_NULL,
4129 (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
4130 VM_INHERIT_DEFAULT);
4131
4132 if (kr != KERN_SUCCESS) {
4133 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
4134 (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
4135 (uint64_t) region_size, kr);
4136 }
4137
4138 region_select++;
4139 }
4140 }
4141 #else
4142 kernel_map = vm_map_create_options(pmap_kernel(),
4143 VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
4144 VM_MAP_CREATE_DEFAULT);
4145 /*
4146 * Reserve virtual memory allocated up to this time.
4147 */
4148 if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
4149 vm_map_offset_t map_addr;
4150 kern_return_t kr;
4151
4152 map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
4153 kr = vm_map_enter(kernel_map,
4154 &map_addr,
4155 (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
4156 (vm_map_offset_t) 0,
4157 VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true),
4158 VM_OBJECT_NULL,
4159 (vm_object_offset_t) 0, FALSE,
4160 VM_PROT_NONE, VM_PROT_NONE,
4161 VM_INHERIT_DEFAULT);
4162
4163 if (kr != KERN_SUCCESS) {
4164 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
4165 (uint64_t) start, (uint64_t) end,
4166 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
4167 (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
4168 kr);
4169 }
4170 }
4171 #endif
4172
4173 kmem_set_user_wire_limits();
4174 }
4175
4176
4177 #pragma mark map copyio
4178 /*
4179 * Note: semantic types aren't used as `copyio` already validates.
4180 */
4181
4182 kern_return_t
4183 copyinmap(
4184 vm_map_t map,
4185 vm_map_offset_t fromaddr,
4186 void *todata,
4187 vm_size_t length)
4188 {
4189 kern_return_t kr = KERN_SUCCESS;
4190 vm_map_t oldmap;
4191
4192 if (vm_map_pmap(map) == pmap_kernel()) {
4193 /* assume a correct copy */
4194 memcpy(todata, CAST_DOWN(void *, fromaddr), length);
4195 } else if (current_map() == map) {
4196 if (copyin(fromaddr, todata, length) != 0) {
4197 kr = KERN_INVALID_ADDRESS;
4198 }
4199 } else {
4200 vm_map_reference(map);
4201 oldmap = vm_map_switch(map);
4202 if (copyin(fromaddr, todata, length) != 0) {
4203 kr = KERN_INVALID_ADDRESS;
4204 }
4205 vm_map_switch(oldmap);
4206 vm_map_deallocate(map);
4207 }
4208 return kr;
4209 }
4210
4211 kern_return_t
4212 copyoutmap(
4213 vm_map_t map,
4214 void *fromdata,
4215 vm_map_address_t toaddr,
4216 vm_size_t length)
4217 {
4218 kern_return_t kr = KERN_SUCCESS;
4219 vm_map_t oldmap;
4220
4221 if (vm_map_pmap(map) == pmap_kernel()) {
4222 /* assume a correct copy */
4223 memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
4224 } else if (current_map() == map) {
4225 if (copyout(fromdata, toaddr, length) != 0) {
4226 ktriage_record(thread_tid(current_thread()),
4227 KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
4228 KDBG_TRIAGE_RESERVED,
4229 KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR),
4230 KERN_INVALID_ADDRESS /* arg */);
4231 kr = KERN_INVALID_ADDRESS;
4232 }
4233 } else {
4234 vm_map_reference(map);
4235 oldmap = vm_map_switch(map);
4236 if (copyout(fromdata, toaddr, length) != 0) {
4237 ktriage_record(thread_tid(current_thread()),
4238 KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
4239 KDBG_TRIAGE_RESERVED,
4240 KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR),
4241 KERN_INVALID_ADDRESS /* arg */);
4242 kr = KERN_INVALID_ADDRESS;
4243 }
4244 vm_map_switch(oldmap);
4245 vm_map_deallocate(map);
4246 }
4247 return kr;
4248 }
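
#if 0
/*
 * Usage sketch (illustrative only, not compiled): bounce a 32-bit value
 * out of and back into a task's map; `some_map` and `uaddr` are
 * hypothetical.
 */
static kern_return_t
copyio_map_example(vm_map_t some_map, vm_map_offset_t uaddr)
{
	uint32_t value;
	kern_return_t kr;

	kr = copyinmap(some_map, uaddr, &value, sizeof(value));
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	value++;
	return copyoutmap(some_map, &value, uaddr, sizeof(value));
}
#endif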
4249
4250 kern_return_t
4251 copyoutmap_atomic32(
4252 vm_map_t map,
4253 uint32_t value,
4254 vm_map_address_t toaddr)
4255 {
4256 kern_return_t kr = KERN_SUCCESS;
4257 vm_map_t oldmap;
4258
4259 if (vm_map_pmap(map) == pmap_kernel()) {
4260 /* assume a correct toaddr */
4261 *(uint32_t *)toaddr = value;
4262 } else if (current_map() == map) {
4263 if (copyout_atomic32(value, toaddr) != 0) {
4264 kr = KERN_INVALID_ADDRESS;
4265 }
4266 } else {
4267 vm_map_reference(map);
4268 oldmap = vm_map_switch(map);
4269 if (copyout_atomic32(value, toaddr) != 0) {
4270 kr = KERN_INVALID_ADDRESS;
4271 }
4272 vm_map_switch(oldmap);
4273 vm_map_deallocate(map);
4274 }
4275 return kr;
4276 }
4277
4278 kern_return_t
4279 copyoutmap_atomic64(
4280 vm_map_t map,
4281 uint64_t value,
4282 vm_map_address_t toaddr)
4283 {
4284 kern_return_t kr = KERN_SUCCESS;
4285 vm_map_t oldmap;
4286
4287 if (vm_map_pmap(map) == pmap_kernel()) {
4288 /* assume a correct toaddr */
4289 *(uint64_t *)toaddr = value;
4290 } else if (current_map() == map) {
4291 if (copyout_atomic64(value, toaddr) != 0) {
4292 kr = KERN_INVALID_ADDRESS;
4293 }
4294 } else {
4295 vm_map_reference(map);
4296 oldmap = vm_map_switch(map);
4297 if (copyout_atomic64(value, toaddr) != 0) {
4298 kr = KERN_INVALID_ADDRESS;
4299 }
4300 vm_map_switch(oldmap);
4301 vm_map_deallocate(map);
4302 }
4303 return kr;
4304 }
4305
4306
4307 #pragma mark pointer obfuscation / packing
4308
4309 /*
4310 *
4311 * The following two functions are to be used when exposing kernel
4312 * addresses to userspace via any of the various debug or info
4313 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
4314 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
4315 * are exported to KEXTs.
4316 *
4317 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
4318 */
4319
4320 vm_offset_t
4321 vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
4322 {
4323 assert(salt != 0);
4324
4325 if (addr == 0) {
4326 return 0ul;
4327 }
4328
4329 if (VM_KERNEL_IS_SLID(addr)) {
4330 return VM_KERNEL_UNSLIDE(addr);
4331 }
4332
4333 addr = VM_KERNEL_STRIP_UPTR(addr);
4334
4335 vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
4336 SHA256_CTX sha_ctx;
4337
4338 SHA256_Init(&sha_ctx);
4339 SHA256_Update(&sha_ctx, &salt, sizeof(salt));
4340 SHA256_Update(&sha_ctx, &addr, sizeof(addr));
4341 SHA256_Final(sha_digest, &sha_ctx);
4342
4343 return sha_digest[0];
4344 }
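
/*
 * Note: the hash is the first vm_offset_t worth of SHA256(salt || addr),
 * so a given address hashes consistently within a boot without being
 * recoverable from the output. In-kernel callers should use the macro
 * wrappers in vm_param.h rather than calling this function directly.
 */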
4345
4346 __exported vm_offset_t
4347 vm_kernel_addrhash_external(vm_offset_t addr);
4348 vm_offset_t
4349 vm_kernel_addrhash_external(vm_offset_t addr)
4350 {
4351 return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
4352 }
4353
4354 void
4355 vm_kernel_addrhide(
4356 vm_offset_t addr,
4357 vm_offset_t *hide_addr)
4358 {
4359 *hide_addr = VM_KERNEL_ADDRHIDE(addr);
4360 }
4361
4362 void
4363 vm_kernel_addrperm_external(
4364 vm_offset_t addr,
4365 vm_offset_t *perm_addr)
4366 {
4367 if (VM_KERNEL_IS_SLID(addr)) {
4368 *perm_addr = VM_KERNEL_UNSLIDE(addr);
4369 } else if (VM_KERNEL_ADDRESS(addr)) {
4370 *perm_addr = addr + vm_kernel_addrperm_ext;
4371 } else {
4372 *perm_addr = addr;
4373 }
4374 }
4375
4376 void
4377 vm_kernel_unslide_or_perm_external(
4378 vm_offset_t addr,
4379 vm_offset_t *up_addr)
4380 {
4381 vm_kernel_addrperm_external(addr, up_addr);
4382 }
4383
4384 void
4385 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
4386 {
4387 if (ptr & ((1ul << params.vmpp_shift) - 1)) {
4388 panic("pointer %p can't be packed: low %d bits aren't 0",
4389 (void *)ptr, params.vmpp_shift);
4390 } else if (ptr <= params.vmpp_base) {
4391 panic("pointer %p can't be packed: below base %p",
4392 (void *)ptr, (void *)params.vmpp_base);
4393 } else {
4394 panic("pointer %p can't be packed: maximum encodable pointer is %p",
4395 (void *)ptr, (void *)vm_packing_max_packable(params));
4396 }
4397 }
4398
4399 void
4400 vm_packing_verify_range(
4401 const char *subsystem,
4402 vm_offset_t min_address,
4403 vm_offset_t max_address,
4404 vm_packing_params_t params)
4405 {
4406 if (min_address > max_address) {
4407 panic("%s: %s range invalid min:%p > max:%p",
4408 __func__, subsystem, (void *)min_address, (void *)max_address);
4409 }
4410
4411 if (!params.vmpp_base_relative) {
4412 return;
4413 }
4414
4415 if (min_address <= params.vmpp_base) {
4416 panic("%s: %s range invalid min:%p <= base:%p",
4417 __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
4418 }
4419
4420 if (max_address > vm_packing_max_packable(params)) {
4421 panic("%s: %s range invalid max:%p >= max packable:%p",
4422 __func__, subsystem, (void *)max_address,
4423 (void *)vm_packing_max_packable(params));
4424 }
4425 }
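
/*
 * Packing sketch (illustrative, per the checks above): under the
 * base-relative scheme a packed pointer encodes
 * (ptr - vmpp_base) >> vmpp_shift in a fixed-width field, so a packable
 * pointer must have its low vmpp_shift bits clear, lie above vmpp_base,
 * and not exceed vm_packing_max_packable(params).
 */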
4426
4427 #pragma mark tests
4428 #if MACH_ASSERT
4429 #include <sys/errno.h>
4430
4431 static void
4432 kmem_test_for_entry(
4433 vm_map_t map,
4434 vm_offset_t addr,
4435 void (^block)(vm_map_entry_t))
4436 {
4437 vm_map_entry_t entry;
4438
4439 vm_map_lock(map);
4440 block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
4441 vm_map_unlock(map);
4442 }
4443
4444 #define kmem_test_assert_map(map, pg, entries) ({ \
4445 assert3u((map)->size, ==, ptoa(pg)); \
4446 assert3u((map)->hdr.nentries, ==, entries); \
4447 })
4448
4449 static bool
4450 can_write_at(vm_offset_t offs, uint32_t page)
4451 {
4452 static const int zero;
4453
4454 return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
4455 }
4456 #define assert_writeable(offs, page) \
4457 assertf(can_write_at(offs, page), \
4458 "can write at %p + ptoa(%d)", (void *)offs, page)
4459
4460 #define assert_faults(offs, page) \
4461 assertf(!can_write_at(offs, page), \
4462 "can write at %p + ptoa(%d)", (void *)offs, page)
4463
4464 #define peek(offs, page) \
4465 (*(uint32_t *)((offs) + ptoa(page)))
4466
4467 #define poke(offs, page, v) \
4468 (*(uint32_t *)((offs) + ptoa(page)) = (v))
4469
4470 __attribute__((noinline))
4471 static void
4472 kmem_alloc_basic_test(vm_map_t map)
4473 {
4474 kmem_guard_t guard = {
4475 .kmg_tag = VM_KERN_MEMORY_DIAG,
4476 };
4477 vm_offset_t addr;
4478
4479 /*
4480 * Test wired basics:
4481 * - KMA_KOBJECT
4482 * - KMA_GUARD_FIRST, KMA_GUARD_LAST
4483 * - allocation alignment
4484 */
4485 addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
4486 KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
4487 assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)", map);
4488 assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
4489 kmem_test_assert_map(map, 10, 1);
4490
4491 kmem_test_for_entry(map, addr, ^(__assert_only vm_map_entry_t e){
4492 assertf(e, "unable to find address %p in map %p", (void *)addr, map);
4493 assert(e->vme_kernel_object);
4494 assert(!e->vme_atomic);
4495 assert3u(e->vme_start, <=, addr);
4496 assert3u(addr + ptoa(10), <=, e->vme_end);
4497 });
4498
4499 assert_faults(addr, 0);
4500 for (int i = 1; i < 9; i++) {
4501 assert_writeable(addr, i);
4502 }
4503 assert_faults(addr, 9);
4504
4505 kmem_free(map, addr, ptoa(10));
4506 kmem_test_assert_map(map, 0, 0);
4507
4508 /*
4509 * Test pageable basics.
4510 */
4511 addr = kmem_alloc_guard(map, ptoa(10), 0,
4512 KMA_PAGEABLE, guard).kmr_address;
4513 assertf(addr != 0ull, "kma(%p, 10p, 0, KO | PG)", map);
4514 kmem_test_assert_map(map, 10, 1);
4515
4516 for (int i = 0; i < 9; i++) {
4517 assert_faults(addr, i);
4518 poke(addr, i, 42);
4519 assert_writeable(addr, i);
4520 }
4521
4522 kmem_free(map, addr, ptoa(10));
4523 kmem_test_assert_map(map, 0, 0);
4524 }
4525
4526 __attribute__((noinline))
4527 static void
4528 kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
4529 {
4530 kmem_guard_t guard = {
4531 .kmg_atomic = !(kind & KMR_DATA),
4532 .kmg_tag = VM_KERN_MEMORY_DIAG,
4533 .kmg_context = 0xefface,
4534 };
4535 vm_offset_t addr, newaddr;
4536 const int N = 10;
4537
4538 /*
4539 * This isn't something kmem_realloc_guard() _needs_ to do,
4540 * we could conceive an implementation where it grows in place
4541 * if there's space after it.
4542 *
4543 * However, this is what the implementation does today.
4544 */
4545 bool realloc_growth_changes_address = true;
4546 bool GL = (kind & KMR_GUARD_LAST);
4547
4548 /*
4549 * Initial N page allocation
4550 */
4551 addr = kmem_alloc_guard(map, ptoa(N), 0,
4552 (kind & (KMA_KOBJECT | KMA_GUARD_LAST | KMA_DATA)) | KMA_ZERO,
4553 guard).kmr_address;
4554 assert3u(addr, !=, 0);
4555 kmem_test_assert_map(map, N, 1);
4556 for (int pg = 0; pg < N - GL; pg++) {
4557 poke(addr, pg, 42 + pg);
4558 }
4559 for (int pg = N - GL; pg < N; pg++) {
4560 assert_faults(addr, pg);
4561 }
4562
4563
4564 /*
4565 * Grow to N + 3 pages
4566 */
4567 newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
4568 kind | KMR_ZERO, guard).kmr_address;
4569 assert3u(newaddr, !=, 0);
4570 if (realloc_growth_changes_address) {
4571 assert3u(addr, !=, newaddr);
4572 }
4573 if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
4574 kmem_test_assert_map(map, N + 3, 1);
4575 } else {
4576 kmem_test_assert_map(map, 2 * N + 3, 2);
4577 }
4578 for (int pg = 0; pg < N - GL; pg++) {
4579 assert3u(peek(newaddr, pg), ==, 42 + pg);
4580 }
4581 if ((kind & KMR_FREEOLD) == 0) {
4582 for (int pg = 0; pg < N - GL; pg++) {
4583 assert3u(peek(addr, pg), ==, 42 + pg);
4584 }
4585 /* check for true sharing */
4586 poke(addr + 16, 0, 1234);
4587 assert3u(peek(newaddr + 16, 0), ==, 1234);
4588 kmem_free_guard(map, addr, ptoa(N), KMF_NONE, guard);
4589 kmem_test_assert_map(map, N + 3, 1);
4590 }
4591 if (addr != newaddr) {
4592 for (int pg = 0; pg < N - GL; pg++) {
4593 assert_faults(addr, pg);
4594 }
4595 }
4596 for (int pg = N - GL; pg < N + 3 - GL; pg++) {
4597 assert3u(peek(newaddr, pg), ==, 0);
4598 }
4599 for (int pg = N + 3 - GL; pg < N + 3; pg++) {
4600 assert_faults(newaddr, pg);
4601 }
4602 addr = newaddr;
4603
4604
4605 /*
4606 * Shrink to N - 2 pages
4607 */
4608 newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
4609 kind | KMR_ZERO, guard).kmr_address;
4610 assert3u(map->size, ==, ptoa(N - 2));
4611 assert3u(newaddr, ==, addr);
4612 kmem_test_assert_map(map, N - 2, 1);
4613
4614 for (int pg = 0; pg < N - 2 - GL; pg++) {
4615 assert3u(peek(addr, pg), ==, 42 + pg);
4616 }
4617 for (int pg = N - 2 - GL; pg < N + 3; pg++) {
4618 assert_faults(addr, pg);
4619 }
4620
4621 kmem_free_guard(map, addr, ptoa(N - 2), KMF_NONE, guard);
4622 kmem_test_assert_map(map, 0, 0);
4623 }
4624
4625 static int
4626 kmem_basic_test(__unused int64_t in, int64_t *out)
4627 {
4628 mach_vm_offset_t addr;
4629 vm_map_t map;
4630
4631 printf("%s: test running\n", __func__);
4632
4633 map = kmem_suballoc(kernel_map, &addr, 64U << 20,
4634 VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
4635 KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap;
4636
4637 printf("%s: kmem_alloc ...\n", __func__);
4638 kmem_alloc_basic_test(map);
4639 printf("%s: PASS\n", __func__);
4640
4641 printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
4642 kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
4643 printf("%s: PASS\n", __func__);
4644
4645 printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
4646 kmem_realloc_basic_test(map, KMR_FREEOLD);
4647 printf("%s: PASS\n", __func__);
4648
4649 printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
4650 kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST);
4651 printf("%s: PASS\n", __func__);
4652
4653 printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
4654 kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
4655 printf("%s: PASS\n", __func__);
4656
4657 printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
4658 kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
4659 printf("%s: PASS\n", __func__);
4660
4661 printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
4662 kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST);
4663 printf("%s: PASS\n", __func__);
4664
4665 printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
4666 kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
4667 printf("%s: PASS\n", __func__);
4668
4669 printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
4670 kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
4671 printf("%s: PASS\n", __func__);
4672
4673 /* using KMR_DATA signals to test the non-atomic realloc path */
4674 printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
4675 kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
4676 printf("%s: PASS\n", __func__);
4677
4678 printf("%s: kmem_realloc (KMR_DATA) ...\n", __func__);
4679 kmem_realloc_basic_test(map, KMR_DATA);
4680 printf("%s: PASS\n", __func__);
4681
4682 kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
4683 vm_map_deallocate(map);
4684
4685 printf("%s: test passed\n", __func__);
4686 *out = 1;
4687 return 0;
4688 }
4689 SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);
4690
4691 static void
4692 kmem_test_get_size_idx_for_chunks(uint32_t chunks)
4693 {
4694 __assert_only uint32_t idx = kmem_get_size_idx_for_chunks(chunks);
4695
4696 assert(chunks >= kmem_size_array[idx].ks_num_chunk);
4697 }
4698
4699 __attribute__((noinline))
4700 static void
4701 kmem_test_get_size_idx_for_all_chunks()
4702 {
4703 for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
4704 uint32_t chunks = kmem_size_array[i].ks_num_chunk;
4705
4706 if (chunks != 1) {
4707 kmem_test_get_size_idx_for_chunks(chunks - 1);
4708 }
4709 kmem_test_get_size_idx_for_chunks(chunks);
4710 kmem_test_get_size_idx_for_chunks(chunks + 1);
4711 }
4712 }
4713
4714 static int
4715 kmem_guard_obj_test(__unused int64_t in, int64_t *out)
4716 {
4717 printf("%s: test running\n", __func__);
4718
4719 printf("%s: kmem_get_size_idx_for_chunks\n", __func__);
4720 kmem_test_get_size_idx_for_all_chunks();
4721 printf("%s: PASS\n", __func__);
4722
4723 printf("%s: test passed\n", __func__);
4724 *out = 1;
4725 return 0;
4726 }
4727 SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test);
4728 #endif /* MACH_ASSERT */
4729